Megatest

Check-in [7c315bd32d]
Login
Overview
Comment:Lower gating on test launch to 0.05 journal load. Add exception handler for file-modification-time on .servinfo files
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.81-fixes
Files: files | file ages | folders
SHA1: 7c315bd32db45ee772faa7b98ae43af46b558aef
User & Date: mrwellan on 2024-07-17 19:17:07
Other Links: branch diff | manifest | tags
Context
2024-07-18
05:41
Minor cleanup Leaf check-in: 60706141c1 user: mrwellan tags: v1.81-fixes
2024-07-17
19:17
Lower gating on test launch to 0.05 journal load. Add exception handler for file-modification-time on .servinfo files check-in: 7c315bd32d user: mrwellan tags: v1.81-fixes
2024-07-15
15:47
Changed Megatest version to 1.8102 check-in: 98f3441b4f user: icfadm tags: v1.81
Changes

Modified runs.scm from [2d4118afd2] to [d02f63b65d].

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (and (if *journal-stats*
					  (let* ((dbfname (conc
							   (dbfile:run-id->dbnum run-id)
							   ".db"))
						 (load (tt:get-journal-stats dbfname)))
					    (if (> load 0.1) ;; dbs too busy to start more tests
						(begin
						  (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname)
						 #f)
						#t))
					  (begin
					    (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id)
					    #t)) ;; if journal monitoring not started do not gate







|







1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (and (if *journal-stats*
					  (let* ((dbfname (conc
							   (dbfile:run-id->dbnum run-id)
							   ".db"))
						 (load (tt:get-journal-stats dbfname)))
					    (if (> load 0.05) ;; dbs too busy to start more tests
						(begin
						  (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname)
						 #f)
						#t))
					  (begin
					    (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id)
					    #t)) ;; if journal monitoring not started do not gate

Modified tcp-transportmod.scm from [b9c6fed28d] to [cc2742f5f6].

638
639
640
641
642
643
644



645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))



				    (let* ((modtime (file-modification-time servinfofile)))
				      ;; if the .servinfo hasn't been touched in five min
				      ;; we can be pretty sure the server is truly dead
				      (if (> (- (current-seconds) modtime) 360)
					  (handle-exceptions
					   exn
					   (debug:print-info 0 *default-log-port*
							     "Error removing server info file: "servinfofile", "
							     (condition->list exn))
					   (delete-file* servinfofile))
					  (loop (cdr servrs) prime-host result))))))
			     (else
			      ;; can't delete it as we don't have a filename. NOTE: Should never get here.
			      (debug:print-info 0 *default-log-port* "ERROR: bad servinfo record \""servdat"\"")
			      (loop (cdr servrs) prime-host result)) ;; drop 
			     )))))
	       (home-host (if (null? good-srvrs)
			      #f







>
>
>
|








|
|







638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))
				    (let* ((modtime (handle-exceptions
						     exn
						     9999 ;; file probably disappeared
						     (file-modification-time servinfofile))))
				      ;; if the .servinfo hasn't been touched in five min
				      ;; we can be pretty sure the server is truly dead
				      (if (> (- (current-seconds) modtime) 360)
					  (handle-exceptions
					   exn
					   (debug:print-info 0 *default-log-port*
							     "Error removing server info file: "servinfofile", "
							     (condition->list exn))
					   (delete-file* servinfofile)))
				      (loop (cdr servrs) prime-host result)))))
			     (else
			      ;; can't delete it as we don't have a filename. NOTE: Should never get here.
			      (debug:print-info 0 *default-log-port* "ERROR: bad servinfo record \""servdat"\"")
			      (loop (cdr servrs) prime-host result)) ;; drop 
			     )))))
	       (home-host (if (null? good-srvrs)
			      #f