Megatest

Changes On Branch 7c315bd32db45ee7
Login

Changes In Branch v1.81-fixes Through [7c315bd32d] Excluding Merge-Ins

This is equivalent to a diff from 98f3441b4f to 7c315bd32d

2024-08-19
11:42
CI/CD: Automated commit after successful test, build, and deploy for v1.81-fix-extract-scripts check-in: 29155bc147 user: fdiskadm tags: v1.81
2024-08-16
13:34
Patched forward the adjutant code that got lost in v1.65 Leaf check-in: 13060ce126 user: matt tags: v1.81-adjutant
2024-08-13
12:55
removed extra copy of launch:extract-scripts-logpro and corrected it to add .logpro to the logpro filenames check-in: e829926867 user: mmgraham tags: v1.81-fix-extract-scripts
2024-07-18
05:41
Minor cleanup Leaf check-in: 60706141c1 user: mrwellan tags: v1.81-fixes
2024-07-17
19:21
Changed Megatest version to v1.8181 check-in: a748f29739 user: icfadm tags: v1.81
19:17
Lower gating on test launch to 0.05 journal load. Add exception handler for file-modification-time on .servinfo files check-in: 7c315bd32d user: mrwellan tags: v1.81-fixes
17:13
Move sync transaction in an attempt to free up bound time in .mtdb/*.db files Leaf check-in: fab9bf9c5c user: mrwellan tags: v1.81-better sync
2024-07-15
15:47
Changed Megatest version to 1.8102 check-in: 98f3441b4f user: icfadm tags: v1.81
15:12
CI/CD: Automated commit after successful test, build, and deploy for v1.81-bump-server-load check-in: 1fff14fbea user: fdiskadm tags: v1.81

Modified runs.scm from [2d4118afd2] to [d02f63b65d].

1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (and (if *journal-stats*
					  (let* ((dbfname (conc
							   (dbfile:run-id->dbnum run-id)
							   ".db"))
						 (load (tt:get-journal-stats dbfname)))
					    (if (> load 0.1) ;; dbs too busy to start more tests
						(begin
						  (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname)
						 #f)
						#t))
					  (begin
					    (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id)
					    #t)) ;; if journal monitoring not started do not gate







|







1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (and (if *journal-stats*
					  (let* ((dbfname (conc
							   (dbfile:run-id->dbnum run-id)
							   ".db"))
						 (load (tt:get-journal-stats dbfname)))
					    (if (> load 0.05) ;; dbs too busy to start more tests
						(begin
						  (debug:print-info 0 *default-log-port* "Gating launch due to db load "load" based on journal file observations for "dbfname)
						 #f)
						#t))
					  (begin
					    (debug:print-info 0 *default-log-port* "Journal gating not started for "run-id)
					    #t)) ;; if journal monitoring not started do not gate

Modified tcp-transportmod.scm from [b9c6fed28d] to [cc2742f5f6].

638
639
640
641
642
643
644



645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))



				    (let* ((modtime (file-modification-time servinfofile)))
				      ;; if the .servinfo hasn't been touched in six min (360 s)
				      ;; we can be pretty sure the server is truly dead
				      (if (> (- (current-seconds) modtime) 360)
					  (handle-exceptions
					   exn
					   (debug:print-info 0 *default-log-port*
							     "Error removing server info file: "servinfofile", "
							     (condition->list exn))
					   (delete-file* servinfofile))
					  (loop (cdr servrs) prime-host result))))))
			     (else
			      ;; can't delete it as we don't have a filename. NOTE: Should never get here.
			      (debug:print-info 0 *default-log-port* "ERROR: bad servinfo record \""servdat"\"")
			      (loop (cdr servrs) prime-host result)) ;; drop 
			     )))))
	       (home-host (if (null? good-srvrs)
			      #f







>
>
>
|








|
|







638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))
				    (let* ((modtime (handle-exceptions
						     exn
						     9999 ;; file probably disappeared
						     (file-modification-time servinfofile))))
				      ;; if the .servinfo hasn't been touched in six min (360 s)
				      ;; we can be pretty sure the server is truly dead
				      (if (> (- (current-seconds) modtime) 360)
					  (handle-exceptions
					   exn
					   (debug:print-info 0 *default-log-port*
							     "Error removing server info file: "servinfofile", "
							     (condition->list exn))
					   (delete-file* servinfofile)))
				      (loop (cdr servrs) prime-host result)))))
			     (else
			      ;; can't delete it as we don't have a filename. NOTE: Should never get here.
			      (debug:print-info 0 *default-log-port* "ERROR: bad servinfo record \""servdat"\"")
			      (loop (cdr servrs) prime-host result)) ;; drop 
			     )))))
	       (home-host (if (null? good-srvrs)
			      #f