Megatest

Diff
Login

Differences From Artifact [0962cf8b36]:

To Artifact [961da27317]:


91
92
93
94
95
96
97



98

99
100
101
102


103


104
105
106
107
108
109
110
91
92
93
94
95
96
97
98
99
100

101
102
103
104
105
106
107

108
109
110
111
112
113
114
115
116







+
+
+
-
+




+
+
-
+
+







  (let* ((stepname       (car ezstep))  ;; do stuff to run the step
	 (stepinfo       (cadr ezstep))
	;; (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) "" info)))
	;; (stepproc       (let ((info (cadr ezstep)))
	;; 		   (if (proc? info) info #f)))
	 (stepparts      (string-match (regexp "^(\\{([^\\}\\{]*)\\}\\s*|)(.*)$") stepinfo))
	 (stepparams     (if (and (list? stepparts)
				  (> (length stepparts) 1))
			     (list-ref stepparts 2)
	 (stepparams     (list-ref stepparts 2)) ;; for future use, {VAR=1,2,3}, run step for each
			     #f)) ;; for future use, {VAR=1,2,3}, run step for each
	 (paramparts     (if (string? stepparams)
			     (map (lambda (x)(string-split x "=")) (string-split-fields "[^;]*=[^;]*" stepparams))
			     '()))
	 (subrun         (alist-ref "subrun" paramparts equal?))
	 (stepcmd        (if (and (list? stepparts)
				  (> (length stepparts) 2))
	 (stepcmd        (list-ref stepparts 3))
			     (list-ref stepparts 3)
			     (conc "# error, no command for step "stepname)))
	 (script         "") ; "#!/bin/bash\n") ;; yep, we depend on bin/bash FIXME!!!\
	 (logpro-file    (conc stepname ".logpro"))
	 (html-file      (conc stepname ".html"))
	 (dat-file       (conc stepname ".dat"))
	 (tconfig-logpro (configf:lookup testconfig "logpro" stepname))
	 (logpro-used    (common:file-exists? logpro-file)))

615
616
617
618
619
620
621

622








623
624
625
626
627
628
629
621
622
623
624
625
626
627
628

629
630
631
632
633
634
635
636
637
638
639
640
641
642
643







+
-
+
+
+
+
+
+
+
+







	    (set-signal-handler! signal/int sighand)
	    (set-signal-handler! signal/term sighand)
	    ) ;; (set-signal-handler! signal/stop sighand)
	  
	  ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART,
	  ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY*
	  ;;
	  (let* ((test-info (let loop ((tries 0))
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
			      (let ((tinfo (rmt:get-test-info-by-id run-id test-id)))
				(if tinfo
				    tinfo
				    (if (> tries 5)
					#f
					(begin
					  (thread-sleep! (+ 1 (* tries 10)))
					  (loop (+ tries 1))))))))
		 (test-host (if test-info
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
780
781
782
783
784
785
786
787



788
789















790
791
792
793
794
795
796
794
795
796
797
798
799
800

801
802
803
804

805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826







-
+
+
+

-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t))) ;; 'return-procs)))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
		 (scripts (configf:get-section tconfig "scripts")))
	    ;; create .testconfig file
	    (configf:write-alist tconfig tconfig-tmpfile)
	    (file-move tconfig-tmpfile tconfig-fname #t))
	    (file-move tconfig-tmpfile tconfig-fname #t)
	    (delete-file* ".final-status")

	    ;; extract scripts from testconfig and write them to files in test run dir
	    (for-each
	     (lambda (scriptdat)
	       (match scriptdat
		      ((name content)
		       (with-output-to-file name
			 (lambda ()
			   (print content)
			   (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))))
		      (else
		       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
	     scripts))
	  ;; 
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 ;; (keep-going   #t)
		 (misc-flags   (let ((ht (make-hash-table)))
830
831
832
833
834
835
836




837
838
839
840
841
842
843
844
845
846
847

848


849
850
851
852
853
854
855
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880

881
882
883
884
885
886
887
888
889
890
891







+
+
+
+










-
+

+
+







				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     ((eq? (launch:einf-rollup-status exit-info) 3) "CHECK")
				     ((eq? (launch:einf-rollup-status exit-info) 4) "WAIVED")
				     ((eq? (launch:einf-rollup-status exit-info) 5) "ABORT")
				     ((eq? (launch:einf-rollup-status exit-info) 6) "SKIP")
				     (else "FAIL")))) ;; (db:test-get-status testinfo)))
		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
   
        ;; Leave a .final-status file for each sub-test
        (tests:save-final-status run-id test-id)

		    (tests:test-set-status! run-id 
					    test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
		      (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
        ;; Leave a .final-status file for the top level test
        (tests:save-final-status run-id test-id)
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
871
872
873
874
875
876
877
878

879
880

881
882



883
884







885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911








912
913
914
915
916
917
918
907
908
909
910
911
912
913

914
915

916
917
918
919
920
921


922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947








948
949
950
951
952
953
954
955
956
957
958
959
960
961
962







-
+

-
+


+
+
+
-
-
+
+
+
+
+
+
+



















-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+







(define (launch:end-of-run-check run-id )
    (let*	((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
           (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
           (current-state (rmt:get-run-state run-id))
           (current-status (rmt:get-run-status run-id)))
     ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
     (debug:print 0 *default-log-port* "rollup run state/status")                      
     (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)                      
     (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
 
     (runs:update-junit-test-reporter-xml run-id) 
     (cond 
       ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
                (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id)))
                (begin
           	(debug:print 4 *default-log-port* "look for  post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id)))
           	(debug:print 0 *default-log-port* "look for  post hook.")
          	(runs:run-post-hook run-id))
                (debug:print 0 *default-log-port* "End of Run Detected.")
                (rmt:set-var (conc "end-of-run-" run-id) "yes")
                ;(thread-sleep! 10)
          	(runs:run-post-hook run-id)
                (debug:print 4 *default-log-port* "currseconds: " (current-seconds)" eor: " (rmt:get-var (conc "end-of-run-" run-id)))
                (common:simple-unlock (conc "endOfRun" run-id)))
                 (debug:print 0 *default-log-port* "End of Run Detected but not running post hook. This should happen when eor is set to yes. This will happen only when 2 tests exit at smae time. eor= " (rmt:get-var (conc "end-of-run-" run-id)))))
        ((> running-cnt 3) 
        	  (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
        ((> running-cnt 0)
            (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
   				  (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
           			(if (and all-test-launched  (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
           					(launch:end-of-run-check run-id)))) ;;todo
        (else  (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
         (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (if (> (length not-completed-tests) 0) 
           (let loop ((running-test (car not-completed-tests))
			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
(define (launch:is-test-alive host pid)
(if (and host pid (not (equal? host "n/a")))
(let* ((cmd (conc "ssh " host " pstree -A " pid))
      (output (with-input-from-pipe cmd read-lines)))
  (print "cmd: " cmd "\n op: " output )
  (if(eq? (length output) 0)
     #f
     #t))
#t))
  (if (and host pid (not (equal? host "n/a")))
      (let* ((cmd (conc "ssh " host " pstree -A " pid))
	     (output (with-input-from-pipe cmd read-lines)))
	(debug:print 2 *default-log-port* "Running " cmd " received " output)
	(if (eq? (length output) 0)
	   #f
	   #t))
      #t))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
1027
1028
1029
1030
1031
1032
1033
1034

1035
1036
1037
1038
1039
1040
1041
1071
1072
1073
1074
1075
1076
1077

1078
1079
1080
1081
1082
1083
1084
1085







-
+








(define (launch:setup-body #!key (force-reread #f) (areapath #f))
  (if (and (eq? *configstatus* 'fulldata)
	   *toppath*
	   (not force-reread)) ;; no need to reprocess
      *toppath*   ;; return toppath
      (let* ((use-cache (common:use-cache?)) ;; BB- use-cache checks *configdat* for use-cache setting.  We do not have *configdat*.  Bootstrapping problem here.
	     (toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
	     (toppath  (common:get-toppath areapath))
	     (target   (common:args-get-target))
	     (sections (if target (list "default" target) #f)) ;; for runconfigs
	     (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
             (cachefiles (launch:get-cache-file-paths areapath toppath target mtconfig))
	     ;; checking for null cachefiles should not be necessary, I was seeing error car of '(), might be a chicken bug or a red herring ...
	     (mtcachef   (if (null? cachefiles)
			     #f
1094
1095
1096
1097
1098
1099
1100
1101


1102
1103
1104
1105
1106
1107
1108
1138
1139
1140
1141
1142
1143
1144

1145
1146
1147
1148
1149
1150
1151
1152
1153







-
+
+







		  (set! toppath      *toppath*)
		  (if (not *toppath*)
		      (begin
			(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
			(exit 1)))
		  (setenv "MT_RUN_AREA_HOME" *toppath*)
		  ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
		  (let* ((keys         (rmt:get-keys))
		  (let* ((keys         (common:list-or-null (rmt:get-keys)
							    message: "Failed to retrieve keys in launch.scm. Please report this to the developers."))
			 (key-vals     (keys:target->keyval keys target))
			 (linktree     (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
					;     (if *configdat*
					; 	   (configf:lookup *configdat* "setup" "linktree")
					; 	   (conc *toppath* "/lt"))))
			 (second-pass  (find-and-read-config
					mtconfig
1222
1223
1224
1225
1226
1227
1228

1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239

1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253











1254
1255
1256
1257
1258
1259
1260
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284

1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297


1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315







+










-
+












-
-
+
+
+
+
+
+
+
+
+
+
+








	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	*toppath*)))


(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))
	 (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
		     (string->number (or m "10000")))))
    (if disks 
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
	      (begin ;; DEAD CODE PATH - REVISIT!
;;		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
;;		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		;;(exit 1)
                 (if (null? disks)
                     (cons 1 (conc *toppath* "/runs"))
                     (let ((paths (sort disks (lambda (x y) (> (string-length (cadr x)) (string-length (cadr y)))))))
                       (let loop ((head (car paths)) (tail (cdr paths)))
                         (let ((result (handle-exceptions exn #f (create-directory (cadr head) #t))))
                           (if result
                               result
                               (if (null? tail)
                                   (cons 1 (conc *toppath* "/runs"))
                                   (loop (car tail) (cdr tail)))))))))))))) ;; the code creates the necessary directories if it does not exist and returns the path.

                                   (loop (car tail) (cdr tail)))))))))))
	;; no disks definition - use mtrah/runs, fall back to currdir/runs
	(let* ((toppath (or *toppath*
			    (common:get-toppath *toppath*)
			    (begin
			      (debug:print-error 0 *default-log-port* "Creating runs dir in current directory, this is probably not what you wanted. Please check your setup.")
			      (current-directory))))
	       (runsdir (conc toppath "/runs")))
	  (if (not (file-exists? runsdir))(create-directory runsdir))
	  runsdir)
	))) ;; the code creates the necessary directories if it does not exist and returns the path.

(define (launch:test-copy test-src-path test-path)
  (let* ((ovrcmd (let ((cmd (configf:lookup *configdat* "setup" "testcopycmd")))
		   (if cmd
		       ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH
		       (string-substitute "TEST_TARG_PATH" test-path
					  (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t)
1350
1351
1352
1353
1354
1355
1356
1357
1358


1359
1360
1361
1362
1363
1364
1365
1366


1367
1368
1369
1370
1371
1372
1373
1374
1375


1376
1377
1378
1379
1380
1381
1382
1405
1406
1407
1408
1409
1410
1411


1412
1413
1414
1415
1416
1417
1418
1419


1420
1421
1422
1423
1424
1425
1426
1427
1428


1429
1430
1431
1432
1433
1434
1435
1436
1437







-
-
+
+






-
-
+
+







-
-
+
+







    ;; level
    (if (not not-iterated) ;; i.e. iterated
	(let ((iterated-parent  (pathname-directory (conc lnkpath "/" item-path))))
	  (debug:print-info 2 *default-log-port* "Creating iterated parent " iterated-parent)
	  (handle-exceptions
	   exn
	   (begin
	     (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", exiting")
	     (exit 1))
	     (debug:print-error 0 *default-log-port* " Failed to create directory " iterated-parent ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted")
	     #;(exit 1))
	   (create-directory iterated-parent #t))))

    (if (symbolic-link? lnkpath) 
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	   (debug:print-error 0 *default-log-port* " Failed to remove symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.")
	   #;(exit 1))
	 (delete-file lnkpath)))

    (if (not (or (common:file-exists? lnkpath)
		 (symbolic-link? lnkpath)))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", exiting")
	   (exit 1))
	   (debug:print-error 0 *default-log-port* " Failed to create symlink " lnkpath ((condition-property-accessor 'exn 'message) exn) ", continuing but link tree may be corrupted.")
	   #;(exit 1))
	 (create-symbolic-link toptest-path lnkpath)))
    
    ;; NB - This was not working right - some top tests are not getting the path set!!!
    ;;
    ;; Do the setting of this record after the paths are created so that the shortdir can 
    ;; be set to the real directory location. This is safer for future clean up if the link
    ;; tree is damaged or lost.