Megatest

Diff
Login

Differences From Artifact [580823485a]:

To Artifact [d8a0c34d3e]:


62
63
64
65
66
67
68
69
70


71
72
73

74

75
76


77
78
79
80
81
82
83
62
63
64
65
66
67
68


69
70
71
72

73
74
75


76
77
78
79
80
81
82
83
84







-
-
+
+


-
+

+
-
-
+
+







;;   the comment can be set in the step record in launch.scm
;;
(define (launch:load-logpro-dat run-id test-id stepname)
  (let ((cname (conc stepname ".dat")))
    (if (file-exists? cname)
	(let* ((dat  (read-config cname #f #f))
	       (csvr (db:logpro-dat->csv dat stepname))
	       (csvt (let-values (( (fmt-cell fmt-record fmt-csv) (make-format ",")))
				 (fmt-csv (map list->csv-record csvr))))
	       (csvt (let-values (((fmt-cell fmt-record fmt-csv) (make-format ",")))
		       (fmt-csv (map list->csv-record csvr))))
	       (status (configf:lookup dat "final" "exit-status"))
	       (msg     (configf:lookup dat "final" "message")))
          ;;(if csvt  ;; this if blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
          (if csvt  ;; this if blocked stack dump caused by .dat file from logpro being 0-byte.  fixed by upgrading logpro
              (rmt:csv->test-data run-id test-id csvt)
	      (debug:print 0 *default-log-port* "ERROR: no csvdat exists for run-id: " run-id " test-id: " test-id " stepname: " stepname ", check that logpro version is 1.15 or newer"))
            ;;  (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
            ;;  )
	  ;;  (BB> "Error: run-id/test-id/stepname="run-id"/"test-id"/"stepname" => bad csvr="csvr)
	  ;;  )
	  (cond
	   ((equal? status "PASS") "PASS") ;; skip the message part if status is pass
	   (status (conc (configf:lookup dat "final" "exit-status") ": " (if msg msg "no message")))
	   (else #f)))
	#f)))

(define (launch:runstep ezstep run-id test-id exit-info m tal testconfig)
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265
252
253
254
255
256
257
258

259
260
261
262
263
264
265
266







-
+







  ;; (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
  ;; Since we should have a clean slate at this time there is no need to do 
  ;; any of the other stuff that tests:test-set-status! does. Let's just 
  ;; force RUNNING/n/a

  ;; (thread-sleep! 0.3)
  ;; (tests:test-force-state-status! run-id test-id "RUNNING" "n/a")
  (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "RUNNING" #f) 
  (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "RUNNING" #f) 
  ;; (thread-sleep! 0.3) ;; NFS slowness has caused grief here

  ;; if there is a runscript do it first
  (if fullrunscript
      (let ((pid (process-run fullrunscript)))
	(rmt:test-set-top-process-pid run-id test-id pid)
	(let loop ((i 0))
455
456
457
458
459
460
461
462

463
464
465
466
467
468
469
456
457
458
459
460
461
462

463
464
465
466
467
468
469
470







-
+







	  (let ((sighand (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (if (eq? signum signal/stop)
			       (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting."))
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((th1 (make-thread (lambda ()
						     (tests:test-force-state-status! run-id test-id "INCOMPLETE" "KILLED")
						     (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f)
						     (print "Killed by signal " signum ". Exiting")
						     (thread-sleep! 1)
						     (exit 1))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 2)
						     (debug:print 0 *default-log-port* "Done")
						     (exit 4)))))
479
480
481
482
483
484
485
486



487
488
489
490



491
492



493
494
495
496
497
498
499
480
481
482
483
484
485
486

487
488
489
490
491
492

493
494
495
496

497
498
499
500
501
502
503
504
505
506







-
+
+
+



-
+
+
+

-
+
+
+







	  ;;
	  (let* ((test-info (rmt:get-test-info-by-id run-id test-id))
		 (test-host (db:test-get-host        test-info))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")) ;; prime it for running
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")))
		  ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
		  (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
		  ))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a"))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      )
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print-error 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (exit))))
	  
	  (debug:print 2 *default-log-port* "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  (set! keys       (rmt:get-keys))
	  ;; (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;; these may be needed by the launching process
649
650
651
652
653
654
655
656

657
658
659
660
661
662
663
656
657
658
659
660
661
662

663
664
665
666
667
668
669
670







-
+







		    (debug:print-info 1 *default-log-port* "Test exited in state=" (db:test-get-state testinfo) ", setting state/status based on exit code of " (launch:einf-exit-status exit-info) " and rollup-status of " (launch:einf-rollup-status exit-info))
		    (tests:test-set-status! run-id 
					    test-id 
					    new-state
					    new-status
					    (args:get-arg "-m") #f)
		    ;; need to update the top test record if PASS or FAIL and this is a subtest
		    ;; NO NEED TO CALL roll-up-pass-fail-counts HERE, THIS IS DONE IN roll-up-pass-fail-counts called by tests:test-set-status!
		    ;; NO NEED TO CALL set-state-status-and-roll-up-items HERE, THIS IS DONE IN set-state-status-and-roll-up-items called by tests:test-set-status!
		    ))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (tests:summarize-items run-id test-id test-name #f))
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
	    (mutex-unlock! m)
1119
1120
1121
1122
1123
1124
1125
1126
1127


1128
1129
1130
1131
1132
1133
1134
1126
1127
1128
1129
1130
1131
1132


1133
1134
1135
1136
1137
1138
1139
1140
1141







-
-
+
+







			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig   (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area  #f)
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
1146
1147
1148
1149
1150
1151
1152
1153

1154
1155
1156
1157
1158
1159
1160
1153
1154
1155
1156
1157
1158
1159

1160
1161
1162
1163
1164
1165
1166
1167







-
+







	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f)
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))