Megatest

Diff
Login

Differences From Artifact [98ad71ee6e]:

To Artifact [837708b5e2]:


369
370
371
372
373
374
375
376
377


378
379
380
381
382
383
384
369
370
371
372
373
374
375


376
377
378
379
380
381
382
383
384







-
-
+
+







     scripts)

    ;; extract logpro from testconfig and write them to files in test run dir
    (for-each
     (lambda (logprodat)
       (match logprodat
	      ((name content)
	       (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name".logpro")
	       (with-output-to-file (conc name".logpro")
	       (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name ".logpro")
               (with-output-to-file (conc name".logpro")
		 (lambda ()
		   (print content)
		   ;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))
		   )))
	      (else
	       (debug:print-info 0 "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\""))))
     logpros)))
731
732
733
734
735
736
737
738


739
740
741
742


743

744
745
746
747
748
749
750
731
732
733
734
735
736
737

738
739
740
741
742
743
744
745

746
747
748
749
750
751
752
753







-
+
+




+
+
-
+







		 (monitorjob   (lambda ()
				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job"))
                 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t))
                 (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code"))
                 (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED"))
                 (test-status "not set")
                 (test-status     "not set")
		 (test-state      "not set")
		 (precmd          (configf:lookup tconfig "setup" "precmd"))
		 (postcmd         (configf:lookup tconfig "setup" "postcmd")))
	    ;; first, if set, run the precmd
	    (if precmd ;; (file-exists? precmd)(file-execute-access? precmd))
		(begin
		  ;; (save-environment-as-files "precmd-envt")
		(system precmd)) ;; up to test author to put nbfake if desired.
		  (system precmd))) ;; up to test author to put nbfake if desired.
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...")
            (debug:print-info 2 *default-log-port* "exit-info = " exit-info)
	    (hash-table-set! misc-flags 'keep-going #f)
799
800
801
802
803
804
805
806
807



808
809
810
811
812
813
814
815
816
817
818
819
820






821
822
823
824
825
826
827
802
803
804
805
806
807
808


809
810
811
812
813
814


815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835







-
-
+
+
+



-
-








+
+
+
+
+
+







	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")


            (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id)))
	    (let* ((testrec  (rmt:get-testinfo-state-status run-id test-id)))
              (set! test-status (db:test-get-status testrec))
	      (set! test-state  (db:test-get-state  testrec)))

            ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1.

	    (if postcmd
		(system postcmd))

            (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list))
               (begin
                (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) 
                (set! *globalexitstatus* 1)
               )
            )

	    (if postcmd
		(begin
		  (setenv "MT_TEST_STATE" test-state)
		  (setenv "MT_TEST_STATUS" test-status)
		  ;; (save-environment-as-files "postcmd-envt")
		  (system postcmd)))
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks
1498
1499
1500
1501
1502
1503
1504
1505






1506



1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518

1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532




1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
















1552
1553
1554
1555
1556
1557
1558
1506
1507
1508
1509
1510
1511
1512

1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533

1534












1535

1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548










1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571







-
+
+
+
+
+
+

+
+
+











-
+
-
-
-
-
-
-
-
-
-
-
-
-

-
+
+
+
+









-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







           (else #f))))
    (when do-scan?
      (debug:print 1 *default-log-port* "INFO: search and mark zombie tests")
      (rmt:set-var key (current-seconds))
      (rmt:find-and-mark-incomplete run-id #f))))



;; launch:ajt - record that launch-test fills in while preparing a test
;; launch; in adjutant launcher mode its fields are handed to
;; rmt:no-sync-add-job to register the job.
(defstruct launch:ajt
  ;; list of variable entries, grown via launch:ajt-add-vars (launch-test
  ;; appends its MT_* var-list, the env-override vars, the misc vars and
  ;; the test's pre-launch-env-overrides)
  (vars '())
  ;; set by launch-test to cmdparms (the value also exported as MT_CMDINFO)
  (exekey #f)
  ;; set to (car launcher) when launcher-mode is 'adjutant and launcher is
  ;; a list with more than one element; otherwise stays #f
  (host-type #f)
  ;; test signature string built by launch-test as
  ;; "<testsuite-name>:<test-name>:<item-path>"
  (test-sig  #f)
  ;; assembled "-execute" command for the remote megatest, stored as a string
  (cmdline   #f))

;; Extend the vars list stored in the launch:ajt record `dat`: the
;; entries in `vars` are placed after those already held by the record.
(define (launch:ajt-add-vars dat vars)
  (let* ((existing (launch:ajt-vars dat))
	 (combined (append existing vars)))
    (launch:ajt-vars-set! dat combined)))

;; 1. look through disks list for disk with most space
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (assert runname "FATAL: launch-test called with no runname")
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
  (let* (;; locking code removed from here commented out and pasted at end of file
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))
	 (item-path       (item-list->path itemdat))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
	 (contour         #f)                         ;; NOT READY FOR THIS (args:get-arg "-contour")))
	 ;; launcher-mode will be 'adjutant or 'normal
	 (launcher-mode   (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal")))
	 (ajtdat          (make-launch:ajt)))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0)))
      (if (> launch-delay delta)
	  (begin
	    ;; (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay.
	;;	(debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds"))
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       (list "MT_CONTOUR"   contour)
       )
      itemdat))
    (let ((var-list (append
		     (list
		      (list "MT_RUN_AREA_HOME" *toppath*)
		      (list "MT_TEST_NAME" test-name)
		      (list "MT_RUNNAME"   runname)
		      (list "MT_ITEMPATH"  item-path)
		      (list "MT_CONTOUR"   contour)
		      )
		     itemdat)))
       ;; consolidate this code with the code in megatest.scm for
       ;; "-execute", *maybe* - the longer they are set the longer
       ;; each launch takes (must be non-overlapping with the vars)
      (alist->env-vars var-list)
      ;; the var-list into the ajtdat adjutant record whether it is needed or not.
      (launch:ajt-add-vars ajtdat var-list))
    
    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (configf:lookup *configdat* "jobtools"     "useshell")))
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582



1583
1584
1585
1586
1587
1588



1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601









1602
1603
1604
1605
1606

1607
1608
1609
1610
1611
1612
1613
1579
1580
1581
1582
1583
1584
1585




1586





1587
1588
1589






1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618

1619
1620
1621
1622
1623
1624
1625
1626







-
-
-
-

-
-
-
-
-
+
+
+
-
-
-
-
-
-
+
+
+













+
+
+
+
+
+
+
+
+




-
+







	   (subrun          (> (length (hash-table-ref/default tconfig "subrun"  '())) 0)) ;; send a flag to process a subrun
	   ;; (diskspace       (configf:lookup tconfig   "requirements" "diskspace"))
	   ;; (memory          (configf:lookup tconfig   "requirements" "memory"))
	   ;; (hosts           (configf:lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
	   (remote-megatest (configf:lookup *configdat* "setup" "executable"))
	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
				(configf:lookup  *configdat* "setup" "runtimelim")))
	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	   ;;                allow running from dashboard. Extract the path
	   ;;                from the called megatest and convert dashboard
	   ;;             	  or dboard to megatest
	   (local-megatest  (common:find-local-megatest))
	   #;(local-megatest  (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
	   (launcher        (let ((l (common:get-launcher *configdat* test-name item-path launcher-mode)))
			      (if (string? l)
				  (string-split l)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (configf:lookup *configdat* "jobtools"     "launcher"))
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
				  l))) ;; some nonhomogenuity here. '(cmd param1 param2 ...) OR '(host-type launcher)
	    ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (test-sig        (conc (common:get-testsuite-name) ":" test-name ":" item-path))
	   (work-area       #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '())
				(if (configf:lookup *configdat* "misc" "profilesw")
				    (list (configf:lookup *configdat* "misc" "profilesw"))
				    '()))))
      ;; save the test-sig in the ajtdat record
      (launch:ajt-test-sig-set! ajtdat test-sig)
      ;; go ahead and figure out if we have a host-type from the
      ;; launcher call above and save it in the ajtdat record
      (if (and (eq? launcher-mode 'adjutant)
	       (list? launcher)
	       (> (length launcher) 1))
	  (launch:ajt-host-type-set! ajtdat (car launcher)))
 
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; (if launcher (set! launcher (string-split launcher)))           ;; yuk!
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      
1657
1658
1659
1660
1661
1662
1663


1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674




1675

1676
1677
1678
1679
1680
1681
1682

1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696








1697
1698
1699
1700





1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714















1715
1716
























1717

1718
1719
1720
1721
1722
1723
1724
1725
1726
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686



1687
1688
1689
1690

1691
1692
1693

1694
1695
1696

1697
1698
1699
1700
1701
1702









1703
1704
1705
1706
1707
1708
1709
1710




1711
1712
1713
1714
1715
1716
1717












1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758

1759
1760

1761
1762
1763
1764
1765
1766
1767







+
+








-
-
-
+
+
+
+
-
+


-



-
+





-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+


-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+

-







					(list 'target    mt_target)
					(list 'contour   contour)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      ;; save the cmdparms in the ajtdat
      (launch:ajt-exekey-set! ajtdat cmdparms)

        (setenv "MT_CMDINFO" cmdparms)  ;; setting this for use in nblauncher
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (common:file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))

      ;; save the command line for adjutant mode (might never be needed but best to assemble it here)
      (launch:ajt-cmdline-set! ajtdat (string-intersperse
				       (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
      (cond       
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the environment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
      (let* ((env-override-vars  (hash-table-ref/default *configdat* "env-override" '()))
	     (commonprevvals     (alist->env-vars env-override-vars))
	     (misc-vars          (append (list (list "MT_TEST_RUN_DIR" work-area)
					       (list "MT_TEST_NAME" test-name)
					       (list "MT_ITEM_INFO" (conc itemdat)) 
					       (list "MT_RUNNAME"   runname)
					       (list "MT_TARGET"    mt_target)
					       (list "MT_ITEMPATH"  item-path))
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
					 itemdat))
	     (miscprevvals   (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute"
	     (test-vars      (hash-table-ref/default tconfig "pre-launch-env-overrides" '()))
	     (testprevvals   (alist->env-vars test-vars))
			      
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	     (launch-results-prev (apply (if launchwait ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
					     process:cmd-run-with-stderr-and-exitcode->list
					     process-run)
					 (if useshell
					     (let ((cmdstr (string-intersperse fullcmd " ")))
					       (if launchwait
						   cmdstr
						   (conc cmdstr " >> mt_launch.log 2>&1 &")))
					     (car fullcmd))
					 (if useshell
					     '()
					     (cdr fullcmd))))
	     ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed.
	     (launch-results-prev (if (eq? launcher-mode 'adjutant)
				      '(#t 0) ;; just some fake data to fool downstream but non-applicable code
				      (apply (if launchwait
						 process:cmd-run-with-stderr-and-exitcode->list
						 process-run)
					     (if useshell
						 (let ((cmdstr (string-intersperse fullcmd " ")))
						   (if launchwait
						       cmdstr
						       (conc cmdstr " >> mt_launch.log 2>&1 &")))
						 (car fullcmd))
					     (if useshell
						 '()
						 (cdr fullcmd)))))
             (success        (if launchwait (equal? 0 (cadr launch-results-prev)) #t))
             (launch-results (if launchwait (car launch-results-prev) launch-results-prev)))

	(launch:ajt-add-vars ajtdat env-override-vars)
	(launch:ajt-add-vars ajtdat misc-vars)
	(launch:ajt-add-vars ajtdat test-vars)

	;; if in adjutant mode we register the job in the jobs_queue
	;; then fire off an adjutant runner
	;;
	(if (eq? launcher-mode 'adjutant)
	    (let* ((adjutant-runner-cmd (append (cdr launcher)
						(list remote-megatest "-adjutant"
						      (launch:ajt-host-type ajtdat)
						      "-start-dir" *toppath*)))
		   (adj-cmd     (conc (string-intersperse (map conc adjutant-runner-cmd) " ")
				      "&")))         
	      (rmt:no-sync-add-job
	       (launch:ajt-host-type  ajtdat)
	       (launch:ajt-vars ajtdat)
	       (launch:ajt-exekey     ajtdat)
	       (launch:ajt-cmdline    ajtdat))
	      (print "adj-cmd: " adj-cmd)
	      (system adj-cmd)
	      ))
	
        (if (not success)
	(if (not success)
            (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED"))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; (rmt:no-sync-del! lock-key)         ;; release the lock for starting this test
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
1738
1739
1740
1741
1742
1743
1744




1745
1746
1747
1748
1749
1750
1751
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796







+
+
+
+







	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	;; the unlock previously was further up. This seemed wrong as we should not proceed until the
	;; vars have been reset.
	(mutex-unlock! *launch-setup-mutex*)
	launch-results))
    (change-directory *toppath*)
    (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0))))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
1767
1768
1769
1770
1771
1772
1773
















1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
		     (read-symbolic-link (conc "/proc/" pid "/cwd"))
		     #f)))
    ;; now wait on that process if all is correct
    ;; periodically update the db with runtime
    ;; when the process exits look at the db, if still RUNNING after 10 seconds set
    ;; state/status appropriately
    (process-wait pid)))


 ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
	;; 			    (begin
	;; 			      (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path)
	;; 			      (rmt:no-sync-del! lock-key) ;; destroy the lock
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; 
	;; 			    (begin
	;; 			      (thread-sleep! 1)
	;; 			      (loop (rmt:no-sync-get-lock lock-key) expire-time))))))