Megatest

Changes On Branch 2911d6d1e0c2c0ca
Login

Changes In Branch v1.65-aside Excluding Merge-Ins

This is equivalent to a diff from 0d4dd9a19f to 2911d6d1e0

2020-09-18
12:27
cherry picked 2 fixes, changed version to 1.6569 ==/7.2/2.0/PASS/1201/mars/== check-in: d145d0eb02 user: mmgraham tags: v1.65
2020-09-17
15:56
Merged in the rmt-call-throttle. Closed-Leaf check-in: 2911d6d1e0 user: mrwellan tags: v1.65-aside
14:42
Merged from v1.65-experimental check-in: 31090be521 user: mrwellan tags: v1.65-aside
2020-09-16
12:44
updates to remov-keep for archiving check-in: 81893b52af user: pjhatwal tags: 1.65-archive
2020-09-08
10:12
Added testplan section to manual. NOTE: Passes ext-tests without any re-runs, seems like a sweet spot. ==24.9/2.2/1201/WARN/mars== check-in: 0d4dd9a19f user: mrwellan tags: v1.65
2020-09-07
23:29
Added query throttle.

Fully tested and passes ext-tests on sles11 and Ubuntu. Closed-Leaf check-in: 70a65ade2a user: matt tags: v1.65-rmt-call-throttle-orig

2020-09-05
11:17
Merged cleanup branch back to v1.65 ==9.4/2.2/1201/WARN/mars== check-in: 0cbf1a0b26 user: matt tags: v1.65

Modified common.scm from [a82c407907] to [031e02547b].

565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-error 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  







|







565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))
		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (delete-file* fullname)))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))
  
773
774
775
776
777
778
779

780
781
782
783
784
785
786
    (2 "KILLREQ")
    (3 "KILLED")
    (4 "NOT_STARTED")
    (5 "COMPLETED")
    (6 "LAUNCHED")
    (7 "REMOTEHOSTSTART")
    (8 "RUNNING")

    ))

(define *common:dont-roll-up-states*
  '("DELETED"
    "REMOVING"
    "CLEANING"
    "ARCHIVE_REMOVING"







>







773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
    (2 "KILLREQ")
    (3 "KILLED")
    (4 "NOT_STARTED")
    (5 "COMPLETED")
    (6 "LAUNCHED")
    (7 "REMOTEHOSTSTART")
    (8 "RUNNING")
    (9 "LAUNCHING")
    ))

(define *common:dont-roll-up-states*
  '("DELETED"
    "REMOVING"
    "CLEANING"
    "ARCHIVE_REMOVING"
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

;; Values that count as a "good" ending for a prerequisite.
;; NOTE(review): despite the "-states" suffix the entries look like status
;; values (PASS, WARN, ...) rather than states - confirm against callers.
(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed
  '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items
;; db:roll-up-rules counts items whose state is a member of this list; any
;; such item makes the rolled-up state "RUNNING".
(define *common:running-states*     ;; test is either running or can be run
  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED" "STARTED"))

(define *common:cant-run-states*    ;; These are stopping conditions that prevent a test from being run
  '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED"))

;; db:roll-up-rules treats a NOT_STARTED item whose status is missing from
;; this list as a "bad not-started" item.
(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead
  '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP"))








|







812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
  '("KILLED" "KILLREQ" "STUCK" "INCOMPLETE" "DEAD"))

;; Values that count as a "good" ending for a prerequisite.
;; NOTE(review): despite the "-states" suffix the entries look like status
;; values (PASS, WARN, ...) rather than states - confirm against callers.
(define *common:well-ended-states* ;; an item's prereq in this state allows item to proceed
  '("PASS" "WARN" "CHECK" "WAIVED" "SKIP"))

;; BBnote: *common:running-states* used from db:set-state-status-and-roll-up-items
;; Includes the "LAUNCHING" state alongside "LAUNCHED".
(define *common:running-states*     ;; test is either running or can be run
  '("RUNNING" "REMOTEHOSTSTART" "LAUNCHING" "LAUNCHED" "STARTED"))

(define *common:cant-run-states*    ;; These are stopping conditions that prevent a test from being run
  '("COMPLETED" "KILLED" "UNKNOWN" "INCOMPLETE" "ARCHIVED"))

;; Statuses that are acceptable on a test that is still in the NOT_STARTED state.
(define *common:not-started-ok-statuses* ;; if not one of these statuses when in not_started state treat as dead
  '("n/a" "na" "PASS" "FAIL" "WARN" "CHECK" "WAIVED" "DEAD" "SKIP"))

Modified dashboard-context-menu.scm from [48947370a7] to [3b51803e6a].

88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    "Kill Complete Run"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt % "
             "  -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED"))))
   (iup:menu-item 
    "Delete Run Data"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname







|







88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    "Kill Complete Run"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt % "
             "  -state RUNNING,REMOTEHOSTSTART,LAUNCHING,LAUNCHED,NOT_STARTED"))))
   (iup:menu-item 
    "Delete Run Data"
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHED"))))
   (iup:menu-item
    (conc "Delete data : " item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname







|







119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHING,LAUNCHED"))))
   (iup:menu-item
    (conc "Delete data : " item-test-path)
    #:action
    (lambda (obj)
      (common:run-a-command
       (conc "megatest -remove-runs -target " target
             " -runname " runname
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHED,NOT_STARTED"))))
   
   (let* ((rundir    (db:test-get-rundir      test-info))
          (has-subrun (subrun:subrun-test-initialized? rundir)))
     (if has-subrun
         (iup:menu-item
          "Launch subrun dashboard"
          #:action







|







217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
    #:action
    (lambda (obj)
      ;; (rmt:test-set-state-status-by-id run-id test-id "KILLREQ" #f #f)
      (common:run-a-command
       (conc "megatest -set-state-status KILLREQ,n/a -target " target
             " -runname " runname
             " -testpatt " item-test-path 
             " -state RUNNING,REMOTEHOSTSTART,LAUNCHING,LAUNCHED,NOT_STARTED"))))
   
   (let* ((rundir    (db:test-get-rundir      test-info))
          (has-subrun (subrun:subrun-test-initialized? rundir)))
     (if has-subrun
         (iup:menu-item
          "Launch subrun dashboard"
          #:action

Modified db.scm from [2f649dc1fb] to [381a3b66b8].

1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
         (print "creating trigges from init") 
        (db:create-triggers db)    
     db)) ;; )

;;======================================================================
;; A R C H I V E S
;;======================================================================








|







1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
         ;; (print "creating trigges from init") 
        (db:create-triggers db)    
     db)) ;; )

;;======================================================================
;; A R C H I V E S
;;======================================================================

1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
        run-id)
       
       (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f







|







1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
        (lambda (test-id run-dir uname testname item-path)
          (if (and (equal? uname "n/a")
                   (equal? item-path "")) ;; this is a toplevel test
              ;; what to do with toplevel? call rollup?
              (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
              (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
        db
        "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED','LAUNCHING');"
        run-id)
       
       (debug:print-info 18 *default-log-port* "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
       (if (and (null? incompleted)
                (null? oldlaunched)
                (null? toplevels))
           #f
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                          AND state IN ('REMOTEHOSTSTART');"))
	      (stmth3 (db:get-cache-stmth
		       dbstruct db
		       "SELECT id,rundir,uname,testname,item_path FROM tests
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400
                                          AND state IN ('LAUNCHED');")))
	 ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
	 ;;
	 ;; HOWEVER: this code in run:test seems to work fine
	 ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	 ;;                     (db:test-get-run_duration testdat)))
	 ;;                    600) 
	 ;; (db:delay-if-busy dbdat)







|







1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                          AND state IN ('REMOTEHOSTSTART');"))
	      (stmth3 (db:get-cache-stmth
		       dbstruct db
		       "SELECT id,rundir,uname,testname,item_path FROM tests
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400
                                          AND state IN ('LAUNCHED','LAUNCHING');")))
	 ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
	 ;;
	 ;; HOWEVER: this code in run:test seems to work fine
	 ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	 ;;                     (db:test-get-run_duration testdat)))
	 ;;                    600) 
	 ;; (db:delay-if-busy dbdat)
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
	  -1 "-" "-"))

;;
;; 1. cache tests-match-qry
;; 2. compile qry and store in hash
;; 3. convert for-each-row to fold
;;
(define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
  (db:with-db
   dbstruct run-id #f
   (lambda (db)
     (let* ((res            '())
	    (stmt-cache      (dbr:dbstruct-stmt-cache dbstruct))
	    (stmth           (let* ((sh (db:hoh-get stmt-cache db testpatt)))
			       (or sh







|







3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
	  -1 "-" "-"))

;;
;; 1. cache tests-match-qry
;; 2. compile qry and store in hash
;; 3. convert for-each-row to fold
;;
#;(define (db:get-tests-for-run-state-status dbstruct run-id testpatt)
  (db:with-db
   dbstruct run-id #f
   (lambda (db)
     (let* ((res            '())
	    (stmt-cache      (dbr:dbstruct-stmt-cache dbstruct))
	    (stmth           (let* ((sh (db:hoh-get stmt-cache db testpatt)))
			       (or sh
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
				       test-id))))))
  (mt:process-triggers dbstruct run-id test-id newstate newstatus))

;; NEW BEHAVIOR: counts in-flight tests across ALL runs - the query has no
;; run_id filter; run-id is only handed to db:with-db.
;;
;; Rows with uname = 'n/a' and an empty item_path (toplevel tests) are left
;; out of the count. fastmode picks the query variant carrying "LIMIT 1".
;; NOTE(review): count(id) without GROUP BY yields a single row, so both
;; variants look equivalent - confirm before relying on fastmode.
;;
(define (db:get-count-tests-running dbstruct run-id fastmode)
  (define count-sql
    (if fastmode
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;"
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"))
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     ;; the statement handle comes from the per-dbstruct statement cache
     (sqlite3:first-result
      (db:get-cache-stmth dbstruct dbh count-sql)))))

;; NEW BEHAVIOR: counts in-flight tests for the single run given by run-id.
;;
;; The query is handed straight to sqlite3:first-result (no cached statement
;; handle) and, unlike the other counters, applies no uname/item_path filter.
;;
;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here.
;; The alternative that was left behind for reference:
;;   "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') AND NOT (uname = 'n/a' AND item_path = '');"
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
  (let ((per-run-sql
         "SELECT count(id) FROM tests WHERE state in ('RUNNING','REMOTEHOSTSTART','LAUNCHED') AND run_id=?;"))
    (db:with-db
     dbstruct run-id #f
     (lambda (dbh)
       (sqlite3:first-result dbh per-run-sql run-id)))))

;; NEW BEHAVIOR: looks only at the single run identified by run-id.
;; (Formerly db:get-running-stats.)
;;
;; Returns the first result of a count(id) query over the tests of that run
;; that are in one of the in-flight states named in the query. fastmode
;; selects the variant with "LIMIT 1".
;;
(define (db:get-count-tests-running-for-run-id dbstruct run-id fastmode)
  (define count-sql
    (if fastmode
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? LIMIT 1;"
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=?;"))
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     (sqlite3:first-result
      (db:get-cache-stmth dbstruct dbh count-sql)
      run-id))))

;; How many items of testname are in flight within run-id? Used to determine
;; the probability for regenerating html.
;;
;; Rows with uname = 'n/a' and an empty item_path (the toplevel test) are
;; excluded, so only items are counted.
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     (sqlite3:first-result
      (db:get-cache-stmth
       dbstruct dbh
       "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
      run-id
      testname))))

(define (db:get-not-completed-cnt dbstruct run-id)
(db:with-db
   dbstruct







|
|




















|







|
|

















|







3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
				       test-id))))))
  (mt:process-triggers dbstruct run-id test-id newstate newstatus))

;; NEW BEHAVIOR: counts in-flight tests across ALL runs - the query has no
;; run_id filter; run-id is only handed to db:with-db.
;;
;; Rows with uname = 'n/a' and an empty item_path (toplevel tests) are left
;; out of the count. fastmode picks the query variant carrying "LIMIT 1".
;; NOTE(review): count(id) without GROUP BY yields a single row, so both
;; variants look equivalent - confirm before relying on fastmode.
;;
(define (db:get-count-tests-running dbstruct run-id fastmode)
  (define count-sql
    (if fastmode
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','LAUNCHING','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '') LIMIT 1;"
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','LAUNCHING','REMOTEHOSTSTART') AND NOT (uname = 'n/a' AND item_path = '');"))
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     ;; the statement handle comes from the per-dbstruct statement cache
     (sqlite3:first-result
      (db:get-cache-stmth dbstruct dbh count-sql)))))

;; NEW BEHAVIOR: counts in-flight tests for the single run given by run-id.
;;
;; The query is handed straight to sqlite3:first-result (no cached statement
;; handle) and, unlike the other counters, applies no uname/item_path filter.
;;
;; WARNING BUG EDIT ME - merged from v1.55 - not sure what is right here.
;; The alternative that was left behind for reference:
;;   "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND run_id NOT IN (SELECT id FROM runs WHERE state='deleted') AND NOT (uname = 'n/a' AND item_path = '');"
;;
(define (db:get-count-tests-actually-running dbstruct run-id)
  (let ((per-run-sql
         "SELECT count(id) FROM tests WHERE state in ('RUNNING','REMOTEHOSTSTART','LAUNCHED','LAUNCHING') AND run_id=?;"))
    (db:with-db
     dbstruct run-id #f
     (lambda (dbh)
       (sqlite3:first-result dbh per-run-sql run-id)))))

;; NEW BEHAVIOR: looks only at the single run identified by run-id.
;; (Formerly db:get-running-stats.)
;;
;; Returns the first result of a count(id) query over the tests of that run
;; that are in one of the in-flight states named in the query. fastmode
;; selects the variant with "LIMIT 1".
;;
(define (db:get-count-tests-running-for-run-id dbstruct run-id fastmode)
  (define count-sql
    (if fastmode
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','LAUNCHING','REMOTEHOSTSTART') AND run_id=? LIMIT 1;"
        "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','LAUNCHING','REMOTEHOSTSTART') AND run_id=?;"))
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     (sqlite3:first-result
      (db:get-cache-stmth dbstruct dbh count-sql)
      run-id))))

;; How many items of testname are in flight within run-id? Used to determine
;; the probability for regenerating html.
;;
;; Rows with uname = 'n/a' and an empty item_path (the toplevel test) are
;; excluded, so only items are counted.
;;
(define (db:get-count-tests-running-for-testname dbstruct run-id testname)
  (db:with-db
   dbstruct run-id #f
   (lambda (dbh)
     (sqlite3:first-result
      (db:get-cache-stmth
       dbstruct dbh
       "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHING','LAUNCHED','REMOTEHOSTSTART') AND run_id=? AND NOT (uname = 'n/a' AND item_path = '') AND testname=?;")
      run-id
      testname))))

(define (db:get-not-completed-cnt dbstruct run-id)
(db:with-db
   dbstruct
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
	    (db:with-db
	     dbstruct
	     run-id
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       ))
	    0))))

;; tags: '("tag%" "tag2" "%ag6")
;;

;; Estimate how many tests of run-id still have work outstanding.
;;
;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING, KILLREQ
;;
;; Returns the first result of the count(id) query below.
;;
;; run-id must be passed to sqlite3:first-result so it is bound to the
;; run_id=? placeholder. It used to sit one paren too far out, which left
;; the placeholder unbound and made the lambda return run-id itself
;; instead of the count.
(define (db:estimated-tests-remaining dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING','KILLREQ') AND run_id=?;"
      run-id))))

;; map run-id, testname item-path to test-id
(define (db:get-test-id dbstruct run-id testname item-path)
  (db:with-db
   dbstruct
   run-id







|


















|







3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
	    (db:with-db
	     dbstruct
	     run-id
	     #f
	     (lambda (db)
	       (sqlite3:first-result
		db
		(conc "SELECT count(id) FROM tests WHERE state in ('RUNNING','LAUNCHING','LAUNCHED','REMOTEHOSTSTART') AND testname in ('"
		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       ))
	    0))))

;; tags: '("tag%" "tag2" "%ag6")
;;

;; Estimate how many tests of run-id still have work outstanding.
;;
;; done with run when:
;;   0 tests in LAUNCHING, LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING, KILLREQ
;;
;; Returns the first result of the count(id) query below.
;;
;; run-id must be passed to sqlite3:first-result so it is bound to the
;; run_id=? placeholder. It used to sit one paren too far out, which left
;; the placeholder unbound and made the lambda return run-id itself
;; instead of the count.
(define (db:estimated-tests-remaining dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:first-result
      db
      "SELECT count(id) FROM tests WHERE state in ('LAUNCHING','LAUNCHED','NOT_STARTED','REMOTEHOSTSTART','RUNNING','KILLREQ') AND run_id=?;"
      run-id))))

;; map run-id, testname item-path to test-id
(define (db:get-test-id dbstruct run-id testname item-path)
  (db:with-db
   dbstruct
   run-id
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055

4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
4098
4099
4100
4101
	 (begin
	   (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.")
           (print-call-chain (current-error-port))
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg))) ;; rpc

;; ; This is to be the big daddy call NOPE: Replaced by db:set-state-status-and-roll-up-items
;; ;
;; define (db:test-set-state-status dbstruct run-id test-id state status msg)
;;  (let ((dbdat  (db:get-db dbstruct run-id)))
;;    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
;; 	(db:general-call dbdat 'set-test-start-time (list test-id)))
;;    ;; (if msg
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed states
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
;; Set state/status (and comment) on one test and, when that test is an item,
;; roll the combined item states/statuses up onto its toplevel test.
;;
;;   test-name : a test name, or a number, in which case it is used as a test-id
;;   item-path : only used for the lookup when test-name is a name; it is then
;;               re-read from the test record that was found
;;   state, status, comment : values written to the test
;;
;; Returns the value of the sqlite3:with-transaction call.
;;
;; NOTE(review): *db-transaction-mutex* is unlocked only on the normal path;
;; if the transaction raises, the unlock below is not reached unless
;; db:with-db handles it - confirm.
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         ;; the toplevel test is looked up with an empty item-path
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat
			   (db:test-get-id tl-testdat)
			   #f)))
    ;; entering LAUNCHED or REMOTEHOSTSTART records the test start time (done before the mutex is taken)
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) 
	(db:general-call dbstruct 'set-test-start-time (list test-id)))
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part of the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       ;; no toplevel record found -> nothing to roll up onto
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         ;; a status of "AUTO" triggers the test_data rollup, after the mutex is released
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

;; Derive the rolled-up (state status) pair from a list of state/status
;; count records.
;;
;;   state-status-counts : records read with dbr:counts-state / dbr:counts-status
;;   state, status       : the incoming state/status, or #f; they join the
;;                         candidates unless state is one of
;;                         *common:dont-roll-up-states*
;;
;; Returns (list newstate newstatus).
;;
(define (db:roll-up-rules state-status-counts state status)
  (let* ((item-states     (map dbr:counts-state  state-status-counts))
         (item-statuses   (map dbr:counts-status state-status-counts))
         ;; does the incoming state take part in the rollup?
         (incoming-rolls? (and state
                               (not (member state *common:dont-roll-up-states*))))
         (running-cnt     (length
                           (filter (lambda (st)
                                     (member st *common:running-states*))
                                   item-states)))
         ;; NOT_STARTED with a status outside the accepted list
         (bad-ns-cnt      (length
                           (filter (lambda (rec)
                                     (and (equal? (dbr:counts-state rec) "NOT_STARTED")
                                          (not (member (dbr:counts-status rec)
                                                       *common:not-started-ok-statuses*))))
                                   state-status-counts)))
         ;; worst -> best (sort of)
         (sorted-states   (common:special-sort
                           (delete-duplicates
                            (if incoming-rolls?
                                (cons state item-states)
                                item-states))
                           *common:std-states* >))
         ;; worst -> best
         (sorted-statuses (common:special-sort
                           (delete-duplicates
                            (if (and incoming-rolls? status)
                                (cons status item-statuses)
                                item-statuses))
                           *common:std-statuses* >))
         (skipped-states  (cons "COMPLETED" *common:dont-roll-up-states*))
         (unfinished      (filter (lambda (st)
                                    (not (member st skipped-states)))
                                  sorted-states))
         (unfinished-cnt  (length unfinished))
         (preq-failed?    (pair? (filter (lambda (st)
                                           (equal? st "PREQ_FAIL"))
                                         sorted-statuses)))
         (newstate
          (cond
           ((> running-cnt 0)    "RUNNING")        ;; anything running, call the situation running
           (preq-failed?         "NOT_STARTED")
           ((> bad-ns-cnt 0)     "COMPLETED")      ;; ugly situation: completed in the sense that nothing more can be done
           ((> unfinished-cnt 0) (car unfinished)) ;; only roll up DELETED if all DELETED
           (else                 (car sorted-states))))
         (newstatus
          (cond
           (preq-failed? "PREQ_FAIL")
           ((or (> bad-ns-cnt 0)
                (and (equal? newstate "NOT_STARTED")
                     (> unfinished-cnt 0)))
            "STARTED")
           (else (car sorted-statuses)))))
    (debug:print-info 2 *default-log-port*
                      "\n--> probe db:set-state-status-and-roll-up-items: "
                      "\n--> state-status-counts: " (map dbr:counts->alist state-status-counts)
                      "\n--> running:             " running-cnt
                      "\n--> bad-not-started:     " bad-ns-cnt
                      "\n--> non-non-completes:   " unfinished-cnt
                      "\n--> non-completes:       " unfinished
                      "\n--> all-curr-states:     " sorted-states
                      "\n--> all-curr-statuses:     " sorted-statuses
                      "\n--> newstate              " newstate
                      "\n--> newstatus            " newstatus
                      "\n\n")
    (list newstate newstatus)))

;; Recompute the rolled-up state/status of a run from the state/status
;; counts of its tests and store the result on the run when it differs
;; from the values the caller believes are current.
;;
;;   dbstruct    : db handle structure passed through to the db:* calls
;;   run-id      : id of the run to roll up
;;   curr-state  : state the caller currently has for the run
;;   curr-status : status the caller currently has for the run
;;
;; Returns the value of the sqlite3:with-transaction form.
;;
;; NOTE(review): *db-transaction-mutex* is unlocked only after the
;; transaction returns normally; if the transaction raises, the unlock
;; below is not reached - confirm that callers can tolerate this.
;;
(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id))
                        (state-statuses      (db:roll-up-rules state-status-counts #f #f))
                        (newstate            (car state-statuses))
                        (newstatus           (cadr state-statuses)))
                   ;; States and statuses are strings, so they must be compared
                   ;; with equal?. eq? tests object identity and reports two
                   ;; distinct strings with the same characters as different,
                   ;; which forced a db write on every call.
                   (if (or (not (equal? newstate curr-state))
                           (not (equal? newstatus curr-status)))
                       (db:set-run-state-status dbstruct run-id newstate newstatus)))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<




















|













|
|
|
















|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|











|
|
|







3952
3953
3954
3955
3956
3957
3958















3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
	 (begin
	   (debug:print-error 0 *default-log-port* "reception failed. Received \"" msg "\" but cannot translate it.")
           (print-call-chain (current-error-port))
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg))) ;; rpc
















;; Set the state/status of a test (or test item) and, when the incoming
;; test is an item, roll the item states/statuses up into the top level
;; test record.
;;
;; state is the priority rollup of all states
;; status is the priority rollup of all completed states
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
;;   dbstruct  : db handle structure passed through to the db:* calls
;;   run-id    : id of the run the test belongs to
;;   test-name : test name (string) or a test-id (number)
;;   item-path : item path of the test; replaced by the value read from the test record
;;   state     : new state for the test
;;   status    : new status for the test
;;   comment   : comment passed to db:test-set-state-status for the test itself
;;
;; Returns the value of the sqlite3:with-transaction form.
;;
;; NOTE(review): *db-transaction-mutex* is unlocked only after the
;; transaction returns normally; if the transaction raises, the unlock
;; below is not reached - confirm that callers can tolerate this.
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
                           (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
                           (db:get-test-info       dbstruct run-id test-name item-path)))
         (test-id      (db:test-get-id testdat))
         (test-name    (if (number? test-name)
                           (db:test-get-testname testdat)
                           test-name))
         (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat
                           (db:test-get-id tl-testdat)
                           #f)))
    ;; List elements are separated by whitespace only. A comma inside a quoted
    ;; list is read as (unquote ...), which previously turned the "LAUNCHED"
    ;; entry into (unquote "LAUNCHED") so that state never matched.
    (if (member state '("LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART"))
        (db:general-call dbstruct 'set-test-start-time (list test-id)))
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part of the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
                            (state-statuses       (db:roll-up-rules state-status-counts state status))
                            (newstate             (car state-statuses))
                            (newstatus            (cadr state-statuses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id=" tl-test-id " ; " test-name ":" item-path " newstate=" newstate " newstatus=" newstatus " len(sscs)=" (length state-status-counts) " state-status-counts: "
                                    (apply conc
                                           (map (lambda (x)
                                                  (conc
                                                   (with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                                                state-status-counts))) ;; end debug:print
                       (if tl-test-id
                           (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
                       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO"))
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

;; Apply the roll-up rules to a list of state/status count records and
;; return (list newstate newstatus).
;;
;;   state-status-counts : list of records read with dbr:counts-state and
;;                         dbr:counts-status
;;   state, status       : extra state/status to fold into the candidates
;;                         (pass #f to omit); they are ignored when state
;;                         is a member of *common:dont-roll-up-states*
;;
;; NOTE(review): when no states/statuses are available the final `car`
;; calls are applied to an empty list - confirm callers never hit this.
;;
(define (db:roll-up-rules state-status-counts state status)
  (let* ((tally             (lambda (pred)
                              (length (filter pred state-status-counts))))
         ;; true when the incoming state takes part in the rollup
         (include-incoming? (and state
                                 (not (member state *common:dont-roll-up-states*))))
         (seen-states       (map dbr:counts-state  state-status-counts))
         (seen-statuses     (map dbr:counts-status state-status-counts))
         (n-running         (tally (lambda (rec)
                                     (member (dbr:counts-state rec) *common:running-states*))))
         ;; NOT_STARTED records whose status is not one of the accepted ones
         (n-bad-not-started (tally (lambda (rec)
                                     (and (equal? (dbr:counts-state rec) "NOT_STARTED")
                                          (not (member (dbr:counts-status rec)
                                                       *common:not-started-ok-statuses*))))))
         ;; worst -> best (sort of)
         (sorted-states     (common:special-sort
                             (delete-duplicates
                              (if include-incoming?
                                  (cons state seen-states)
                                  seen-states))
                             *common:std-states* >))
         ;; worst -> best
         (sorted-statuses   (common:special-sort
                             (delete-duplicates
                              (if (and status include-incoming?)
                                  (cons status seen-statuses)
                                  seen-statuses))
                             *common:std-statuses* >))
         (finished-states   (cons "COMPLETED" *common:dont-roll-up-states*))
         (unfinished        (filter (lambda (st)
                                      (not (member st finished-states)))
                                    sorted-states))
         (preq-failed       (filter (lambda (st)
                                      (equal? st "PREQ_FAIL"))
                                    sorted-statuses))
         (n-unfinished      (length unfinished))
         (have-preq-fail?   (pair? preq-failed))
         (newstate
          (cond
           ;; anything running, call the situation running
           ((positive? n-running)                            "RUNNING")
           ;; not really sure about this one - mrw - Sep 7, 2020 - trying to
           ;; fix NOT_STARTED/PREREQ_FAILS in itemized rollup
           ((and have-preq-fail? (positive? n-unfinished))   "NOT_STARTED")
           ;; we have an ugly situation, it is completed in the sense we cannot do more.
           ((positive? n-bad-not-started)                    "COMPLETED")
           ;; only rollup DELETED if all DELETED
           ((positive? n-unfinished)                         (car unfinished))
           (else                                             (car sorted-states))))
         (newstatus
          (cond
           (have-preq-fail?                                  "PREQ_FAIL")
           ((or (positive? n-bad-not-started)
                (and (equal? newstate "NOT_STARTED")
                     (positive? n-unfinished)))              "STARTED")
           (else                                             (car sorted-statuses)))))
    (debug:print-info 2 *default-log-port*
                      "\n--> probe db:set-state-status-and-roll-up-items: "
                      "\n--> state-status-counts: " (map dbr:counts->alist state-status-counts)
                      "\n--> running:             " n-running
                      "\n--> bad-not-started:     " n-bad-not-started
                      "\n--> non-non-completes:   " n-unfinished
                      "\n--> non-completes:       " unfinished
                      "\n--> all-curr-states:     " sorted-states
                      "\n--> all-curr-statuses:     " sorted-statuses
                      "\n--> newstate              " newstate
                      "\n--> newstatus            " newstatus
                      "\n\n")
    (list newstate newstatus)))

;; Recompute the rolled-up state/status of a run from the state/status
;; counts of its tests and store the result on the run when it differs
;; from the values the caller believes are current.
;;
;;   dbstruct    : db handle structure passed through to the db:* calls
;;   run-id      : id of the run to roll up
;;   curr-state  : state the caller currently has for the run
;;   curr-status : status the caller currently has for the run
;;
;; Returns the value of the sqlite3:with-transaction form.
;;
;; NOTE(review): *db-transaction-mutex* is unlocked only after the
;; transaction returns normally; if the transaction raises, the unlock
;; below is not reached - confirm that callers can tolerate this.
;;
(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 (let* ((state-status-counts (db:get-all-state-status-counts-for-run dbstruct run-id))
                        (state-statuses      (db:roll-up-rules state-status-counts #f #f))
                        (newstate            (car state-statuses))
                        (newstatus           (cadr state-statuses)))
                   ;; States and statuses are strings, so they must be compared
                   ;; with equal?. eq? tests object identity and reports two
                   ;; distinct strings with the same characters as different,
                   ;; which forced a db write on every call.
                   (if (or (not (equal? newstate curr-state))
                           (not (equal? newstatus curr-status)))
                       (db:set-run-state-status dbstruct run-id newstate newstatus)))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
4245
4246
4247
4248
4249
4250
4251
4252
4253
4254
4255
4256
4257
4258
4259
                                                     AND item_path != '' 
                                                     AND status NOT IN ('n/a')
                                                     AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE'))
                                                     AND state in ('RUNNING','NOT_STARTED','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 







|







4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
4243
4244
4245
                                                     AND item_path != '' 
                                                     AND status NOT IN ('n/a')
                                                     AND state in ('NOT_STARTED')) > 0 THEN 'UNKNOWN'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND (status NOT IN ('TEN_STRIKES','BLOCKED') OR status IN ('INCOMPLETE'))
                                                     AND state in ('RUNNING','NOT_STARTED','LAUNCHING','LAUNCHED','REMOTEHOSTSTART')) > 0 THEN 'RUNNING'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
                                                     AND state NOT IN ('COMPLETED','DELETED')) = 0 THEN 'COMPLETED'
                                   WHEN (SELECT count(id) FROM tests 
                                                WHERE testname=?
                                                     AND item_path != '' 
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677
4678
                              (debug:print 0 *default-log-port*
                               "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
			      res))))
	    (if (null? tal)
		newr
		(loop (car tal)(cdr tal) newr)))))))




;; the new prereqs calculation, looks also at itempath if specified
;; all prereqs must be met
;;    if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met
;;    if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only







<
<
<







4648
4649
4650
4651
4652
4653
4654



4655
4656
4657
4658
4659
4660
4661
                              (debug:print 0 *default-log-port*
                               "WARNING: itemmap has problem \"" itemmap "\", patt: " patt ", repl: " repl)
			      res))))
	    (if (null? tal)
		newr
		(loop (car tal)(cdr tal) newr)))))))




;; the new prereqs calculation, looks also at itempath if specified
;; all prereqs must be met
;;    if prereq test with itempath='' is COMPLETED and PASS, WARN, CHECK, or WAIVED then prereq is met
;;    if prereq test with itempath=ref-item-path and COMPLETED with PASS, WARN, CHECK, or WAIVED then prereq is met
;;
;; Note: mode 'normal means that tests must be COMPLETED and ok (i.e. PASS, WARN, CHECK, SKIP or WAIVED)
;;       mode 'toplevel means that tests must be COMPLETED only
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
  (append
   (if (member 'exclusive mode)
       (let ((running-tests (db:get-tests-for-run dbstruct
						  #f  ;; run-id of #f means for all runs. 
						  (if (string=? ref-item-path "")   ;; testpatt
						      ref-test-name
						      (conc ref-test-name "/" ref-item-path))
						  '("LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
						  '()          ;; statuses
						  #f           ;; offset
						  #f           ;; limit
						  #f           ;; not-in
						  #f           ;; sort by
						  #f           ;; sort order
						  'shortlist   ;; query type







|







4672
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
  (append
   (if (member 'exclusive mode)
       (let ((running-tests (db:get-tests-for-run dbstruct
						  #f  ;; run-id of #f means for all runs. 
						  (if (string=? ref-item-path "")   ;; testpatt
						      ref-test-name
						      (conc ref-test-name "/" ref-item-path))
						  '("LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART" "RUNNING") ;; states
						  '()          ;; statuses
						  #f           ;; offset
						  #f           ;; limit
						  #f           ;; not-in
						  #f           ;; sort by
						  #f           ;; sort order
						  'shortlist   ;; query type

Modified dcommon.scm from [0db7864f6b] to [32b29e5f3f].

1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
				    (dboard:tabdat-test-patts-use tabdat))
			   #:expand "YES"
			   #:size "x30" ;; was 10x30
			   #:multiline "YES")))
      (set! test-patterns-textbox tb)
      (dboard:tabdat-test-patterns-textbox-set! tabdat tb)
      tb))
;; (iup:frame
;;  #:title "Target"
;;  ;; Target selectors
;;  (apply iup:hbox
;; 	   (let* ((dat      (dashboard:update-target-selector tabdat action-proc: update-keyvals))
;; 		  (key-lb   (car dat))
;; 		  (combos   (cadr dat)))
;; 	     combos)))
   ;; (iup:hbox
   ;;  ;; Text box for STATES
   ;;  (iup:frame
   ;;   #:title "States"
   ;;   (dashboard:text-list-toggle-box 
   ;;    ;; Move these definitions to common and find the other useages and replace!
   ;;    (map cadr *common:std-states*) ;; '("COMPLETED" "RUNNING" "STUCK" "INCOMPLETE" "LAUNCHED" "REMOTEHOSTSTART" "KILLED")
   ;;    (lambda (all)
   ;;      (dboard:tabdat-states-set! tabdat all)
   ;;      (dashboard:update-run-command tabdat))))
   ;;  ;; Text box for STATES
   ;;  (iup:frame
   ;;   #:title "Statuses"
   ;;   (dashboard:text-list-toggle-box 
   ;;    (map cadr *common:std-statuses*) ;; '("PASS" "FAIL" "n/a" "CHECK" "WAIVED" "SKIP" "DELETED" "STUCK/DEAD")
   ;;    (lambda (all)
   ;;      (dboard:tabdat-statuses-set! tabdat all)
   ;;      (dashboard:update-run-command tabdat)))))
   ))

(define (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state)
  (iup:frame
   #:title "Tests and Tasks"
   (let* ((updater #f)
	  (last-xadj 0)







<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







1213
1214
1215
1216
1217
1218
1219


























1220
1221
1222
1223
1224
1225
1226
				    (dboard:tabdat-test-patts-use tabdat))
			   #:expand "YES"
			   #:size "x30" ;; was 10x30
			   #:multiline "YES")))
      (set! test-patterns-textbox tb)
      (dboard:tabdat-test-patterns-textbox-set! tabdat tb)
      tb))


























   ))

(define (dcommon:command-tests-tasks-canvas tabdat test-records sorted-testnames tests-draw-state)
  (iup:frame
   #:title "Tests and Tasks"
   (let* ((updater #f)
	  (last-xadj 0)

Modified docs/manual/megatest_manual.html from [a02a70016f] to [18b25df6be].

1
2
3
4
5
6
7
8
9
10
11
12
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="generator" content="AsciiDoc 8.6.7">
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;




|







1
2
3
4
5
6
7
8
9
10
11
12
<!DOCTYPE html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="generator" content="AsciiDoc 8.6.10">
<title>The Megatest Users Manual</title>
<style type="text/css">
/* Shared CSS for AsciiDoc xhtml11 and html5 backends */

/* Default font. */
body {
  font-family: Georgia,serif;
82
83
84
85
86
87
88
89



90
91
92



93
94
95
96
97
98
99

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

pre {



  padding: 0;
  margin: 0;
}




#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {







|
>
>
>



>
>
>







82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

ul, ol, li > p {
  margin-top: 0;
}
ul > li     { color: #aaa; }
ul > li > * { color: black; }

.monospaced, code, pre {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
  padding: 0;
  margin: 0;
}
pre {
  white-space: pre-wrap;
}

#author {
  color: #527bbd;
  font-weight: bold;
  font-size: 1.1em;
}
#email {
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {







|







220
221
222
223
224
225
226
227
228
229
230
231
232
233
234

div.exampleblock > div.content {
  border-left: 3px solid #dddddd;
  padding-left: 0.5em;
}

div.imageblock div.content { padding-left: 0; }
span.image img { border-style: none; vertical-align: text-bottom; }
a.image:visited { color: white; }

dl {
  margin-top: 0.8em;
  margin-bottom: 0.8em;
}
dt {
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429


/*
 * xhtml11 specific
 *
 * */

tt {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}







<
<
<
<
<
<







416
417
418
419
420
421
422






423
424
425
426
427
428
429


/*
 * xhtml11 specific
 *
 * */







div.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
div.tableblock > table {
  border: 3px solid #527bbd;
}
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468


/*
 * html5 specific
 *
 * */

.monospaced {
  font-family: "Courier New", Courier, monospace;
  font-size: inherit;
  color: navy;
}

table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;







<
<
<
<
<
<







449
450
451
452
453
454
455






456
457
458
459
460
461
462


/*
 * html5 specific
 *
 * */







table.tableblock {
  margin-top: 1.0em;
  margin-bottom: 1.5em;
}
thead, p.tableblock.header {
  font-weight: bold;
  color: #527bbd;
534
535
536
537
538
539
540


541
542
543
544
545
546
547
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


@media screen {
  body {
    max-width: 50em; /* approximately 80 characters wide */
    margin-left: 16em;
  }

  #toc {







>
>







528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
body.manpage div.sectionbody {
  margin-left: 3em;
}

@media print {
  body.manpage div#toc { display: none; }
}


@media screen {
  body {
    max-width: 50em; /* approximately 80 characters wide */
    margin-left: 16em;
  }

  #toc {
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
</div></div>
</div>
<div class="sect2">
<h3 id="_trim_trailing_spaces">Trim trailing spaces</h3>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
</td>
<td class="content">As of Megatest version v1.6548 trim-trailing-spaces defaults to yes.</td>
</tr></table>
</div>
<div class="listingblock">
<div class="content monospaced">
<pre>[configf:settings trim-trailing-spaces no]







|







2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
</div></div>
</div>
<div class="sect2">
<h3 id="_trim_trailing_spaces">Trim trailing spaces</h3>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/usr/images/icons/note.png" alt="Note">
</td>
<td class="content">As of Megatest version v1.6548 trim-trailing-spaces defaults to yes.</td>
</tr></table>
</div>
<div class="listingblock">
<div class="content monospaced">
<pre>[configf:settings trim-trailing-spaces no]
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
<pre># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
</td>
<td class="content">Dynamic waiton lists must be capable of being calculated at the
beginning of a run. This is because Megatest walks the tree of waitons
to create the list of tests to execute.</td>
</tr></table>
</div>
<div class="listingblock">







|







2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
<pre># A normal waiton waits for the prior tests to be COMPLETED
# and PASS, CHECK or WAIVED
waiton test1 test2</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/usr/images/icons/note.png" alt="Note">
</td>
<td class="content">Dynamic waiton lists must be capable of being calculated at the
beginning of a run. This is because Megatest walks the tree of waitons
to create the list of tests to execute.</td>
</tr></table>
</div>
<div class="listingblock">
2686
2687
2688
2689
2690
2691
2692



















2693
2694
2695
2696
2697
2698
2699
item is COMPLETED and PASS, CHECK or WAIVED in the prior test. This
was historically called "itemwait" mode. The terms "itemwait" and
"itemmatch" are synonyms.</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[requirements]
mode itemmatch</pre>



















</div></div>
</div>
<div class="sect2">
<h3 id="_overriding_enviroment_variables">Overriding Enviroment Variables</h3>
<div class="paragraph"><p>Override variables before starting the test. Can include files (perhaps generated by megatest -envdelta or similar).</p></div>
<div class="listingblock">
<div class="content monospaced">







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
item is COMPLETED and PASS, CHECK or WAIVED in the prior test. This
was historically called "itemwait" mode. The terms "itemwait" and
"itemmatch" are synonyms.</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[requirements]
mode itemmatch</pre>
</div></div>
<div class="paragraph"><p>Exclusive mode allows only one instance of a particular test to run at a time.</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[requirements]
mode exclusive</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/usr/images/icons/note.png" alt="Note">
</td>
<td class="content">Modes can be combined</td>
</tr></table>
</div>
<div class="listingblock">
<div class="content monospaced">
<pre>[requirements]
mode toplevel exclusive</pre>
</div></div>
</div>
<div class="sect2">
<h3 id="_overriding_enviroment_variables">Overriding Enviroment Variables</h3>
<div class="paragraph"><p>Override variables before starting the test. Can include files (perhaps generated by megatest -envdelta or similar).</p></div>
<div class="listingblock">
<div class="content monospaced">
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
<div class="content monospaced">
<pre>[triggers]
COMPLETED/ xterm -e bash -s --</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/nfs/pdx/disks/ice.disk.002/icfadm/pkgs/asciidoc/8.6.7/images/icons/note.png" alt="Note">
</td>
<td class="content">There is a trailing space after the double-dash</td>
</tr></table>
</div>
<div class="paragraph"><p>There are a number of environment variables available to the trigger script
but since triggers can be called in various contexts not all variables are
available at all times. The trigger script should check for the variable and







|







3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
<div class="content monospaced">
<pre>[triggers]
COMPLETED/ xterm -e bash -s --</pre>
</div></div>
<div class="admonitionblock">
<table><tr>
<td class="icon">
<img src="/usr/images/icons/note.png" alt="Note">
</td>
<td class="content">There is a trailing space after the double-dash</td>
</tr></table>
</div>
<div class="paragraph"><p>There are a number of environment variables available to the trigger script
but since triggers can be called in various contexts not all variables are
available at all times. The trigger script should check for the variable and
3483
3484
3485
3486
3487
3488
3489

3490
3491
3492
3493
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.5<br>
Last updated 2020-09-08 08:39:29 PDT

</div>
</div>
</body>
</html>







>




3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.5<br>
Last updated 2020-09-08 08:39:29 PDT
 2020-08-22 12:47:36 MST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [6aa04b6eea] to [2bb7de5262].

407
408
409
410
411
412
413














414
415
416
417
418
419
420
was historically called "itemwait" mode. The terms "itemwait" and
"itemmatch" are synonyms.

-------------------
[requirements]
mode itemmatch
-------------------















Overriding Enviroment Variables
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Override variables before starting the test. Can include files (perhaps generated by megatest -envdelta or similar).

--------------------







>
>
>
>
>
>
>
>
>
>
>
>
>
>







407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
was historically called "itemwait" mode. The terms "itemwait" and
"itemmatch" are synonyms.

-------------------
[requirements]
mode itemmatch
-------------------

Exclusive mode allows only one instance of a particular test to run at a time.

-------------------
[requirements]
mode exclusive
-------------------

NOTE: Modes can be combined

-------------------
[requirements]
mode toplevel exclusive
-------------------

Overriding Environment Variables
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Override variables before starting the test. Can include files (perhaps generated by megatest -envdelta or similar).

--------------------

Modified docs/manual/server.png from [ae7d7ee58e] to [43882638fe].

cannot compute difference between binary files

Modified gutils.scm from [94030f1a6e] to [4581eccaf8].

62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
       (else (list "180 33 49" status))))
    ;;      (if (equal? status "PASS")
    ;;	  '("70 249 73" "PASS")
    ;;	  (if (or (equal? status "WARN")
    ;;		  (equal? status "WAIVED"))
    ;;	      (list "255 172 13" status)
    ;;	      (list "223 33 49"  status)))) ;; greenish orangeish redish
    ((LAUNCHED)         (list "101 123 142"  state))
    ((CHECK)            (list "255 100 50"   state))
    ((REMOTEHOSTSTART)  (list "50 130 195"   state))
    ((RUNNING STARTED)          (list "9 131 232"    state))
    ((KILLREQ)          (list "39 82 206"    state))
    ((KILLED)           (list "234 101 17"   state))
    ((NOT_STARTED)      (case (string->symbol status)
			  ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			  (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these
       (else  (list "60  235 63" status))))
    ((DIRTY-BETTER)     (list "160  255 153" status))







|
|
|
|
|
|
|
|
|







62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
       (else (list "180 33 49" status))))
    ;;      (if (equal? status "PASS")
    ;;	  '("70 249 73" "PASS")
    ;;	  (if (or (equal? status "WARN")
    ;;		  (equal? status "WAIVED"))
    ;;	      (list "255 172 13" status)
    ;;	      (list "223 33 49"  status)))) ;; greenish orangeish redish
    ((LAUNCHED LAUNCHING) (list "101 123 142"  state))
    ((CHECK)              (list "255 100 50"   state))
    ((REMOTEHOSTSTART)    (list "50 130 195"   state))
    ((RUNNING STARTED)    (list "9 131 232"    state))
    ((KILLREQ)            (list "39 82 206"    state))
    ((KILLED)             (list "234 101 17"   state))
    ((NOT_STARTED)        (case (string->symbol status)
			    ((CHECK STARTED)(list (gutils:get-color-spec 'SKIP) state))
			    (else   (list "240 240 240"                 state))))
    ;; for xor mode below
    ;;
    ((CLEAN)
     (case (string->symbol status)
       ((CLEAN-FAIL CLEAN-CHECK CLEAN-ABORT)  (list "200 130 13" status)) ;; orange requested for these
       (else  (list "60  235 63" status))))
    ((DIRTY-BETTER)     (list "160  255 153" status))

Modified launch.scm from [d0067277fa] to [d3e85514f4].

463
464
465
466
467
468
469
470

471
472
473
474
475
476
477
478
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun

	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)







|
>
|







463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
		  (member (db:test-get-status test-info) '("ABORT")))                               ;; prior run of this test ABORTed, ok to rerun without reset of state
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE, KILLED or ABORT, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
        	  (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
        ((> running-cnt 0)
            (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
   				  (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
           			(if (and all-test-launched  (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
           					(launch:end-of-run-check run-id)))) ;;todo
        (else  (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
         (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (if (> (length not-completed-tests) 0) 
           (let loop ((running-test (car not-completed-tests))
			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
;; Probe whether process `pid` still exists on `host` by running
;; "pstree -A <pid>" over ssh and looking for any output at all.
;;
;;   host - host name string; #f or "n/a" means the host is not known
;;   pid  - process id; #f means the pid is not known
;;
;; Returns #f only when the remote command produced no output lines.
;; When host or pid is unknown no check is possible, so the test is
;; reported as alive (#t).
(define (launch:is-test-alive host pid)
  (cond
   ;; nothing to probe with - give the test the benefit of the doubt
   ((not (and host pid (not (equal? host "n/a")))) #t)
   (else
    (let* ((ssh-cmd (conc "ssh " host " pstree -A " pid))
           (lines   (with-input-from-pipe ssh-cmd read-lines)))
      (debug:print 2 *default-log-port* "Running " ssh-cmd " received " lines)
      ;; any output at all is taken as proof of life
      (not (null? lines))))))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))
								 (test-id (vector-ref running-test 0))
                 (host (vector-ref running-test 6))







|




















|







758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
        	  (debug:print 0 *default-log-port* "There are " running-cnt " tests running." ))
        ((> running-cnt 0)
            (debug:print 0 *default-log-port* "running cnt > 0 but <= 3 kill-running-tests-if-dead" )
   				  (let ((kill-cnt (launch:kill-tests-if-dead run-id)))
           			(if (and all-test-launched  (equal? all-test-launched "yes") (eq? kill-cnt running-cnt))
           					(launch:end-of-run-check run-id)))) ;;todo
        (else  (debug:print 0 *default-log-port* "Should it get here?? May be everything is not launched yet. Running test cnt:" running-cnt " Not completed test cnt:" not-completed-cnt)
         (let* ((not-completed-tests (rmt:get-tests-for-run run-id "%" `("NOT_STARTED" "RUNNING" "LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (if (> (length not-completed-tests) 0) 
           (let loop ((running-test (car not-completed-tests))
			     (tal    (cdr not-completed-tests)))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11)))
			       	(debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed")
              (if (not (null? tal))
				  (loop (car tal) (cdr tal)))))))))))        
 
;; Probe whether process `pid` still exists on `host` by running
;; "pstree -A <pid>" over ssh and looking for any output at all.
;;
;;   host - host name string; #f or "n/a" means the host is not known
;;   pid  - process id; #f means the pid is not known
;;
;; Returns #f only when the remote command produced no output lines.
;; When host or pid is unknown no check is possible, so the test is
;; reported as alive (#t).
(define (launch:is-test-alive host pid)
  (cond
   ;; nothing to probe with - give the test the benefit of the doubt
   ((not (and host pid (not (equal? host "n/a")))) #t)
   (else
    (let* ((ssh-cmd (conc "ssh " host " pstree -A " pid))
           (lines   (with-input-from-pipe ssh-cmd read-lines)))
      (debug:print 2 *default-log-port* "Running " ssh-cmd " received " lines)
      ;; any output at all is taken as proof of life
      (not (null? lines))))))
 
(define (launch:kill-tests-if-dead run-id)
  (let* ((running-tests (rmt:get-tests-for-run run-id "%" `("RUNNING" "LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART") `() #f #f #f #f #f #f #f #f)))
       (let loop ((running-test (car running-tests))
			     (tal    (cdr running-tests))
			     (kill-cnt 0))
		       (let* ((test-name (vector-ref running-test 2))
                 (item-path (vector-ref running-test 11))
								 (test-id (vector-ref running-test 0))
                 (host (vector-ref running-test 6))

Modified megatest-version.scm from [957a43ff94] to [3028461078].

16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.6566)







|
16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.6568)

Modified rmt.scm from [39d97c528a] to [01b8ff2065].

52
53
54
55
56
57
58



















59
60
61
62
63
64
65


66
67
68
69
70
71
72
73
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id




















;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))


                          
  (if (> attemptnum 2)
      (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
    
  (cond
   ((> attemptnum 2) (thread-sleep! 0.05))
   ((> attemptnum 10) (thread-sleep! 0.5))
   ((> attemptnum 20) (thread-sleep! 1)))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







>
>
|







52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; Bookkeeping for rmt:query-rest; both hold values from (current-milliseconds).
(define *rmt-query-last-call-time* 0) ;; time of the most recent call to rmt:query-rest
(define *rmt-query-last-rest-time* 0) ;; last time there was at least a 1/2 second rest - giving other processes access to the db

;; NOTE: This query rest algorithm will not adapt to long query times. REDESIGN NEEDED. TODO. FIXME.
;;
;; Query throttle. Call before issuing a query:
;;  - if more than 500 ms have passed since the previous call, that gap
;;    counts as a natural rest and both timestamps are reset;
;;  - otherwise, if more than 5000 ms have passed since the last rest,
;;    block the calling thread for half a second;
;;  - otherwise only record the time of this call.
;; Return value is unspecified; the procedure is called for effect only.
(define (rmt:query-rest)
  (let ((now (current-milliseconds)))
    (cond
     ((> (- now *rmt-query-last-call-time*) 500)  ;; it's been a while since last query - no need to rest
      (set! *rmt-query-last-rest-time* now)
      (set! *rmt-query-last-call-time* now))
     ((> (- now *rmt-query-last-rest-time*) 5000) ;; no natural rests have happened
      (debug:print 0 *default-log-port* "query rest needed. blocking for 1/2 second.")
      (thread-sleep! 0.5) ;; force a rest of a half second
      ;; `now` was sampled before the sleep; re-read the clock so the
      ;; recorded rest/call times mark the end of the rest rather than
      ;; its start (otherwise they are already 500 ms stale).
      (let ((after-rest (current-milliseconds)))
        (set! *rmt-query-last-rest-time* after-rest)
        (set! *rmt-query-last-call-time* after-rest)))
     (else ;; sufficient rests have occurred, just record the last query time
      (set! *rmt-query-last-call-time* now)))))

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

  #;(common:telemetry-log (conc "rmt:"(->string cmd))
                        payload: `((rid . ,rid)
                                   (params . ,params)))
  (if (not (equal? (configf:lookup *configdat* "setup" "query-rest") "no"))
      (rmt:query-rest))
  
  (if (> attemptnum 2)
      (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum))
    
  (cond
   ((> attemptnum 2) (thread-sleep! 0.05))
   ((> attemptnum 10) (thread-sleep! 0.5))
   ((> attemptnum 20) (thread-sleep! 1)))

Modified runs.scm from [030b929939] to [fcd77cb783].

58
59
60
61
62
63
64
65




66
67
68
69
70
71
72
  (last-load-check-time    0)
  (last-jobs-check-time    0)
  )

;; Record bundling the per-test values that are handed around together
;; while a test is being processed: the queue loop variables (hed, tal,
;; reg, reruns, newtal), the test's record/name/item-path/jobgroup, its
;; waitons, testmode and itemmaps, and the prereqs-not-met list.
(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal itemmaps prereqs-not-met)




  
;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
;;  - remove any that are over 3600 seconds old
;;  - if there are any that are younger than 10 seconds
;;      * sleep 10 seconds
;;      * touch my key-host-pid.softlock file
;;      * return







|
>
>
>
>







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
  (last-load-check-time    0)
  (last-jobs-check-time    0)
  )

;; Record bundling the per-test values that are handed around together
;; while a test is being processed: the queue loop variables (hed, tal,
;; reg, reruns, newtal), the test's record/name/item-path/jobgroup, its
;; waitons, testmode and itemmaps, plus a small cache of the last
;; prerequisites lookup.
(defstruct runs:testdat
  hed tal reg reruns  test-record
  test-name item-path jobgroup
  waitons testmode  newtal
  itemmaps
  (prereqs-not-met '()) ;; cached result of the last prereqs lookup, see runs:lazy-get-prereqs-not-met
  (last-update 0) ;; (current-seconds) at which prereqs-not-met was last refreshed; 0 = never
  )
  
;; look in the $MT_RUN_AREA_HOME/.softlocks directory for key-host-pid.softlock files
;;  - remove any that are over 3600 seconds old
;;  - if there are any that are younger than 10 seconds
;;      * sleep 10 seconds
;;      * touch my key-host-pid.softlock file
;;      * return
827
828
829
830
831
832
833






















834
835
836
837
838
839
840
841
842
843

844

845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)























;;======================================================================
;; runs:expand-items is called by runs:run-tests-queue
;;======================================================================
;;
;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps)

  (let* ((loop-list       (list hed tal reg reruns))

	 (prereqs-not-met (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '()))))
         (have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









|
>

>
|






|
|







831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

;; Return the list of prerequisites not yet met for test `hed`, asking the
;; server at most once every 10 seconds per testdat record.
;;
;;   testdat   - runs:testdat record; its prereqs-not-met / last-update
;;               fields are used as the cache and are updated here
;;   run-id, waitons, hed, item-path
;;             - passed through unchanged to rmt:get-prereqs-not-met
;;   mode:     - test mode list, default '(normal)
;;   itemmaps: - item maps, default #f
;;
;; If the cached value is less than 10 seconds old it is returned as is.
;; Otherwise rmt:get-prereqs-not-met is called; a non-list reply is logged
;; as an error and treated as '(). The result is stored in testdat together
;; with the current time and returned.
(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
  (if (< (- (current-seconds) (runs:testdat-last-update testdat)) 10) ;; only refresh for this test if it has been at least 10 seconds
      (runs:testdat-prereqs-not-met testdat)
      ;;                  (rmt:get-prereqs-not-met 46     '("r1") "y1" ""       mode: '(itemmatch) itemmaps: #f)
      (let* ((raw (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps))
             (res (if (list? raw)
                      (begin
                        ;; only take (length raw) once we know raw is a list;
                        ;; doing it before the check would raise on a bad reply
                        ;; and the error fallback below would never be reached
                        (debug:print 4 *default-log-port* "Get prereqs for " hed ", have " (length raw)
                                     " prereqs. last-update=" (runs:testdat-last-update testdat) " current-seconds=" (current-seconds)
                                     " delta=" (- (current-seconds) (runs:testdat-last-update testdat)))
                        raw)
                      (begin
                        (debug:print 0 *default-log-port*
                                     "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
                                     "  res=" raw " run-id=" run-id " waitons=" waitons " hed=" hed
                                     " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
                        '()))))
        (runs:testdat-prereqs-not-met-set! testdat res)
        (runs:testdat-last-update-set! testdat (current-seconds))
        res)))
	   
;;======================================================================
;; runs:expand-items is called by runs:run-tests-queue
;;======================================================================
;;
;; return value of runs:expand-items is passed back to runs-tests-queue and is fed to named loop with this signature:
;;    (let loop ((hed         (car sorted-test-names))
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  #;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '())))
	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
							  (conc run-id "," test-id) #f))
		    (test-name  (db:test-get-testname     testdat))
		    (item-path  (db:test-get-item-path    testdat))
		    (state      (db:test-get-state        testdat))
		    (status     (db:test-get-status       testdat))
		    (event-time (db:test-get-event_time   testdat))
		    (duration   (db:test-get-run_duration testdat)))
	       (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED""NOT_STARTED")))
			(not (and prevdat
				  (equal? state  (db:test-get-state  prevdat))
				  (equal? status (db:test-get-status prevdat)))))
		   (let ((fmt   (runs:gendat-inc-results-fmt *runs:general-data*))
			 (dtime (seconds->year-work-week/day-time event-time))) 
		     (if (runs:lownoise "inc-print" 600)
			 (format #t fmt "State" "Status" "Start Time" "Duration" "Test path"))







|







1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
							  (conc run-id "," test-id) #f))
		    (test-name  (db:test-get-testname     testdat))
		    (item-path  (db:test-get-item-path    testdat))
		    (state      (db:test-get-state        testdat))
		    (status     (db:test-get-status       testdat))
		    (event-time (db:test-get-event_time   testdat))
		    (duration   (db:test-get-run_duration testdat)))
	       (if (and (not (member state '("DELETED" "REMOTEHOSTSTART" "RUNNING" "LAUNCHED" "LAUNCHING" "NOT_STARTED")))
			(not (and prevdat
				  (equal? state  (db:test-get-state  prevdat))
				  (equal? status (db:test-get-status prevdat)))))
		   (let ((fmt   (runs:gendat-inc-results-fmt *runs:general-data*))
			 (dtime (seconds->year-work-week/day-time event-time))) 
		     (if (runs:lownoise "inc-print" 600)
			 (format #t fmt "State" "Status" "Start Time" "Duration" "Test path"))
1440
1441
1442
1443
1444
1445
1446
1447


1448
1449
1450
1451
1452
1453
1454

;; every time through the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;; Shorten a list for display: a list of more than 8 elements is reduced
;; to its first 3 elements followed by the string "..."; any shorter list
;; is returned unchanged.
(define (runs:pretty-long-list lst)
  (cond
   ((<= (length lst) 8) lst)
   (else
    (let ((leading (take lst 3)))
      (append leading (list "..."))))))



;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)







|
>
>







1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484

;; every time through the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;; Shorten a list for display: a list of more than 8 elements is reduced
;; to its first 3 elements followed by the string "..."; any shorter list
;; is returned unchanged.
(define (runs:pretty-long-list lst)
  (cond
   ((<= (length lst) 8) lst)
   (else
    (let ((leading (take lst 3)))
      (append leading (list "..."))))))

(define *runs-testdat-cache* (make-hash-table)) ;; full/testname => testdat

;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
1564
1565
1566
1567
1568
1569
1570
















1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585


1586
1587
1588
1589
1590
1591
1592
				   extras)
				  extras)
				'())))
	     (waitons     (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes
















	     (testdat     (make-runs:testdat
			   hed: hed
			   tal: tal
			   reg: reg
			   reruns: reruns
			   test-record: test-record
			   test-name:   test-name
			   item-path:   item-path
			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))


	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>







1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
				   extras)
				  extras)
				'())))
	     (waitons     (delete-duplicates (append (tests:testqueue-get-waitons test-record) extra-waits) equal?))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id #t)) ;; fastmode=yes
	     (testdat     (let ((oldtestdat (hash-table-ref/default *runs-testdat-cache* tfullname #f)))
			    (if oldtestdat
				(begin
				  (runs:testdat-hed-set!         oldtestdat hed)
				  (runs:testdat-tal-set!         oldtestdat tal)
				  (runs:testdat-reg-set!         oldtestdat reg)
				  (runs:testdat-reruns-set!      oldtestdat reruns)
				  (runs:testdat-test-record-set! oldtestdat test-record)
				  (runs:testdat-newtal-set!      oldtestdat newtal)
				  
				  (if (not (equal? (runs:testdat-waitons oldtestdat) waitons))
				      (debug:print 0 *default-log-port* " waitons changed for runs:testdat"))
				  (if (not (equal? (runs:testdat-itemmaps oldtestdat) itemmaps))
				      (debug:print 0 *default-log-port* " itemmaps changed for runs:testdat"))
				  
				  oldtestdat)
				(let ((newtestdat (make-runs:testdat
						   hed: hed
						   tal: tal
						   reg: reg
						   reruns: reruns
						   test-record: test-record
						   test-name:   test-name
						   item-path:   item-path
						   jobgroup:    jobgroup
						   waitons:     waitons
						   testmode:    testmode
						   newtal:      newtal
						   itemmaps:    itemmaps
						   ;; prereqs-not-met: prereqs-not-met
						   )))
				  (hash-table-set! *runs-testdat-cache* tfullname newtestdat)
				  newtestdat)))))
	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
1713
1714
1715
1716
1717
1718
1719


1720
1721
1722
1723
1724
1725
1726
1727
				  ;; wait for load here
				  (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
				  (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
						     (- remtries 1)))))))
		       )))))

	  ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed


	  (runs:testdat-prereqs-not-met-set! testdat (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))

	  ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))

	  (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ;; in process-expanded-tests ultimately run:test -> launch-test -> test actually running
            (if loop-list (apply loop loop-list))))








>
>
|







1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
				  ;; wait for load here
				  (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
				  (loop-can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)
						     (- remtries 1)))))))
		       )))))

	  ;; I'm not clear on why prereqs are gathered here TODO: verfiy this is needed
	  (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path
					 mode: testmode
					 itemmaps: itemmaps)

	  ;; I'm not clear on why we'd capture running job counts here TODO: verify this is needed
	  (runs:dat-can-run-more-tests-set! runsdat (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))

	  (let ((loop-list (runs:process-expanded-tests runsdat testdat))) ;; in process-expanded-tests ultimately run:test -> launch-test -> test actually running
            (if loop-list (apply loop loop-list))))

1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more    #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (not can-run-more) #;(and (list? can-run-more)
		(car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here
		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)
                      )
                  )
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))







|

|







1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more    #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (not can-run-more) #;(and (list? can-run-more) ;; IDEA, this mechanism may have had some value, make it configurable to test pros/cons TODO
		(car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat))) ;; itemized test expanded here
		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)
                      )
                  )
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id #f))) ;; fastmode=no
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (rmt:find-and-mark-incomplete run-id #f)
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes
		       num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!







|







1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
	    (if (> (current-seconds)(+ last-time-incomplete 900))
		(let ((actual-num-running (rmt:get-count-tests-running-for-run-id run-id #f))) ;; fastmode=no
		  (debug:print-info 0 *default-log-port* "Marking stuck tests as INCOMPLETE while waiting for run " run-id
				    ". Running as pid " (current-process-id) " on " (get-host-name))
		  (set! last-time-incomplete (current-seconds)) ;; FIXME, this might be causing slow down - use of set!
		  (rmt:find-and-mark-incomplete run-id #f)
		  (debug:print-info 0 *default-log-port* "run-wait specified, waiting on " actual-num-running
				    " tests in RUNNING, REMOTEHOSTSTART, LAUNCHING or LAUNCHED state at "
				    (time->string (seconds->local-time (current-seconds))))))
	    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
	    (thread-sleep! 5) ;; (if (>= num-running max-concurrent-jobs) 5 1))
	    (wait-loop (rmt:get-count-tests-running-for-run-id run-id #t) ;; fastmode=yes
		       num-running))))
    ;; LET* ((test-record
    ;; we get here on "drop through". All done!
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926

1927


1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949

1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105






2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry runsdat testdat-rec)
  ;; Decide whether a single test (or test item) should be launched and, if so,
  ;; hand it off to launch-test.
  ;;
  ;;   run-id, run-info, keyvals, runname : passed through to launch-test
  ;;   test-record        : testqueue record; supplies test name, itemdat and testconfig
  ;;   flags              : hash table of options; "-force", "-rerun" and "-keepgoing" are read here
  ;;   parent-test        : when true the "Not starting test" note is suppressed
  ;;   test-registry      : hash table; entries are set to 'DONOTRUN when the test must not be run
  ;;   all-tests-registry : hash table mapping test-name to the test source path
  ;;   runsdat            : may carry a wait-for-jobs function that is called just before launching
  ;;   testdat-rec        : argument handed to that wait-for-jobs function
  ;;
  ;; All these vars might be referenced by the testconfig file reader
  ;; 
  ;; NEED to reprocess testconfig here, ensuring that item variables are available.
  ;; This is for Tal's issue with item-specific env vars not being set for use in skip.
  ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273
  ;;
  (let* ((test-name    (tests:testqueue-get-testname   test-record))
	 (test-waitons (tests:testqueue-get-waitons    test-record))
	 (itemdat      (tests:testqueue-get-itemdat    test-record))
	 (item-path     "")
	 (db           #f)
	 (full-test-name #f))




    ;; setting itemdat to a list if it is #f
    (if (not itemdat)(set! itemdat '()))
    (set! item-path (item-list->path itemdat))
    (set! full-test-name (db:test-make-full-name test-name item-path))
    (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process

   (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!!
	    ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t))
	    (tests:testqueue-get-testconfig test-record )) 
	   (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...

	   ;; NOTE: this local binding shadows the standard Scheme procedure `force`
	   (force        (hash-table-ref/default flags "-force" #f))
	   (rerun        (hash-table-ref/default flags "-rerun" #f))
	   (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	   ;; only referenced by the commented-out stuck-test check further down
	   (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x")))
	   )

    (debug:print-info 4 *default-log-port*
		      "\nTESTNAME: " full-test-name 
		      "\n   test-config: " (hash-table->alist test-conf)
		      "\n   itemdat: " itemdat
		      )

    (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
    ;; (setenv "MT_TEST_NAME" test-name) ;; 
    ;; (setenv "MT_ITEMPATH"  item-path)
    ;; (setenv "MT_RUNNAME"   runname)
    (change-directory *toppath*)

    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
          (hash-table-set! *test-meta-updated* test-name #t)
          (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))
	   (testdat       (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
      ;; No record yet: register the test if needed, then retry every two seconds
      ;; until the test record can be read back.
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (rmt:register-test run-id test-name item-path)
		  (set! test-id (rmt:get-test-id run-id test-name item-path))))
	    (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (rmt:get-test-info-by-id run-id test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in two seconds")
		  ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
		  (thread-sleep! 2)
		  (loop)))))
      (if (not testdat) ;; should NOT happen
	  (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
      (set! test-id (db:test-get-id testdat))
      (if (common:file-exists? test-path)
	  (change-directory test-path)
	  (begin
	    (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	    (change-directory *toppath*)))
      ;; Dispatch on the current test state; -force is handled as if the state were NOT_STARTED.
      (case (if force ;; (args:get-arg "-force")
		'NOT_STARTED
		(if testdat
		    (string->symbol (test:get-state testdat))
		    'failed-to-insert))
	((failed-to-insert)
	 (debug:print-error 0 *default-log-port* "Failed to insert the record into the db"))
	((NOT_STARTED COMPLETED DELETED INCOMPLETE)
	 (let ((runflag #f))
	   ;; first matching clause decides whether the test is run
	   (cond
	    ;; -force, run no matter what
	    (force (set! runflag #t))
	    ;; DELETED, NOT_STARTED or INCOMPLETE, run no matter what
	    ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t))
	    ;; no -rerun (or -keepgoing given) and status is PASS, WARN, CHECK, SKIP or WAIVED, or state is COMPLETED: do not run
	    ((and (or (not rerun)
		      keepgoing)
		  ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		  (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
		      (member (test:get-state  testdat) '("COMPLETED")))) 
	     (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	     (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
	     (set! runflag #f))
	    ;; -rerun and status is one of the specified (comma separated) statuses, run it
	    ((and rerun
		  (let* ((rerunlst   (string-split rerun ","))
			 (must-rerun (member (test:get-status testdat) rerunlst)))
		    (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		    must-rerun))
	     (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	     (set! runflag #t))
	    ;; -keepgoing, do not rerun FAIL
	    ((and keepgoing
		  (member (test:get-status testdat) '("FAIL")))
	     (set! runflag #f))
	    ((and (not rerun)
		  (member (test:get-status testdat) '("FAIL" "n/a")))
	     (set! runflag #t))
	    (else (set! runflag #f)))
	   (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
	   (if (not runflag)
	       (if (not parent-test)
		   (if (runs:lownoise (conc "not starting test" full-test-name) 60)
		       (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) 
				    "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
				    "\" or -force to override")))
	       ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
	       ;;       already met.
	       ;; This would be a great place to do the process-fork
	       ;; 
	       ;; skip-test becomes the reason string when a [skip] condition from the testconfig applies.
	       ;; Only the first configured condition (in the order below) is evaluated.
	       (let ((skip-test   #f)
		     (skip-check  (configf:get-section test-conf "skip")))
		 (cond 
		  ;; Have to check for skip conditions. This one skips if there are same-named tests
		  ;; currently running
		  ((and skip-check
			(configf:lookup test-conf "skip" "prevrunning"))
		   ;; run-ids = #f means *all* runs
		   (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f)))
		     (if (not (null? running-tests)) ;; have to skip 
			 (set! skip-test "Skipping due to previous tests running"))))

 		    ;; split the string and OR of file-exists?
		    ((and skip-check
			  (configf:lookup test-conf "skip" "fileexists"))
		     (let* ((files    (string-split (configf:lookup test-conf "skip" "fileexists")))
			    (existing (filter common:file-exists? files)))
		       (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
			   (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists")))))

 		    ;; skip only when none of the listed files exist
 		    ((and skip-check
			  (configf:lookup test-conf "skip" "filenotexists"))
		      (let* ((files    (string-split (configf:lookup test-conf "skip" "filenotexists")))
			      (existing (filter common:file-exists? files)))
			      (if (null? existing) ;;  (common:file-exists? (configf:lookup test-conf "skip" "filenotexists")))
			        (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists")))))        
		 
       ;; skip when the configured script exits with status zero
       ((and skip-check
			(configf:lookup test-conf "skip" "script"))
		   (if (= (system (configf:lookup test-conf "skip" "script")) 0)
                        (set! skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script")))))
      
                  ;; skip when the same test is running or last completed less than rundelay ago
                  ((and skip-check
			(configf:lookup test-conf "skip" "rundelay"))
		   ;; run-ids = #f means *all* runs
		   (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			  (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHED") '() #f))
			  (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this context
			  (last-run-times  (map db:mintest-get-event_time completed-tests))
			  (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times)))))
		     (if (or (not (null? running-tests)) ;; have to skip if test is running
			     (> numseconds time-since-last))
			 (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		 
		 (if skip-test
		     (begin
		       (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
		       (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test))
		     ;;
		     ;; Here the test is handed off to launch.scm for launch-test to complete the launch process
		     ;;
		     (begin






		       ;; wait for less than max jobs here
		       (if (runs:dat-wait-for-jobs-function runsdat)
			   ((runs:dat-wait-for-jobs-function runsdat) testdat-rec))
		       
		       ;; a false return from launch-test is fatal: set the exit status and kill this process
		       (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			   (begin
			     (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			     (set! *globalexitstatus* 1) ;; 
			     (process-signal (current-process-id) signal/kill))
			   )
		       ;; wait again here?
		       ))))))
	((KILLED) 
	 (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	 ;; NOTE(review): this key is built from test-path, whereas the DONOTRUN entry above uses
	 ;; full-test-name (test-name + item-path) - confirm which key readers of test-registry expect.
	 (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
	;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	;; 			       (db:test-get-run_duration testdat)))
	;; 	(or incomplete-timeout
	;; 	    6000)) ;; i.e. no update for more than 6000 seconds
	;;      (begin
	;;        (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	;;        (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;        ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	;;      (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
	(else      
	 (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	 ;; both branches below do the same thing; COMPLETED and INCOMPLETE cannot reach this else
	 ;; unless the outer case key and the stored state differ
	 (case (string->symbol (test:get-state testdat)) 
	   ((COMPLETED INCOMPLETE)
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))
	   (else
	    (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))







|
|
|
|
<
|
>

>
>

<
<
<


|



<
|


|
|
<
|
|
|
|
|
>
|
<
<
<
|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<
<
<
<
<
<
<
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|











|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974

1975
1976
1977
1978
1979
1980



1981
1982
1983
1984
1985
1986

1987
1988
1989
1990
1991

1992
1993
1994
1995
1996
1997
1998



1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018








2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
(define (run:test run-id run-info keyvals runname test-record flags parent-test test-registry all-tests-registry runsdat testdat-rec)
  ;; Decide whether a single test (or test item) should be launched and, if so,
  ;; mark it LAUNCHING and hand it off to launch-test.
  ;;
  ;;   run-id, run-info, keyvals, runname : passed through to launch-test
  ;;   test-record        : testqueue record; supplies test name, itemdat and testconfig
  ;;   flags              : hash table of options; "-force", "-rerun" and "-keepgoing" are read here
  ;;   parent-test        : when true the "Not starting test" note is suppressed
  ;;   test-registry      : hash table; entries are set to 'DONOTRUN when the test must not be run
  ;;   all-tests-registry : hash table mapping test-name to the test source path
  ;;   runsdat            : may carry a wait-for-jobs function that is called just before launching
  ;;   testdat-rec        : argument handed to that wait-for-jobs function
  ;;
  ;; All these vars might be referenced by the testconfig file reader
  ;; 
  ;; NEED to reprocess testconfig here, ensuring that item variables are available.
  ;; This is for Tal's issue with item-specific env vars not being set for use in skip.
  ;; HSD https://hsdes.intel.com/appstore/icf/index.html#/article?articleId=1408763273
  ;;
  (let* ((test-name       (tests:testqueue-get-testname   test-record))
	 (test-waitons    (tests:testqueue-get-waitons    test-record))
	 ;; itemdat defaults to the empty list when the record carries none
	 (itemdat         (or (tests:testqueue-get-itemdat test-record) '()))
	 (item-path       (item-list->path itemdat))
	 (full-test-name  (db:test-make-full-name test-name item-path))
	 (test-id         (rmt:get-test-id run-id test-name item-path)))

    ;; NOTE: Getting here does NOT mean that the test will be launched. 

    (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path) ;; these may be needed by the launching process

    (let* ((test-conf ;; re-instate the tests:get-testconfig once the kinks are worked out. FIXME!!!
	    ;; (tests:get-testconfig test-name item-path all-tests-registry #t force-create: #t))
	    (tests:testqueue-get-testconfig test-record )) 
	   (test-path    (hash-table-ref all-tests-registry test-name)) ;; (conc *toppath* "/tests/" test-name)) ;; could use tests:get-testconfig here ...

	   ;; named forceopt (not force) so the standard Scheme procedure `force` is not shadowed;
	   ;; every test of the -force option below MUST use forceopt, the bare name `force`
	   ;; is a procedure and therefore always true.
	   (forceopt     (hash-table-ref/default flags "-force" #f))
	   (rerun        (hash-table-ref/default flags "-rerun" #f))
	   (keepgoing    (hash-table-ref/default flags "-keepgoing" #f))
	   ;; only referenced by the commented-out stuck-test check further down
	   (incomplete-timeout (string->number (or (configf:lookup *configdat* "setup" "incomplete-timeout") "x"))))

      (debug:print-info 4 *default-log-port*
			"\nTESTNAME: " full-test-name 
			"\n   test-config: " (hash-table->alist test-conf)
			"\n   itemdat: " itemdat
			)
      
      (debug:print 2 *default-log-port* "Attempting to launch test " full-test-name)
      (change-directory *toppath*)

      ;; Here is where the test_meta table is best updated
      ;; Yes, another use of a global for caching. Need a better way?
      ;;
      ;; There is now a single call to runs:update-all-test_meta and this 
      ;; per-test call is not needed. Given the delicacy of the move to 
      ;; v1.55 this code is being left in place for the time being.
      ;;
      (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
	  (begin
	    (hash-table-set! *test-meta-updated* test-name #t)
	    (runs:update-test_meta test-name test-conf)))
      
      ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
      (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	     (test-id       (or test-id (rmt:get-test-id run-id test-name item-path)))
	     (testdat       (if test-id (rmt:get-test-info-by-id run-id test-id) #f)))
	;; No record yet: register the test if needed, then retry every two seconds
	;; until the test record can be read back.
	(if (not testdat)
	    (let loop ()
	      (if (not test-id)(set! test-id (rmt:get-test-id run-id test-name item-path)))
	      (if (not test-id)
		  (begin
		    (debug:print 0 *default-log-port* "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		    (rmt:register-test run-id test-name item-path)
		    (set! test-id (rmt:get-test-id run-id test-name item-path))))
	      (debug:print-info 4 *default-log-port* "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	      (set! testdat (rmt:get-test-info-by-id run-id test-id))
	      (if (not testdat)
		  (begin
		    (debug:print-info 0 *default-log-port* "WARNING: server is overloaded, trying again in two seconds")
		    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
		    (thread-sleep! 2)
		    (loop)))))
	(if (not testdat) ;; should NOT happen
	    (debug:print-error 0 *default-log-port* "failed to get test record for test-id " test-id))
	(set! test-id (db:test-get-id testdat))
	(if (common:file-exists? test-path)
	    (change-directory test-path)
	    (begin
	      (debug:print-error 0 *default-log-port* "test run path not created before attempting to run the test. Perhaps you are running -remove-runs at the same time?")
	      (change-directory *toppath*)))
	;; Dispatch on the current test state; -force is handled as if the state were NOT_STARTED.
	(case (if forceopt ;; (args:get-arg "-force")
		  'NOT_STARTED
		  (if testdat
		      (string->symbol (test:get-state testdat))
		      'failed-to-insert))
	  ((failed-to-insert)
	   (debug:print-error 0 *default-log-port* "Failed to insert the record into the db"))
	  ((NOT_STARTED COMPLETED DELETED INCOMPLETE)
	   (let ((runflag #f))
	     ;; first matching clause decides whether the test is run
	     (cond
	      ;; -force, run no matter what
	      ;; (was testing `force`, the always-true promise procedure, which made every
	      ;;  later clause unreachable and re-ran already completed tests)
	      (forceopt (set! runflag #t))
	      ;; DELETED, NOT_STARTED or INCOMPLETE, run no matter what
	      ((member (test:get-state testdat) '("DELETED" "NOT_STARTED" "INCOMPLETE"))(set! runflag #t))
	      ;; no -rerun (or -keepgoing given) and status is PASS, WARN, CHECK, SKIP or WAIVED, or state is COMPLETED: do not run
	      ((and (or (not rerun)
			keepgoing)
		    ;; Require to force re-run for COMPLETED or *anything* + PASS,WARN or CHECK
		    (or (member (test:get-status testdat) '("PASS" "WARN" "CHECK" "SKIP" "WAIVED"))
			(member (test:get-state  testdat) '("COMPLETED")))) 
	       (debug:print-info 2 *default-log-port* "running test " test-name "/" item-path " suppressed as it is " (test:get-state testdat) " and " (test:get-status testdat))
	       (hash-table-set! test-registry full-test-name 'DONOTRUN) ;; COMPLETED)
	       (set! runflag #f))
	      ;; -rerun and status is one of the specified (comma separated) statuses, run it
	      ((and rerun
		    (let* ((rerunlst   (string-split rerun ","))
			   (must-rerun (member (test:get-status testdat) rerunlst)))
		      (debug:print-info 3 *default-log-port* "-rerun list: " rerun ", test-status: " (test:get-status testdat)", must-rerun: " must-rerun)
		      must-rerun))
	       (debug:print-info 2 *default-log-port* "Rerun forced for test " test-name "/" item-path)
	       (set! runflag #t))
	      ;; -keepgoing, do not rerun FAIL
	      ((and keepgoing
		    (member (test:get-status testdat) '("FAIL")))
	       (set! runflag #f))
	      ((and (not rerun)
		    (member (test:get-status testdat) '("FAIL" "n/a")))
	       (set! runflag #t))
	      (else (set! runflag #f)))
	     (debug:print 4 *default-log-port* "RUNNING => runflag: " runflag " STATE: " (test:get-state testdat) " STATUS: " (test:get-status testdat))
	     (if (not runflag)
		 (if (not parent-test)
		     (if (runs:lownoise (conc "not starting test" full-test-name) 60)
			 (debug:print 1 *default-log-port* "NOTE: Not starting test " full-test-name " as it is state \"" (test:get-state testdat) 
				      "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat)
				      "\" or -force to override")))
		 ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are
		 ;;       already met.
		 ;; This would be a great place to do the process-fork
		 ;; 
		 ;; skip-test becomes the reason string when a [skip] condition from the testconfig applies.
		 ;; Only the first configured condition (in the order below) is evaluated.
		 (let ((skip-test   #f)
		       (skip-check  (configf:get-section test-conf "skip")))
		   (cond 
		    ;; Have to check for skip conditions. This one skips if there are same-named tests
		    ;; currently running
		    ((and skip-check
			  (configf:lookup test-conf "skip" "prevrunning"))
		     ;; run-ids = #f means *all* runs
		     (let ((running-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHING" "LAUNCHED") '() #f)))
		       (if (not (null? running-tests)) ;; have to skip 
			   (set! skip-test "Skipping due to previous tests running"))))

		    ;; split the string and OR of file-exists?
		    ((and skip-check
			  (configf:lookup test-conf "skip" "fileexists"))
		     (let* ((files    (string-split (configf:lookup test-conf "skip" "fileexists")))
			    (existing (filter common:file-exists? files)))
		       (if (not (null? existing)) ;; (common:file-exists? (configf:lookup test-conf "skip" "fileexists"))
			   (set! skip-test (conc "Skipping due to existance of file(s) " (string-intersperse existing ", ")))))) ;; (configf:lookup test-conf "skip" "fileexists")))))

		    ;; skip only when none of the listed files exist
		    ((and skip-check
			  (configf:lookup test-conf "skip" "filenotexists"))
		     (let* ((files    (string-split (configf:lookup test-conf "skip" "filenotexists")))
			    (existing (filter common:file-exists? files)))
		       (if (null? existing) ;;  (common:file-exists? (configf:lookup test-conf "skip" "filenotexists")))
			   (set! skip-test (conc "Skipping due to non existance of files " (string-intersperse files ", ")))))) ;; (configf:lookup test-conf "skip" "filenotexists")))))
		    
		    ;; skip when the configured script exits with status zero
		    ((and skip-check
			  (configf:lookup test-conf "skip" "script"))
		     (if (= (system (configf:lookup test-conf "skip" "script")) 0)
			 (set! skip-test (conc "Skipping due to zero return value of script " (configf:lookup test-conf "skip" "script")))))
		    
		    ;; skip when the same test is running or last completed less than rundelay ago
		    ((and skip-check
			  (configf:lookup test-conf "skip" "rundelay"))
		     ;; run-ids = #f means *all* runs
		     (let* ((numseconds      (common:hms-string->seconds (configf:lookup test-conf "skip" "rundelay")))
			    (running-tests   (rmt:get-tests-for-runs-mindata #f full-test-name '("RUNNING" "REMOTEHOSTSTART" "LAUNCHING" "LAUNCHED") '() #f))
			    (completed-tests (rmt:get-tests-for-runs-mindata #f full-test-name '("COMPLETED" "INCOMPLETE") '("PASS" "FAIL" "ABORT") #f)) ;; ironically INCOMPLETE is same as COMPLETED in this context
			    (last-run-times  (map db:mintest-get-event_time completed-tests))
			    (time-since-last (- (current-seconds) (if (null? last-run-times) 0 (common:max last-run-times)))))
		       (if (or (not (null? running-tests)) ;; have to skip if test is running
			       (> numseconds time-since-last))
			   (set! skip-test (conc "Skipping due to previous test run less than " (configf:lookup test-conf "skip" "rundelay") " ago"))))))
		   
		   (if skip-test
		       (begin
			 (mt:test-set-state-status-by-id run-id test-id "COMPLETED" "SKIP" skip-test)
			 (debug:print-info 1 *default-log-port* "SKIPPING Test " full-test-name " due to " skip-test))
		       ;;
		       ;; Here the test is handed off to launch.scm for launch-test to complete the launch process
		       ;;
		       (begin
			 ;; first thing to do is to set the test to LAUNCHING to prevent or minimise
			 ;; races on exclusive mode and max_concurrent_jobs
			 (if (equal? (test:get-state testdat) "NOT_STARTED")
			     (rmt:test-set-state-status run-id test-id "LAUNCHING" "n/a" #f)
			     (debug:print 0 *default-log-port* "Odd, test " test-id " is in state " (test:get-state testdat) " yet we are trying to launch it."))
			 
			 ;; wait for less than max jobs here
			 (if (runs:dat-wait-for-jobs-function runsdat)
			     ((runs:dat-wait-for-jobs-function runsdat) testdat-rec))
			 
			 ;; a false return from launch-test is fatal: set the exit status and kill this process
			 (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags))
			     (begin
			       (print "ERROR: Failed to launch the test. Exiting as soon as possible")
			       (set! *globalexitstatus* 1) ;; 
			       (process-signal (current-process-id) signal/kill))
			     )
			 ;; wait again here?
			 ))))))
	  ((KILLED) 
	   (debug:print 1 *default-log-port* "NOTE: " full-test-name " is already running or was explictly killed, use -force to launch it.")
	   ;; NOTE(review): this key is built from test-path, whereas the DONOTRUN entry above uses
	   ;; full-test-name (test-name + item-path) - confirm which key readers of test-registry expect.
	   (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)) ;; KILLED))
	  ((LAUNCHING LAUNCHED REMOTEHOSTSTART RUNNING)  
	   (debug:print 2 *default-log-port* "NOTE: " test-name " is already running"))
	  ;; (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
	  ;; 			       (db:test-get-run_duration testdat)))
	  ;; 	(or incomplete-timeout
	  ;; 	    6000)) ;; i.e. no update for more than 6000 seconds
	  ;;      (begin
	  ;;        (debug:print 0 *default-log-port* "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	  ;;        (tests:test-set-status! run-id test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	  ;;        ;; (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "" #f))
	  ;;      (debug:print 2 *default-log-port* "NOTE: " test-name " is already running")))
	  (else      
	   (debug:print-error 0 *default-log-port* "Failed to launch test " full-test-name ". Unrecognised state " (test:get-state testdat))
	   ;; both branches below do the same thing; COMPLETED and INCOMPLETE cannot reach this else
	   ;; unless the outer case key and the stored state differ
	   (case (string->symbol (test:get-state testdat)) 
	     ((COMPLETED INCOMPLETE)
	      (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN))
	     (else
	      (hash-table-set! test-registry (db:test-make-full-name test-name test-path) 'DONOTRUN)))))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

(define (get-dir-up-n dir . params) 
  (let ((dparts  (string-split dir "/"))
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
				      ) ; end case rem-status
                                    ) ; end let
                                  ); end cond has-subrun

                                 (else
                                  ;; BB - TODO - consider backgrounding to threads to delete tests (work below) 
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?
                                              (hash-table-set! test-retry-time test-fulln (current-seconds))))
                                        (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
                                            ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first







|







2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
				      ) ; end case rem-status
                                    ) ; end let
                                  ); end cond has-subrun

                                 (else
                                  ;; BB - TODO - consider backgrounding to threads to delete tests (work below) 
                                  (debug:print-info 0 *default-log-port* "test: " test-name " itest-state: " test-state)
                                  (if (member test-state (list "RUNNING" "LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ"))
                                      (begin
                                        (if (not (hash-table-ref/default test-retry-time test-fulln #f))
                                            (begin
                                              ;; want to set to REMOVING BUT CANNOT do it here?
                                              (hash-table-set! test-retry-time test-fulln (current-seconds))))
                                        (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time)
                                            ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
                                      (if (not (null? tal))
                                         (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
                               ((kill-runs)
                                ;; RUNNING -> KILLREQ
                                ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
                                (cond
                                 ((and has-subrun (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
                                  (common:send-thunk-to-background-thread
                                   (lambda ()
                                     (let* ((subrun-remove-succeeded
                                             (subrun:kill-subrun run-dir keep-records)))
                                       #t)))
                                  (if (not (null? tal))
				    (loop (car tal)(cdr tal)))







|







2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
                                      (if (not (null? tal))
                                         (loop (car tal)(cdr tal)))))))
				(rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id)))
                               ((kill-runs)
                                ;; RUNNING -> KILLREQ
                                ;; LAUNCHED,RUNNING,REMOTEHOSTSTART -> NOT STARTED
                                (cond
                                 ((and has-subrun (member test-state (list "RUNNING" "LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")))
                                  (common:send-thunk-to-background-thread
                                   (lambda ()
                                     (let* ((subrun-remove-succeeded
                                             (subrun:kill-subrun run-dir keep-records)))
                                       #t)))
                                  (if (not (null? tal))
				    (loop (car tal)(cdr tal)))
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
			 (test-state	(vector-ref test 3))
			 (comment	(vector-ref test 14))   
			 (test-status	(vector-ref test 4))
			 (exc-msg	(conc "No bucket for State " test-state " Status " test-status))
			 (new-doc	(cond 
						((member test-state (list "RUNNING" ))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inProgress)))) doc))
						((member test-state (list "LAUNCHED" "REMOTEHOSTSTART"  "NOT_STARTED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inQueue)))) doc))
						((member test-status (list "PASS" "WARN" "WAIVED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name))))) doc))
						((member test-status (list "FAIL" "CHECK"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "failure")))))) doc)) 
						((member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "error")))))) doc))







|







2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
			 (test-state	(vector-ref test 3))
			 (comment	(vector-ref test 14))   
			 (test-status	(vector-ref test 4))
			 (exc-msg	(conc "No bucket for State " test-state " Status " test-status))
			 (new-doc	(cond 
						((member test-state (list "RUNNING" ))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inProgress)))) doc))
						((member test-state (list "LAUNCHING" "LAUNCHED" "REMOTEHOSTSTART"  "NOT_STARTED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (inQueue)))) doc))
						((member test-status (list "PASS" "WARN" "WAIVED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name))))) doc))
						((member test-status (list "FAIL" "CHECK"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "failure")))))) doc)) 
						((member test-status (list "DEAD" "KILLED" "ABORT" "PREQ_FAIL" "PREQ_DISCARDED"))
							((sxml-modify `("testsuite" insert-into (testcase (@ (name ,tc-name)) (failure (@ (message ,comment) (type "error")))))) doc))