Megatest

Changes On Branch 195f4a1733d50ea9
Login

Changes In Branch v1.65-diet2 Through [195f4a1733] Excluding Merge-Ins

This is equivalent to a diff from 275adb0d10 to 195f4a1733

2021-01-24
15:38
Merged in latest check-in: 14db3c2571 user: matt tags: v1.65-diet2
12:11
use old rollup technique check-in: 195f4a1733 user: matt tags: v1.65-diet2
2021-01-21
23:10
Sketched out possible speed up with short-circuit rollup of items check-in: 9809735c40 user: matt tags: v1.65-diet2
2021-01-17
23:34
Start from low load node and add diet one by one check-in: f462c25d37 user: matt tags: v1.65-diet2
14:29
merged diet branch Closed-Leaf check-in: b6a618cc23 user: matt tags: v1.65-pre-69-diet
2020-10-03
17:58
Bad logic on re-calc of prereq needed. The (runs:testdat-prereqs-not-met testdat) is telling you that this needs recalc as it was previously not met. Thus can bypass if *was* met previously (although why would we reach here if it was met previously?). Closed-Leaf check-in: 4e90b83f0b user: matt tags: v1.65-big-refactor (unpublished)
2020-09-05
21:50
Fixed wrong use of optional that should have been key. Closed-Leaf check-in: ff41f9d1e7 user: matt tags: v1.65-prereq-qry-freq
13:41
Try reduced frequency queries for prereq not met. ==/3.5/0.83/PASS/1201/mars/== check-in: 275adb0d10 user: matt tags: v1.65-prereq-qry-freq
11:17
Merged cleanup branch back to v1.65 ==9.4/2.2/1201/WARN/mars== check-in: 0cbf1a0b26 user: matt tags: v1.65

Modified api.scm from [4fa67bb6bd] to [280d15e219].

161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            (foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")







|







161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
				   cmd-in
				   (string->symbol cmd-in)))
            (params            (vector-ref dat 1))
            (start-t           (current-milliseconds))
            (readonly-mode     (dbr:dbstruct-read-only dbstruct))
            (readonly-command  (member cmd api:read-only-queries))
            (writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
            #;(foo               (begin
                                 (common:telemetry-log (conc "api-in:"(->string cmd))
                                                       payload: `((params . ,params)))
                                 
                                 #t))
            (res    
             (if writecmd-in-readonly-mode
                 (conc "attempt to run write command "cmd" on a read-only database")
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             (common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*







|




|







357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
       ;; save all stats
       (let ((delta-t (- (current-milliseconds)
			 start-t)))
	 (hash-table-set! *db-api-call-time* cmd
			  (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
       (if writecmd-in-readonly-mode
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #t)))
	     (vector #f res))
           (begin
             #;(common:telemetry-log (conc "api-out:"(->string cmd))
                                   payload: `((params . ,params)
                                              (ok-res . #f)))
             (vector #t res))))))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*

Modified common.scm from [a82c407907] to [5791123b30].

3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
	     (begin
	       (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

(define *common:telemetry-log-state* 'startup)
(define *common:telemetry-log-socket* #f)

(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
             (serverport (configf:lookup-number *configdat* "telemetry" "port"))
             (user (or (get-environment-variable "USER") "unknown"))
             (host (or (get-environment-variable "HOST") "unknown")))
        (set! *common:telemetry-log-state*
              (handle-exceptions
               exn
               (begin
                 (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
                 'broken)
               (if (and serverhost serverport user host)
                   (let* ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (udp-connect! s serverhost serverport)
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin







|
|

|



















|







3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
	     (begin
	       (debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
               #t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
             (thread-join! thread))
           )))
   (hash-table-keys *common:thread-punchlist*)))

#;(define *common:telemetry-log-state* 'startup)
#;(define *common:telemetry-log-socket* #f)

#;(define (common:telemetry-log-open)
  (if (eq? *common:telemetry-log-state* 'startup)
      (let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
             (serverport (configf:lookup-number *configdat* "telemetry" "port"))
             (user (or (get-environment-variable "USER") "unknown"))
             (host (or (get-environment-variable "HOST") "unknown")))
        (set! *common:telemetry-log-state*
              (handle-exceptions
               exn
               (begin
                 (debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
                 'broken)
               (if (and serverhost serverport user host)
                   (let* ((s (udp-open-socket)))
                     ;;(udp-bind! s #f 0)
                     (udp-connect! s serverhost serverport)
                     (set! *common:telemetry-log-socket* s)
                     'open)
                   'not-needed))))))
  
#;(define (common:telemetry-log event #!key (payload '()))
  (if (eq? *common:telemetry-log-state* 'startup)
      (common:telemetry-log-open))

  (if (eq? 'open *common:telemetry-log-state*)
      (handle-exceptions
       exn
       (begin
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
  
(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))








|












3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
                   (base64:base64-encode
                    (z3:encode-buffer
                     (with-output-to-string (lambda () (pp payload))))))
                  (msg     (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
                                 toppath":"payload-serialized)))
             (udp-send *common:telemetry-log-socket* msg))))))
  
#;(define (common:telemetry-log-close)
  (when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
    (handle-exceptions
     exn
     (begin
       (define *common:telemetry-log-state* 'closed-fail)
       (debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
       )
     (begin
       (define *common:telemetry-log-state* 'closed)
       (udp-close-socket *common:telemetry-log-socket*)
       (set! *common:telemetry-log-socket* #f)))))

Modified db.scm from [2f649dc1fb] to [fce7914eb2].

3966
3967
3968
3969
3970
3971
3972
3973















































































































3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))
















































































































;; state is the priority rollup of all states
;; status is the priority rollup of all completed statesfu
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)








>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





|







3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093
4094
4095
4096
4097
;;    ;; 	(db:general-call dbdat 'state-status-msg (list state status msg test-id))
;;    ;; 	(db:general-call dbdat 'state-status     (list state status test-id)))
;;    (db:set-state-status-and-roll-up-items dbstruct run-id test-id #f state status msg)
;;    ;; process the test_data table
;;    (if (and test-id state status (equal? status "AUTO")) 
;; 	(db:test-data-rollup dbstruct run-id test-id status))
;;    (mt:process-triggers dbstruct run-id test-id state status)))

;; NOT FINISHED 
(define (db:calc-state-status-toplevel state status tl-state tl-status)
  `(,state ,status))

;;   (match state
;;     (("COMPLETED")
;;      (match `(,tl-state ,tl-status)
;;        (("COMPLETED" "PASS") `(,state ,status))
;;        (("COMPLETED" thestatus)
;; 	(case (string->symbol thestatus)
;; 	  ((ABORT CHECK DEAD)
;; 	   (if `("COMPLETED" ,thestatus))
;; 	(match `(,thestatus ,status)
;; 	  (("FAIL" "ABORT") '("COMPLETED" "ABORT"))
;; 	  (("FAIL" "CHECK") '("COMPLETED" "CHECK"))
;; 	  (("FAIL" "DEAD")  '("COMPLETED" "DEAD"))
;; 	  (("WARN" "FAIL")  '("COMPLETED" "FAIL"))
;; 	  (("WARN" "CHECK") '("COMPLETED" "CHECK"))
;; 	  (("WARN" "DEAD")
       
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  ;; (mutex-lock! *db-transaction-mutex*) ;; why do we need a mutex?
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (no-sync-db   (db:no-sync-db #f))
	 (rollup-flag  #f)
	 (wait-flag    #f)
	 (rollup-lock-key  (conc run-id "-rollup-" test-name))
	 (waiting-lock-key (conc run-id "-waiting-" test-name)))
    (db:test-set-state-status dbstruct run-id test-id state status #f)
    (if (and test-id state status (equal? status "AUTO")) 
	(db:test-data-rollup dbstruct run-id test-id status))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbstruct 'set-test-start-time (list test-id)))

    (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
	(begin
	  ;; is there a rollup lock? If not, take it
	  (sqlite3:with-transaction
	   no-sync-db
	   (lambda ()
	     (let* ((rollup-lock-time (db:no-sync-get/default no-sync-db rollup-lock-key #f))
		    (waiting-lock-time (db:no-sync-get/default no-sync-db waiting-lock-key #f)))
	       (if rollup-lock-time ;; someone is doing a rollup
		   (if (not waiting-lock-time) ;; no one is waiting
		       (begin
			 (set! wait-flag #t)
			 (set! rollup-flag #t)
			 (db:no-sync-set no-sync-db waiting-lock-key (current-seconds)))) ;; we are going to wait
		   (begin
		     (set! rollup-flag #t)
		     (db:no-sync-set no-sync-db rollup-lock-key (current-seconds)))))))
	  (if wait-flag
	      (let loop ((count 100))
		(thread-sleep! 2)
		(if (and (not (db:no-sync-get/default no-sync-db waiting-lock-key #f))
			 (> count 0))
		    (loop (+ count 1))
		    (sqlite3:with-transaction
		     no-sync-db
		     (lambda ()
		       (db:no-sync-set no-sync-db rollup-lock-key (current-seconds))
		       (db:no-sync-del! no-sync-db waiting-lock-key))))))
	  ;; now the rollup
	  (if rollup-flag ;; put this into a thread
	      (thread-start! (make-thread
			      (lambda ()
				(db:roll-up-test-state-status dbstruct run-id test-name state status)
				(db:no-sync-del! no-sync-db rollup-flag))
			      (conc "thread for run-id: " run-id " test-name: " test-name))))))))

;; I'd like to remove the need for item-path - it is logically not needed here
;; for now we pass in state and status - NOTE: There is a possible race if a test
;; is rapidly re-run while an earlier run is waiting to rollup.
;;
(define (db:roll-up-test-state-status dbstruct run-id test-name state status)
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name "")))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
	 (tl-test-id   (db:test-get-id tl-testdat)))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       ;; NB// Pass the db so it is part fo the transaction
       ;; item-path is used in get-all-state-status counts to exclude current state/status of THIS test
       ;; but with the state/status being set earlier this is not needed any longer
       (let* ((state-status-counts  (db:get-all-state-status-counts-for-testname dbstruct run-id test-name))
	      (state-statuses       (db:roll-up-rules state-status-counts state status))
	      (newstate             (if (null? state-statuses)
					state
					(car state-statuses)))
	      (newstatus            (if (null? state-statuses)
					status
					(cadr state-statuses))))
	 (if tl-test-id
	     (db:test-set-state-status db run-id tl-test-id newstate newstatus #f))
	 )))
    #t))

;; state is the priority rollup of all states
;; status is the priority rollup of all completed statesfu
;;
;; if test-name is an integer work off that instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items-orig dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:get-test-info       dbstruct run-id test-name item-path)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
4044
4045
4046
4047
4048
4049
4050
4051
4052
4053
4054
4055
4056
4057
4058
4059
4060
4061
4062
4063
4064
4065
4066
4067
4068
4069
4070
4071
4072
4073
4074
4075
4076
4077
4078
4079
4080
4081
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092


4093


4094
4095
4096
4097
4098
4099
4100
4101
4102
4103
4104
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			      						  (state-stauses (db:roll-up-rules state-status-counts state status))
                          (newstate (car state-stauses))
                          (newstatus (cadr state-stauses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
		(let* ((running     (length (filter (lambda (x)
                          (member (dbr:counts-state x) *common:running-states*))
                                 state-status-counts)))
           (bad-not-started      (length (filter (lambda (x)
                                      (and (equal? (dbr:counts-state x) "NOT_STARTED") 
                                        (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
																	state-status-counts)))
           (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
                                    (delete-duplicates
                                      (if (and state (not (member state *common:dont-roll-up-states*)))
                                          (cons state (map dbr:counts-state state-status-counts))
                                          (map dbr:counts-state state-status-counts)))
                                                  *common:std-states* >))
           (all-curr-statuses    (common:special-sort  ;; worst -> best
                                    (delete-duplicates
                                      (if (and state status (not (member state *common:dont-roll-up-states*)))
                                          (cons status (map dbr:counts-status state-status-counts))
                                          (map dbr:counts-status state-status-counts)))
                                                   *common:std-statuses* >))
           (non-completes        (filter (lambda (x)
							 										 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
						       									all-curr-states))
			     (preq-fails        (filter (lambda (x)
							 								(equal? x "PREQ_FAIL"))
						       							all-curr-statuses))
           (num-non-completes (length non-completes))
 					 (newstate          (cond
															((> running 0)           "RUNNING")            ;; anything running, call the situation running
                              ((> (length preq-fails) 0) "NOT_STARTED")
															((> bad-not-started 0)   "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
															((> num-non-completes 0) (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
															(else                    (car all-curr-states))))
           (newstatus         (cond
                              ((> (length preq-fails) 0)  "PREQ_FAIL")
                              ((or (> bad-not-started 0)
                                   (and (equal? newstate "NOT_STARTED")
                                      (> num-non-completes 0)))
                                            "STARTED")
                              (else (car all-curr-statuses)))))
 					(debug:print-info 2 *default-log-port*
                                         "\n--> probe db:set-state-status-and-roll-up-items: "
                                         "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
                                         "\n--> running:             "running
                                         "\n--> bad-not-started:     "bad-not-started
                                         "\n--> non-non-completes:   "num-non-completes
                                         "\n--> non-completes:       "non-completes
                                         "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

                        ;; NB// Pass the db so it is part of the transaction
         (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
													(state-stauses (db:roll-up-rules state-status-counts #f #f ))


                          (newstate (car state-stauses))


                          (newstatus (cadr state-stauses))) 
                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
         (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
 (let* ((test-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)







|



|
|
|
















|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|




<
|
|


|








|
>
>
|
>
>
|


|







4108
4109
4110
4111
4112
4113
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123
4124
4125
4126
4127
4128
4129
4130
4131
4132
4133
4134
4135
4136
4137
4138
4139
4140
4141
4142
4143
4144
4145
4146
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167
4168
4169
4170
4171
4172
4173
4174
4175
4176
4177
4178
4179
4180
4181
4182
4183
4184
4185
4186
4187
4188

4189
4190
4191
4192
4193
4194
4195
4196
4197
4198
4199
4200
4201
4202
4203
4204
4205
4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part of the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
                 (if (not (equal? item-path "")) ;; only roll up IF incoming test is an item
                     (let* ((state-status-counts  (db:get-all-state-status-counts-for-test dbstruct run-id test-name item-path state status)) ;; item-path is used to exclude current state/status of THIS test
			    (state-statuses        (db:roll-up-rules state-status-counts state status))
                          (newstate  (car state-statuses))
                          (newstatus (cadr state-statuses)))
                       (debug:print 4 *default-log-port* "BB> tl-test-id="tl-test-id" ; "test-name":"item-path" newstate="newstate" newstatus="newstatus" len(sscs)="(length state-status-counts)  " state-status-counts: "
							(apply conc
                  (map (lambda (x)
                     (conc
                     		(with-output-to-string (lambda () (pp (dbr:counts->alist x)))) " | "))
                              state-status-counts))); end debug:print
   
                       (if tl-test-id
			   (db:test-set-state-status db run-id tl-test-id newstate newstatus #f)) ;; we are still in the transaction - must access the db and not the dbstruct
		       ))))))
         (mutex-unlock! *db-transaction-mutex*)
         (if (and test-id state status (equal? status "AUTO")) 
             (db:test-data-rollup dbstruct run-id test-id status))
         tr-res)))))

(define (db:roll-up-rules state-status-counts state status)
  (let* ((running              (length (filter (lambda (x)
						 (member (dbr:counts-state x) *common:running-states*))
					       state-status-counts)))
	 (bad-not-started      (length (filter (lambda (x)
						 (and (equal? (dbr:counts-state x) "NOT_STARTED") 
						      (not (member (dbr:counts-status x)  *common:not-started-ok-statuses*))))
					       state-status-counts)))
	 (all-curr-states      (common:special-sort  ;; worst -> best (sort of)
				(delete-duplicates
				 (if (and state (not (member state *common:dont-roll-up-states*)))
				     (cons state (map dbr:counts-state state-status-counts))
				     (map dbr:counts-state state-status-counts)))
				*common:std-states* >))
	 (all-curr-statuses    (common:special-sort  ;; worst -> best
				(delete-duplicates
				 (if (and state status (not (member state *common:dont-roll-up-states*)))
				     (cons status (map dbr:counts-status state-status-counts))
				     (map dbr:counts-status state-status-counts)))
				*common:std-statuses* >))
	 (non-completes        (filter (lambda (x)
					 (not (member x (cons "COMPLETED" *common:dont-roll-up-states*))))
				       all-curr-states))
	 (preq-fails        (filter (lambda (x)
				      (equal? x "PREQ_FAIL"))
				    all-curr-statuses))
	 (num-non-completes (length non-completes))
	 (newstate          (cond
			     ((> running 0)             "RUNNING")            ;; anything running, call the situation running
			     ((> (length preq-fails) 0) "NOT_STARTED")
			     ((> bad-not-started 0)     "COMPLETED")          ;; we have an ugly situation, it is completed in the sense we cannot do more.
			     ((> num-non-completes 0)   (car non-completes))  ;;  (remove (lambda (x)(equal? "COMPLETED" x)) all-curr-states))) ;; only rollup DELETED if all DELETED
			     (else                      (car all-curr-states))))
	 (newstatus         (cond
			     ((> (length preq-fails) 0)  "PREQ_FAIL")
			     ((or (> bad-not-started 0)
				  (and (equal? newstate "NOT_STARTED")
				       (> num-non-completes 0)))
			      "STARTED")
			     (else (car all-curr-statuses)))))
    (debug:print-info 2 *default-log-port*
		      "\n--> probe db:set-state-status-and-roll-up-items: "
		      "\n--> state-status-counts: "(map dbr:counts->alist state-status-counts)
		      "\n--> running:             "running
		      "\n--> bad-not-started:     "bad-not-started
		      "\n--> non-non-completes:   "num-non-completes
		      "\n--> non-completes:       "non-completes
		      "\n--> all-curr-states:     "all-curr-states
                                         "\n--> all-curr-statuses:     "all-curr-statuses
                                         "\n--> newstate              "newstate
                                         "\n--> newstatus            "newstatus
                                         "\n\n")

    ;; NB// Pass the db so it is part of the transaction
    (list newstate newstatus)))

(define (db:set-state-status-and-roll-up-run dbstruct run-id curr-state curr-status)
    ;; (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                   (let* ((state-status-counts  (db:get-all-state-status-counts-for-run dbstruct run-id))
			  (state-statuses        (db:roll-up-rules state-status-counts #f #f ))
                          (newstate             (if (null? state-statuses)
						    curr-state
						    (car state-statuses)))
			  (newstatus            (if (null? state-status)
						    curr-status
						    (cadr state-statuses))))
                    (if (or (not (eq? newstate curr-state)) (not (eq?  newstatus curr-status)))
                   (db:set-run-state-status dbstruct run-id newstate newstatus )))))))
         ;; (mutex-unlock! *db-transaction-mutex*)
         tr-res))))


(define (db:get-all-state-status-counts-for-run dbstruct run-id)
 (let* ((test-count-recs (db:with-db
                                  dbstruct #f #f
                                  (lambda (db)
4146
4147
4148
4149
4150
4151
4152















4153
4154
4155
4156
4157
4158
4159

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))
















;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
;;    run-id test-name))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288

         (nonmatch-countrec-lambda (lambda (countrec) (not (match-countrec-lambda countrec))))
         
         (unrelated-rec-list   
          (filter nonmatch-countrec-lambda other-items-count-recs)))
    
    (cons updated-count-rec unrelated-rec-list)))

;; full count not including toplevel
;;
(define (db:get-all-state-status-counts-for-testname dbstruct run-id test-name)
  (let* ((test-count-recs (db:with-db
			   dbstruct #f #f
			   (lambda (db)
			     (sqlite3:map-row
			      (lambda (state status count)
				(make-dbr:counts state: state status: status count: count))
			      db
			      "SELECT state,status,count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' GROUP BY state,status;"
                                     run-id test-name)))))
    test-count-recs))


;; (define (db:get-all-item-states db run-id test-name)
;;   (sqlite3:map-row 
;;    (lambda (a) a)
;;    db
;;    "SELECT DISTINCT state FROM tests WHERE item_path != '' AND state != 'DELETED' AND run_id=? AND testname=?"
;;    run-id test-name))

Modified launch.scm from [d0067277fa] to [4cbc0e59d3].

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds







|







209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)

    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory)))
               (last-sync (current-seconds)))
      ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
      (let* ((over-time     (> (current-seconds) (+ last-sync update-period)))
             (new-cpu-load  (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
                                   (delta (abs (- load cpu-load))))
                              (if (> delta 0.1) ;; don't bother updating with small changes
                                  load
                                  #f)))
             (new-disk-free (let* ((df    (if over-time ;; only get df every 30 seconds
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second







|


















|

|







231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
         ((and runtlim (> (- (current-seconds) start-seconds) runtlim))
          (set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
          (set! kill-job? #t))
         ((equal? status "DEAD")
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          (rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
          ;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
          (set! kill-job? #f)))

        (debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
        (launch:handle-zombie-tests run-id)
        (when do-sync
          ;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
          ;;  (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
          ;; (common:telemetry-log "zombie" (conc  "launch:monitor-job - dosync started at "(current-seconds)))
          (tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
          #;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
        
	(if kill-job? 
	    (begin
              (debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
	      (mutex-lock! m)
	      ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
	      ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second

Modified mt.scm from [e9055c2687] to [283ae4be89].

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))








|







100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
		   (if last-time
		       (< (current-seconds)(+ last-time 5))
		       #f))))
    (if useres
	(let ((result (vector-ref res 1)))
	  (debug:print 4 *default-log-port* "Using lazy value res: " result)
	  result)
	(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode itemmaps)))
	  (hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
	  newres))))

(define (mt:get-run-stats dbstruct run-id)
;;  Get run stats from local access, move this ... but where?
  (db:get-run-stats dbstruct run-id))

Modified rmt.scm from [39d97c528a] to [46b655b2eb].

656
657
658
659
660
661
662

663
664
665
666
667
668
669
670
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))


(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))

(define (rmt:get-not-completed-cnt run-id)
  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))







>
|







656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
  (let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
    (apply append 
	   (map (lambda (run-id)
		  (rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
	   run-ids))))

;; NOTE: rmt functions can NEVER have key params as they might be called as local
(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmaps #f))
  (rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))

(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
  (rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))

(define (rmt:get-not-completed-cnt run-id)
  (rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))

Modified runs.scm from [7d76065ab9] to [4de72ed2f2].

831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!optional (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
  (if (and (runs:testdat-prereqs-not-met testdat)
	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
      (runs:testdat-prereqs-not-met testdat)
      (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps)))
		    (if (list? res)
			res
			(begin
			  (debug:print 0 *default-log-port*
				       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
				       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
			  '())))))







|



|







831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
;;    => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;;   prefer next hed to be from reg than tal.

(define runs:nothing-left-in-queue-count 0)

(define (runs:lazy-get-prereqs-not-met  testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
  (if (and (runs:testdat-prereqs-not-met testdat)
	   (< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
      (runs:testdat-prereqs-not-met testdat)
      (let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode itemmaps)))
		    (if (list? res)
			res
			(begin
			  (debug:print 0 *default-log-port*
				       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
				       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
			  '())))))
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	  #;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
			    (if (list? res)
				res
				(begin
				  (debug:print 0 *default-log-port*
					       "ERROR: rmt:get-prereqs-not-met returned non-list!\n"
					       "  res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
				  '())))
	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
	 ;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))







<
<
<
<
<
<
<
<

<







860
861
862
863
864
865
866








867

868
869
870
871
872
873
874
;;	         (tal         (cdr sorted-test-names))
;;	         (reg         '()) ;; registered, put these at the head of tal 
;;	         (reruns      '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
			   can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))








	 (have-itemized   (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))

	 (fails           (runs:calc-fails prereqs-not-met))
	 (prereq-fails    (runs:calc-prereq-fail prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met))
	 (runnables       (runs:calc-runnable prereqs-not-met))
         (unexpanded-prereqs
          (filter (lambda (testname)
                    (let* ((test-rec (hash-table-ref test-records testname))
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))
	 ;; (prereqs-not-met        (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
	 ;; (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))







<
<







1106
1107
1108
1109
1110
1111
1112


1113
1114
1115
1116
1117
1118
1119
	 (run-limits-info        (runs:dat-can-run-more-tests runsdat))
	 ;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources         (car run-limits-info))
	 (num-running            (list-ref run-limits-info 1))
	 (num-running-in-jobgroup(list-ref run-limits-info 2)) 
	 (max-concurrent-jobs    (list-ref run-limits-info 3))
	 (job-group-limit        (list-ref run-limits-info 4))


	 (fails                  (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
				      (runs:calc-fails prereqs-not-met)
				      (begin
					(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
					'())))
	 (non-completed           (filter (lambda (x)             ;; remove hed from not completed list, duh, of course it is not completed!
					    (not (equal? x hed)))
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps
                   ;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))







<







1505
1506
1507
1508
1509
1510
1511

1512
1513
1514
1515
1516
1517
1518
                   registry-mutex: registry-mutex
                   flags: flags
                   keyvals: keyvals
                   run-info: run-info
                   ;; newtal: newtal
                   all-tests-registry: all-tests-registry
                   ;; itemmaps: itemmaps

                   ;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
                   )))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))

Modified tests.scm from [0094b671e6] to [6adeda2896].

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
;;======================================================================

;;======================================================================
;; Tests
;;======================================================================

(declare (unit tests))
(declare (uses lock-queue))
(declare (uses db))
(declare (uses tdb))
(declare (uses common))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))







|







19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
;;======================================================================

;;======================================================================
;; Tests
;;======================================================================

(declare (unit tests))
;;(declare (uses lock-queue))
(declare (uses db))
(declare (uses tdb))
(declare (uses common))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))