Megatest

Diff
Login

Differences From Artifact [631ee0cd7b]:

To Artifact [a971957198]:


112
113
114
115
116
117
118


119

120
121
122
123
124
125
126
	     (hash-table-set! vals (car key) (cadr key))) ;; (cdb:remote-run db:get-run-key-val #f run-id (car key))))
	   keyvals)))
    ;; from the cached data set the vars
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 "setenv " key " " val)


       (setenv key val)))

    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (setenv "MT_RUNNAME" (if inrunname inrunname (cdb:remote-run db:get-run-name-from-id #f run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)))

(define (set-item-env-vars itemdat)







>
>
|
>







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
	     (hash-table-set! vals (car key) (cadr key))) ;; (cdb:remote-run db:get-run-key-val #f run-id (car key))))
	   keyvals)))
    ;; from the cached data set the vars
    (hash-table-for-each
     vals
     (lambda (key val)
       (debug:print 2 "setenv " key " " val)
       (if (and (string? key)
		(string? val))
	   (setenv key val)
	   (debug:print 0 "ERROR: Malformed environment variable definition: var=" var ", val=" val))))
    (if (not (get-environment-variable "MT_TARGET"))(setenv "MT_TARGET" target))
    (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))
    ;; Lets use this as an opportunity to put MT_RUNNAME in the environment
    (setenv "MT_RUNNAME" (if inrunname inrunname (cdb:remote-run db:get-run-name-from-id #f run-id)))
    (setenv "MT_RUN_AREA_HOME" *toppath*)))

(define (set-item-env-vars itemdat)
203
204
205
206
207
208
209





210
211
212
213
214
215
216
	 (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
	 (required-tests    '())
	 (test-records       (make-hash-table))
	 (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     (hash-table-keys all-tests-registry))
	 (test-names         (tests:filter-test-names all-test-names test-patts)))





    (set-megatest-env-vars run-id inkeys: keys) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars")
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)







>
>
>
>
>







206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
	 (deferred          '()) ;; delay running these since they have a waiton clause
	 (runconfigf         (conc  *toppath* "/runconfigs.config"))
	 (required-tests    '())
	 (test-records       (make-hash-table))
	 (all-tests-registry (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     (hash-table-keys all-tests-registry))
	 (test-names         (tests:filter-test-names all-test-names test-patts)))

    ;; Update the synchronous setting in the db based on the default or what is set by the user
    ;; This is done once here on a call to run tests rather than on every call to open-db
    (cdb:remote-run db:set-sync #f)

    (set-megatest-env-vars run-id inkeys: keys) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars")
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))
    
    ;; look up all tests matching the comma separated list of globs in
    ;; test-patts (using % as wildcard)
224
225
226
227
228
229
230



231
232
233
234
235
236
237
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (cdb:delete-tests-in-state *runremote* run-id "NOT_STARTED")
	  (cdb:remote-run db:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))




    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;







>
>
>







232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
	(begin
	  ;; have to delete test records where NOT_STARTED since they can cause -keepgoing to 
	  ;; get stuck due to becoming inaccessible from a failed test. I.e. if test B depends 
	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  (cdb:delete-tests-in-state *runremote* run-id "NOT_STARTED")
	  (cdb:remote-run db:set-tests-state-status #f run-id test-names #f "FAIL" "NOT_STARTED" "FAIL")))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
    ;;
    ;; What happended, this code is now duplicated in tests!?
    ;;
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch
     
     ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a)
	      '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)







|

|


















|







376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
      (cdr reg)
      (if (null? tal) ;; if tal is null and reg not full then '() as reg contents moved to tal
	  '()
	  reg)))

(define runs:nothing-left-in-queue-count 0)

(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records)
  (let* ((loop-list       (list hed tal reg reruns))
	 (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails           (runs:calc-fails prereqs-not-met))
	 (non-completed   (runs:calc-not-completed prereqs-not-met)))
    (debug:print-info 4 "START OF INNER COND #2 "
		      "\n can-run-more:    " can-run-more
		      "\n testname:        " hed
		      "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met)
		      "\n non-completed:   " (runs:pretty-string non-completed) 
		      "\n fails:           " (runs:pretty-string fails)
		      "\n testmode:        " testmode
		      "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel)
		      "\n (null? non-completed):    " (null? non-completed)
		      "\n reruns:          " reruns
		      "\n items:           " items
		      "\n can-run-more:    " can-run-more)

    (cond
     ;; all prereqs met, fire off the test
     ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch

     ((member (hash-table-ref/default test-registry (runs:make-full-test-name hed item-path) 'n/a)
	      '(DONOTRUN removed)) ;; *common:cant-run-states-sym*) ;; '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE)) ;; try to catch repeat processing of COMPLETED tests here
      (debug:print-info 1 "Test " hed " set to \"" (hash-table-ref test-registry (runs:make-full-test-name hed item-path)) "\". Removing it from the queue")
      (if (or (not (null? tal))
	      (not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
	(set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
	   (not (null? non-completed)))
      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed







|







434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
	(set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
	(let ((items-list (items:get-items-from-config tconfig)))
	  (if (list? items-list)
	      (begin
		(tests:testqueue-set-items! test-record items-list)
		(list hed tal reg reruns))
	      (begin
		(debug:print 0 "ERROR: The proc from reading the items table did not yield a list - please report this")
		(exit 1))))))

     ((and (null? fails)
	   (not (null? non-completed)))
      (let* ((allinqueue (map (lambda (x)(if (string? x) x (db:test-get-testname x)))
        		      (append newtal reruns)))
	     ;; prereqstrs is a list of test names as strings that are prereqs for hed
448
449
450
451
452
453
454
455

456



457
458
459
460
461
462
463
464
465
466
467
468
469
	;; We need to use this to dequeue this item as CANNOTRUN
	(for-each (lambda (prereq)
		    (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			(set! give-up #t)))
		  prereqstrs)
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (begin

	      (debug:print 1 "WARNING: test " hed " has no discarded prerequisites, removing it from the queue")



	      (list (runs:queue-next-hed tal reg reglen regfull)
		    (runs:queue-next-tal tal reg reglen regfull)
		    (runs:queue-next-reg tal reg reglen regfull)
		    reruns))
	    (list (car newtal)(append (cdr newtal) reg) '() reruns))))


     ;; (debug:print-info 1 "allinqueue: " allinqueue)
     ;; (debug:print-info 1 "prereqstrs: " prereqstrs)
     ;; (debug:print-info 1 "notinqueue: " notinqueue)
     ;; (debug:print-info 1 "tal:        " tal)
     ;; (debug:print-info 1 "newtal:     " newtal)
     ;; (debug:print-info 1 "reg:        " reg)







|
>
|
>
>
>
|
|
|
|
|
<







459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476

477
478
479
480
481
482
483
	;; We need to use this to dequeue this item as CANNOTRUN
	(for-each (lambda (prereq)
		    (if (eq? (hash-table-ref/default test-registry prereq 'justfine) 'CANNOTRUN)
			(set! give-up #t)))
		  prereqstrs)
	(if (and give-up
		 (not (and (null? tal)(null? reg))))
	    (let ((trimmed-tal (mt:discard-blocked-tests run-id hed tal test-records))
		  (trimmed-reg (mt:discard-blocked-tests run-id hed reg test-records)))
	      (debug:print 1 "WARNING: test " hed " has discarded prerequisites, removing it from the queue")
	      (if (and (null? trimmed-tal)
		       (null? trimmed-reg))
		  #f
		  (list (runs:queue-next-hed trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-tal trimmed-tal trimmed-reg reglen regfull)
			(runs:queue-next-reg trimmed-tal trimmed-reg reglen regfull)
			reruns)))
	      (list (car newtal)(append (cdr newtal) reg) '() reruns))))


     ;; (debug:print-info 1 "allinqueue: " allinqueue)
     ;; (debug:print-info 1 "prereqstrs: " prereqstrs)
     ;; (debug:print-info 1 "notinqueue: " notinqueue)
     ;; (debug:print-info 1 "tal:        " tal)
     ;; (debug:print-info 1 "newtal:     " newtal)
     ;; (debug:print-info 1 "reg:        " reg)
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry)
  (let* ((run-limits-info         (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (mt:get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns)))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)







|







583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
(define (runs:process-expanded-tests hed tal reg reruns reglen regfull test-record runname test-name item-path jobgroup max-concurrent-jobs run-id waitons item-path testmode test-patts required-tests test-registry registry-mutex flags keyvals run-info newtal all-tests-registry)
  (let* ((run-limits-info         (runs:can-run-more-tests jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
	 (have-resources          (car run-limits-info))
	 (num-running             (list-ref run-limits-info 1))
	 (num-running-in-jobgroup (list-ref run-limits-info 2)) 
	 (max-concurrent-jobs     (list-ref run-limits-info 3))
	 (job-group-limit         (list-ref run-limits-info 4))
	 (prereqs-not-met         (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode))
	 (fails                   (runs:calc-fails prereqs-not-met))
	 (non-completed           (runs:calc-not-completed prereqs-not-met))
	 (loop-list               (list hed tal reg reruns)))
    (debug:print-info 4 "have-resources: " have-resources " prereqs-not-met: (" 
		      (string-intersperse 
		       (map (lambda (t)
			      (if (vector? t)
666
667
668
669
670
671
672




673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692


693
694
695
696
697
698
699
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (eq? testmode 'toplevel)
		    (null? non-completed))))




      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.

      (if (not (null? prereqs-not-met))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse prereqs-not-met ", ")))


      
      (if (null? fails)
	  (begin
	    ;; couldn't run, take a breather
	    (debug:print-info 0 "Waiting for more work to do...")
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))







>
>
>
>

















|

|
>
>







680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
     
     ;; This is the final stage, everything is in place so launch the test
     ;;
     ((and have-resources
	   (or (null? prereqs-not-met)
	       (and (eq? testmode 'toplevel)
		    (null? non-completed))))
      ;; (hash-table-delete! *max-tries-hash* (runs:make-full-test-name test-name item-path))
      ;; we are going to reset all the counters for test retries by setting a new hash table
      ;; this means they will increment only when nothing can be run
      (set! *max-tries-hash* (make-hash-table))
      (run:test run-id run-info keyvals runname test-record flags #f test-registry all-tests-registry)
      (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running)
      (runs:shrink-can-run-more-tests-count)  ;; DELAY TWEAKER (still needed?)
      ;; (thread-sleep! *global-delta*)
      (if (or (not (null? tal))(not (null? reg)))
	  (list (runs:queue-next-hed tal reg reglen regfull)
		(runs:queue-next-tal tal reg reglen regfull)
		(runs:queue-next-reg tal reg reglen regfull)
		reruns)
	  #f))
     
     ;; must be we have unmet prerequisites
     ;;
     (else
      (debug:print 4 "FAILS: " fails)
      ;; If one or more of the prereqs-not-met are FAIL then we can issue
      ;; a message and drop hed from the items to be processed.
      ;; (runs:mixed-list-testname-and-testrec->list-of-strings prereqs-not-met)
      (if (not (null? prereqs-not-met))
	  (debug:print-info 1 "waiting on tests; " (string-intersperse 
						    (runs:mixed-list-testname-and-testrec->list-of-strings 
						     prereqs-not-met) ", ")))
      
      (if (null? fails)
	  (begin
	    ;; couldn't run, take a breather
	    (debug:print-info 0 "Waiting for more work to do...")
	    (thread-sleep! 1)
	    (list (car newtal)(cdr newtal) reg reruns))
712
713
714
715
716
717
718





719
720
721
722
723
724





725
726
727
728
729
730
731
732
733
734
735
736

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754






755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770



771
772
773
774
775
776
777
			  (cons hed reruns)))
		  (begin
		    (debug:print 0 "WARNING: Test not processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (list hed tal reg reruns)
		    (list (car newtal)(cdr newtal) reg reruns)
		    ))))))))






;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))





  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1)))


    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))







      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (string->symbol m) 'normal)))
	     (waitons     (tests:testqueue-get-waitons    test-record))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))




	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f))
	    (begin
	      (cdb:tests-register-test *runremote* run-id test-name "")
	      (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))







>
>
>
>
>






>
>
>
>
>











|
>


















>
>
>
>
>
>
















>
>
>







732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
			  (cons hed reruns)))
		  (begin
		    (debug:print 0 "WARNING: Test not processed correctly. Could be a race condition in your test implementation? Dropping test " hed) ;;  " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)")
		    (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?)
		    ;; (list hed tal reg reruns)
		    (list (car newtal)(cdr newtal) reg reruns)
		    ))))))))

;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
  ;; At this point the list of parent tests is expanded 
  ;; NB// Should expand items here and then insert into the run queue.
  (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags))

  ;; Do mark-and-find clean up of db before starting runing of quue
  ;;
  ;; (cdb:remote-run db:find-and-mark-incomplete #f)

  (let ((run-info              (cdb:remote-run db:get-run-info #f run-id))
	(tests-info            (mt:get-tests-for-run run-id #f '() '())) ;;  qryvals: "id,testname,item_path"))
	(sorted-test-names     (tests:sort-by-priority-and-waiton test-records))
	(test-registry         (make-hash-table))
	(registry-mutex        (make-mutex))
	(num-retries           0)
	(max-retries           (config-lookup *configdat* "setup" "maxretries"))
	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (current-seconds)))

    ;; Initialize the test-registery hash with tests that already have a record
    ;; convert state to symbol and use that as the hash value
    (for-each (lambda (trec)
		(let ((id (db:test-get-id        trec))
		      (tn (db:test-get-testname  trec))
		      (ip (db:test-get-item-path trec))
		      (st (db:test-get-state     trec)))
		  (if (not (equal? st "DELETED"))
		      (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))))
	      tests-info)
    (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100))

    (let loop ((hed         (car sorted-test-names))
	       (tal         (cdr sorted-test-names))
	       (reg         '()) ;; registered, put these at the head of tal 
	       (reruns      '()))
      (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns))

      ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes
      ;; (if (> (current-seconds)(+ last-time-incomplete 900))
      ;;     (begin
      ;;       (set! last-time-incomplete (current-seconds))
      ;;       (cdb:remote-run db:find-and-mark-incomplete #f)))

      ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns)
      (let* ((test-record (hash-table-ref test-records hed))
	     (test-name   (tests:testqueue-get-testname test-record))
	     (tconfig     (tests:testqueue-get-testconfig test-record))
	     (jobgroup    (config-lookup tconfig "requirements" "jobgroup"))
	     (testmode    (let ((m (config-lookup tconfig "requirements" "mode")))
			    (if m (string->symbol m) 'normal)))
	     (waitons     (tests:testqueue-get-waitons    test-record))
	     (priority    (tests:testqueue-get-priority   test-record))
	     (itemdat     (tests:testqueue-get-itemdat    test-record)) ;; itemdat can be a string, list or #f
	     (items       (tests:testqueue-get-items      test-record))
	     (item-path   (item-list->path itemdat))
	     (tfullname   (runs:make-full-test-name test-name item-path))
	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen)))

	(hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))

	;; Ensure all top level tests get registered. This way they show up as "NOT_STARTED" on the dashboard
	;; and it is clear they *should* have run but did not.
	(if (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f))
	    (begin
	      (cdb:tests-register-test *runremote* run-id test-name "")
	      (hash-table-set! test-registry (runs:make-full-test-name test-name "") 'done)))
809
810
811
812
813
814
815
















816
817
818
819
820
821
822
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))

	(cond 

















	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
	  (debug:print-info 4 "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
	;; error
	(if (member test-name waitons)
	    (begin
	      (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!")
	      (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons))))

	(cond 
	 
	 ;; We want to catch tests that have waitons that are NOT in the queue and discard them IFF 
	 ;; they have been through the wringer 10 or more times
	 ((and (list? waitons)
	       (not (null? waitons))
	       (> (hash-table-ref/default *max-tries-hash* tfullname 0) 10)
	       (not (null? (filter
			    number?
			    (map (lambda (waiton)
				   (if (and (not (member waiton tal))            ;; this waiton is not in the list to be tried to run
					    (not (member waiton reruns)))
				       1
				       #f))
				 waitons))))) ;; could do this more elegantly with a marker....
	  (debug:print 0 "WARNING: Marking test " tfullname " as not runnable. It is waiting on tests that cannot be run. Giving up now.")
	  (hash-table-set! test-registry tfullname 'removed))

	 ;; items is #f then the test is ok to be handed off to launch (but not before)
	 ;; 
	 ((not items)
	  (debug:print-info 4 "OUTER COND: (not items)")
	  (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests))
		   (not (null? tal)))
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
	    
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)







|







928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
	  (let ((can-run-more    (runs:can-run-more-tests jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records)))
		  (if loop-list
		      (apply loop loop-list)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
	    
	 ;; this case should not happen, added to help catch any bugs
	 ((and (list? items) itemdat)
963
964
965
966
967
968
969





970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
    (setenv "MT_ITEMPATH"  item-path)
    (setenv "MT_RUNNAME"   runname)
    (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
    (change-directory *toppath*)

    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?





    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (cdb:remote-run db:get-test-id #f  run-id test-name item-path))
	   (testdat       (cdb:get-test-info-by-id *runremote* test-id)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path))
	    (if (not test-id)
		(begin
		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (cdb:tests-register-test *runremote* run-id test-name item-path)
		  (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path))))
	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))







>
>
>
>
>







|
|










|




|







1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
    (setenv "MT_ITEMPATH"  item-path)
    (setenv "MT_RUNNAME"   runname)
    (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process
    (change-directory *toppath*)

    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
	   (hash-table-set! *test-meta-updated* test-name #t)
           (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (cdb:remote-run db:get-test-id-cached #f  run-id test-name item-path))
	   (testdat       (if test-id (cdb:get-test-info-by-id *runremote* test-id) #f)))
      (if (not testdat)
	  (let loop ()
	    ;; ensure that the path exists before registering the test
	    ;; NOPE: Cannot! Don't know yet which disk area will be assigned....
	    ;; (system (conc "mkdir -p " new-test-path))
	    ;;
	    ;; (open-run-close tests:register-test db run-id test-name item-path)
	    ;;
	    ;; NB// for the above line. I want the test to be registered long before this routine gets called!
	    ;;
	    (if (not test-id)(set! test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path)))
	    (if (not test-id)
		(begin
		  (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id)
		  (cdb:tests-register-test *runremote* run-id test-name item-path)
		  (set! test-id (cdb:remote-run db:get-test-id-cached #f run-id test-name item-path))))
	    (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"")
	    (set! testdat (cdb:get-test-info-by-id *runremote* test-id))
	    (if (not testdat)
		(begin
		  (debug:print-info 0 "WARNING: server is overloaded, trying again in one second")
		  (thread-sleep! 1)
		  (loop)))))
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (cdb:remote-run db:testmeta-update-field #f test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)))))

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 ;; use the cdb:remote-run instead of passing in db
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     test-names)))

;; This could probably be refactored into one complex query ...
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user)
  (let* ((db              #f)
	 (new-run-id      (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user))
	 (prev-tests      (cdb:remote-run test:get-matching-previous-test-run-records db new-run-id "%" "%"))







|





|







1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (cdb:remote-run db:testmeta-update-field #f test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)))))

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))
	 ;; use the cdb:remote-run instead of passing in db
	 (if test-conf (runs:update-test_meta test-name test-conf))))
     (hash-table-keys test-names))))

;; This could probably be refactored into one complex query ...
(define (runs:rollup-run keys runname user keyvals)
  (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user)
  (let* ((db              #f)
	 (new-run-id      (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user))
	 (prev-tests      (cdb:remote-run test:get-matching-previous-test-run-records db new-run-id "%" "%"))