Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -1,6 +1,6 @@ - +# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ @@ -55,10 +55,13 @@ db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm +# Temporary while transitioning to new routine +runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm + megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi $(OFILES) $(GOFILES) : common_records.scm Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -3,5 +3,28 @@ 3. Tests may or may not have file system access to the originating run area. rsync is used to pull the test area to the home host if and only if the originating area can not be seen via file system. NO LONGER TRUE. Rsync is used but file system must be visible. 4. All db access is done via the home host. NOT IMPLEMENTED YET. + +REMOTE ACCESS DB LOADS + +INFO: (0) Max cached queries was 10 +INFO: (0) Number of cached writes 27043 +INFO: (0) Average cached write time 15.0634544983915 ms +INFO: (0) Number non-cached queries 71928 +INFO: (0) Average non-cached time 5.15547491936381 ms +INFO: (0) Server shutdown complete. Exiting + + +fdktestqa on Apr 29, 2013: 1812 tests + +INFO: (0) Max cached queries was 10 +INFO: (0) Number of cached writes 41335 +INFO: (0) Average cached write time 206.081553163179 ms +INFO: (0) Number non-cached queries 74289 +INFO: (0) Average non-cached time 1055.09826488444 ms +INFO: (0) Server shutdown complete. Exiting + +Start: 0 at Sun Apr 28 22:18:25 MST 2013 +Max: 52 at Sun Apr 28 23:06:59 MST 2013 +End: 6 at Sun Apr 28 23:47:51 MST 2013 Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -59,11 +59,11 @@ (define configf:comment-rx (regexp "^\\s*#.*")) (define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) ;; read a line and process any #{ ... } constructs -(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get)\\s+([^\\}\\{]*)\\}(.*)")) +(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)")) (define (configf:process-line l ht) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) (if matchdat @@ -81,10 +81,11 @@ (let* ((parts (string-split cmd)) (sect (car parts)) (var (cadr parts))) (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) ((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) (with-input-from-string fullcmd (lambda () (set! result ((eval (read)) ht)))) @@ -110,18 +111,26 @@ ;; Lookup a value in runconfigs based on -reqtarg or -target (define (runconfigs-get config var) (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ - (config-lookup config targ var) - #f))) + (or (configf:lookup config targ var) + (configf:lookup config "default" var)) + (configf:lookup config "default" var)))) (define-inline (configf:read-line p ht allow-processing) - (if (and allow-processing - (not (eq? allow-processing 'return-string))) - (configf:process-line (read-line p) ht) - (read-line p))) + (let loop ((inl (read-line p))) + (if (and (string? inl) + (not (string-null? inl)) + (equal? "\\" (string-take-right inl 1))) ;; last character is \ + (let ((nextl (read-line p))) + (if (not (eof-object? nextl)) + (loop (string-append inl nextl)))) + (if (and allow-processing + (not (eq? allow-processing 'return-string))) + (configf:process-line inl ht) + inl)))) ;; read a config file, returns hash table of alists ;; read a config file, returns hash table of alists ;; adds to ht if given (must be #f otherwise) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -270,13 +270,13 @@ (keydat (if testdat (open-run-close db:get-key-val-pairs #f run-id) #f)) (rundat (if testdat (open-run-close db:get-run-info #f run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-row rundat) (db:get-header rundat) "runname") #f)) - (teststeps (if testdat (db:get-compressed-steps test-id) '())) (logfile "/this/dir/better/not/exist") (rundir logfile) + (teststeps (if testdat (db:get-compressed-steps test-id work-area: rundir) '())) (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) (testname (if testdat (db:test-get-testname testdat) "n/a")) (testmeta (if testdat (let ((tm (open-run-close db:testmeta-get-record #f testname))) (if tm tm (make-db:testmeta))) @@ -305,15 +305,19 @@ (refreshdat (lambda () (let* ((curr-mod-time (file-modification-time db-path)) (need-update (or (and (> curr-mod-time db-mod-time) (> (current-seconds) (+ last-update 2))) ;; every two seconds if db touched request-update)) - (newtestdat (if need-update (open-run-close db:get-test-info-by-id #f test-id)))) + (newtestdat (if need-update + (handle-exceptions + exn + (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn)) + (open-run-close db:get-test-info-by-id #f test-id ))))) (cond ((and need-update newtestdat) (set! testdat newtestdat) - (set! teststeps (db:get-compressed-steps test-id)) + (set! teststeps (db:get-compressed-steps test-id work-area: rundir)) (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) (set! rundir (db:test-get-rundir testdat)) (set! testfullname (db:test-get-fullname testdat)) ;; (debug:print 0 "INFO: teststeps=" (intersperse teststeps "\n ")) ) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -241,24 +241,24 @@ ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== ;; Create the sqlite db for the individual test(s) -(define (open-test-db testpath) - (debug:print-info 11 "open-test-db " testpath) - (if (and testpath - (directory? testpath) - (file-read-access? testpath)) - (let* ((dbpath (conc testpath "/testdat.db")) +(define (open-test-db work-area) + (debug:print-info 11 "open-test-db " work-area) + (if (and work-area + (directory? work-area) + (file-read-access? work-area)) + (let* ((dbpath (conc work-area "/testdat.db")) (dbexists (file-exists? dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) (handle-exceptions exn (begin - (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test" + (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" ((condition-property-accessor 'exn 'message) exn)) #f) (set! db (sqlite3:open-database dbpath))) (sqlite3:set-busy-handler! db handler) (if (not dbexists) @@ -265,29 +265,31 @@ (begin (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (db:testdb-initialize db))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - (debug:print-info 11 "open-test-db END (sucessful)" testpath) + (debug:print-info 11 "open-test-db END (sucessful)" work-area) ;; now let's test that everything is correct (handle-exceptions exn (begin - (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test" + (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" ((condition-property-accessor 'exn 'message) exn)) #f) ;; Is there a cheaper single line operation that will check for existance of a table ;; and raise an exception ? (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) db) (begin - (debug:print-info 11 "open-test-db END (unsucessful)" testpath) + (debug:print-info 11 "open-test-db END (unsucessful)" work-area) #f))) ;; find and open the testdat.db file for an existing test -(define (db:open-test-db-by-test-id db test-id) - (let* ((test-path (cdb:remote-run db:test-get-rundir-from-test-id db test-id))) +(define (db:open-test-db-by-test-id db test-id #!key (work-area #f)) + (let* ((test-path (if work-area + work-area + (cdb:remote-run db:test-get-rundir-from-test-id db test-id)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path))) (define (db:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") @@ -540,10 +542,43 @@ (conc fieldname " " wildtype " '" patt "'"))) (if (null? patts) '("") patts)) comparator))) + + +;; register a test run with the db +(define (db:register-run db keys keyvallst runname state status user) + (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user) + (let* ((keystr (keys->keystr keys)) + (comma (if (> (length keys) 0) "," "")) + (andstr (if (> (length keys) 0) " AND " "")) + (valslots (keys->valslots keys)) ;; ?,?,? ... + (keyvals (map cadr keyvallst)) + (allvals (append (list runname state status user) keyvals)) + (qryvals (append (list runname) keyvals)) + (key=?str (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND "))) + (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals) + (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run") + (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" + (let ((res #f)) + (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + ;(debug:print 4 "qry: " qry) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) + res) + (begin + (debug:print 0 "ERROR: Called without all necessary keys") + #f)))) + ;; replace header and keystr with a call to runs:get-std-run-fields ;; ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; runpatts: patt1,patt2 ... @@ -701,25 +736,10 @@ ;;====================================================================== ;; T E S T S ;;====================================================================== -(define (db:tests-register-test db run-id test-name item-path) - (debug:print-info 11 "db:tests-register-test START db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (for-each - (lambda (pth) - (sqlite3:execute db "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');" - run-id - test-name - pth)) - item-paths) - (debug:print-info 11 "db:tests-register-test END db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - #f)) - ;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run db run-id testpatt states statuses @@ -824,13 +844,13 @@ ) (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by) res)) ;; this one is a bit broken BUG FIXME -(define (db:delete-test-step-records db test-id) +(define (db:delete-test-step-records db test-id #!key (work-area #f)) ;; Breaking it into two queries for better file access interleaving - (let* ((tdb (db:open-test-db-by-test-id db test-id))) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) ;; test db's can go away - must check every time (if tdb (begin (sqlite3:execute tdb "DELETE FROM test_steps;") (sqlite3:execute tdb "DELETE FROM test_data;") @@ -860,11 +880,12 @@ (let ((targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_time numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params))) (let* ((push-socket (vector-ref serverdat 0)) (sub-socket (vector-ref serverdat 1)) (client-sig (client:get-signature)) @@ -1217,31 +1272,32 @@ (receive-message* sub-socket) ;; now get the actual message (let ((myres (db:string->obj (receive-message* sub-socket)))) (if (equal? query-sig (vector-ref myres 1)) (set! res (vector-ref myres 2)) - (loop)))))) - (timeout (lambda () - (let loop ((n numretries)) - (thread-sleep! 15) - (if (not res) - (if (> numretries 0) - (begin - (debug:print 2 "WARNING: no reply to query " params ", trying resend") - (debug:print-info 11 "re-sending message") - (send-message push-socket zdat) - (debug:print-info 11 "message re-sent") - (loop (- n 1))) - ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) - (begin - (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") - (exit 5)))))))) + (loop))))))) + ;; (timeout (lambda () + ;; (let loop ((n numretries)) + ;; (thread-sleep! 15) + ;; (if (not res) + ;; (if (> numretries 0) + ;; (begin + ;; (debug:print 2 "WARNING: no reply to query " params ", trying resend") + ;; (debug:print-info 11 "re-sending message") + ;; (send-message push-socket zdat) + ;; (debug:print-info 11 "message re-sent") + ;; (loop (- n 1))) + ;; ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) + ;; (begin + ;; (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") + ;; (exit 5)))))))) (debug:print-info 11 "Starting threads") (let ((th1 (make-thread send-receive "send receive")) - (th2 (make-thread timeout "timeout"))) + ;; (th2 (make-thread timeout "timeout")) + ) (thread-start! th1) - (thread-start! th2) + ;; (thread-start! th2) (thread-join! th1) (debug:print-info 11 "cdb:client-call returning res=" res) res)))))) (define (cdb:set-verbosity serverdat val) @@ -1266,14 +1322,11 @@ (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count) (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id)) (define (cdb:tests-register-test serverdat run-id test-name item-path) - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))) + (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)) (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) (define (cdb:kill-server serverdat) @@ -1304,10 +1357,14 @@ db "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" run-id test-name) res)) +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + (define db:queries (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") @@ -1323,10 +1380,15 @@ '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") '(test-set-rundir "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;") '(delete-tests-in-state "DELETE FROM tests WHERE state=? AND run_id=?;") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") + '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") + '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") + '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") )) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail db:roll-up-pass-fail-counts @@ -1593,13 +1655,13 @@ ;;====================================================================== ;; T E S T D A T A ;;====================================================================== -(define (db:csv->test-data db test-id csvdata) +(define (db:csv->test-data db test-id csvdata #!key (work-area #f)) (debug:print 4 "test-id " test-id ", csvdata: " csvdata) - (let ((tdb (db:open-test-db-by-test-id db test-id))) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) (if tdb (let ((csvlist (csv->list (make-csv-reader (open-input-string csvdata) '((strip-leading-whitespace? #t) (strip-trailing-whitespace? #t)) )))) ;; (csv->list csvdata))) @@ -1649,17 +1711,17 @@ ((<=) (if (<= value expected) "pass" "fail")) (else (conc "ERROR: bad tol comparator " tol)))))) (debug:print 4 "AFTER2: category: " category " variable: " variable " value: " value ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) (sqlite3:execute tdb "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type) - (sqlite3:finalize! tdb))) - csvlist))))) + test-id category variable value expected tol units (if comment comment "") status type))) + csvlist) + (sqlite3:finalize! tdb))))) ;; get a list of test_data records matching categorypatt -(define (db:read-test-data db test-id categorypatt) - (let ((tdb (db:open-test-db-by-test-id db test-id))) +(define (db:read-test-data db test-id categorypatt #!key (work-area #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) (if tdb (let ((res '())) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) @@ -1668,28 +1730,28 @@ (sqlite3:finalize! tdb) (reverse res)) '()))) ;; NOTE: Run this local with #f for db !!! -(define (db:load-test-data db test-id) +(define (db:load-test-data db test-id #!key (work-area #f)) (let loop ((lin (read-line))) (if (not (eof-object? lin)) (begin (debug:print 4 lin) - (db:csv->test-data db test-id lin) + (db:csv->test-data db test-id lin work-area: work-area) (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status to - (db:test-data-rollup db test-id #f)) + (db:test-data-rollup db test-id #f work-area: work-area)) ;; WARNING: Do NOT call this for the parent test on an iterated test ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup db test-id status) - (let ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:test-data-rollup db test-id status #!key (work-area #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (fail-count 0) (pass-count 0)) (if tdb (begin (sqlite3:for-each-row @@ -1738,12 +1800,12 @@ (define (db:step-get-time-as-string vec) (seconds->time-string (db:step-get-event_time vec))) ;; db-get-test-steps-for-run -(define (db:get-steps-for-test db test-id) - (let* ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:get-steps-for-test db test-id #!key (work-area #f)) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (res '())) (if tdb (begin (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile) @@ -1755,12 +1817,12 @@ (reverse res)) '()))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table db test-id #!key (work-area #f)) + (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1815,12 +1877,12 @@ (else #f))))) res))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table-list db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table-list db test-id #!key (work-area #f)) + (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1873,35 +1935,38 @@ ((eq? (db:step-get-event_time a)(db:step-get-event_time b)) (< (db:step-get-id a) (db:step-get-id b))) (else #f))))) res))) -(define (db:get-compressed-steps test-id) - (let* ((comprsteps (open-run-close db:get-steps-table #f test-id))) - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector - (vector-ref x 0) - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) - (vector-ref x 3) ;; status - (vector-ref x 4) - (vector-ref x 5))) ;; time delta - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? time-a time-b) - (stringstring + (vector + (vector-ref x 0) + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) + (vector-ref x 3) ;; status + (vector-ref x 4) + (vector-ref x 5))) ;; time delta + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? time-a time-b) + (stringenv-vars env-ovrd) (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat 0) + (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) @@ -208,11 +208,11 @@ ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 "script: " script) ;; DO NOT remote - (db:teststep-set-status! #f test-id stepname "start" "-" #f #f) + (db:teststep-set-status! #f test-id stepname "start" "-" #f #f work-area: work-area) ;; now launch (let ((pid (process-run script))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) @@ -226,11 +226,11 @@ (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) - (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) + (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna work-area: work-area)) (if logpro-used (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) @@ -276,11 +276,11 @@ (kill-tries 0)) (let loop ((minutes (calc-minutes))) (begin (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat)) ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat minutes) + (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) @@ -336,11 +336,11 @@ (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) (else "FAIL")) (args:get-arg "-m") #f))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no + (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no ) (mutex-unlock! m) ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " @@ -406,18 +406,16 @@ ;; ;; All log file links should be stored relative to the top of link path ;; ;; - [ - ] ;; -(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat) - (let* ((run-info (cdb:remote-run db:get-run-info #f run-id)) - (item-path (item-list->path itemdat)) +(define (create-work-area run-id run-info key-vals test-id test-src-path disk-path testname itemdat) + (let* ((item-path (item-list->path itemdat)) (runname (db:get-value-by-header (db:get-row run-info) (db:get-header run-info) "runname")) ;; convert back to db: from rdb: - this is always run at server end - (key-vals (cdb:remote-run db:get-key-vals #f run-id)) (target (string-intersperse key-vals "/")) (not-iterated (equal? "" item-path)) ;; all tests are found at /test-base or /test-base @@ -537,15 +535,16 @@ (begin (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd)) + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) #f))) (cmd (if ovrcmd ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"))) + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) (status (system cmd))) (if (not (eq? status 0)) (debug:print 2 "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) (list #f #f)))) @@ -555,11 +554,11 @@ ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) -(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params) +(define (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat params) (change-directory *toppath*) (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" (list ;; (list "MT_TEST_RUN_DIR" work-area) (list "MT_RUN_AREA_HOME" *toppath*) (list "MT_TEST_NAME" test-name) @@ -594,11 +593,11 @@ (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) - (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + ;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (testinfo (cdb:get-test-info-by-id *runremote* test-id)) (mt_target (string-intersperse (map cadr keyvallst) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) (if hosts (set! hosts (string-split hosts))) @@ -607,11 +606,11 @@ (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! launcher (string-split launcher))) ;; set up the run work area for this test (set! diskpath (get-best-disk *configdat*)) (if diskpath - (let ((dat (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat))) + (let ((dat (create-work-area run-id run-info key-vals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) (debug:print-info 2 "Using work area " work-area)) (begin (set! work-area (conc test-path "/tmp_run")) @@ -668,34 +667,37 @@ (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_RUNNAME" runname) (list "MT_TARGET" mt_target) ) itemdat))) - (launch-results (apply cmd-run-with-stderr->list ;; cmd-run-proc-each-line + (launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes") + cmd-run-with-stderr->list + process-run) (if useshell (string-intersperse fullcmd " ") (car fullcmd)) - ;; conc (if useshell '() - (cdr fullcmd))))) ;; launcher fullcmd)));; (apply cmd-run-proc-each-line launcher print fullcmd))) ;; (cmd-run->list fullcmd)) - (with-output-to-file "mt_launch.log" - (lambda () - (apply print launch-results))) + (cdr fullcmd))))) + (if (list? launch-results) + (with-output-to-file "mt_launch.log" + (lambda () + (apply print launch-results)) + #:append)) (debug:print 2 "Launching completed, updating db") (debug:print 2 "Launch results: " launch-results) (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) launch-results)) (change-directory *toppath*)) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.5417) +(define megatest-version 1.5421) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -34,12 +34,20 @@ (include "db_records.scm") (include "megatest-fossil-hash.scm") ;; (use trace dot-locking) ;; (trace -;; cdb:client-call -;; cdb:remote-run +;; tests:match) +;; db:teststep-set-status! +;; db:open-test-db-by-test-id +;; db:test-get-rundir-from-test-id +;; cdb:tests-register-test +;; cdb:tests-update-uname-host +;; cdb:tests-update-run-duration +;; ;; cdb:client-call +;; ;; cdb:remote-run +;; ) ;; cdb:test-set-status-state ;; change-directory ;; db:process-queue-item ;; db:test-get-logfile-info ;; db:teststep-set-status! @@ -119,10 +127,11 @@ -list-targets : list the targets in runconfigs.config -list-db-targets : list the target combinations used in the db -show-config : dump the internal representation of the megatest.config file -show-runconfig : dump the internal representation of the runconfigs.config file -dumpmode json : dump in json format instead of sexpr + -show-cmdinfo : dump the command info for a test (run in test environment) Misc -rebuild-db : bring the database schema up to date -update-meta : update the tests metadata for all tests -env2file fname : write the environment to fname.csh and fname.sh @@ -231,10 +240,11 @@ "-list-disks" "-list-targets" "-list-db-targets" "-show-runconfig" "-show-config" + "-show-cmdinfo" ;; queries "-test-paths" ;; get path(s) to a test, ordered by youngest first "-runall" ;; run all tests "-remove-runs" @@ -431,10 +441,17 @@ ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) (else (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) (set! *didsomething* #t))) + +(if (args:get-arg "-show-cmdinfo") + (let ((data (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO")))))) + (if (equal? (args:get-arg "-dumpmode") "json") + (json-write data) + (pp data)) + (set! *didsomething* #t))) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== @@ -605,11 +622,12 @@ "run a test" (lambda (target runname keys keynames keyvallst) (runs:run-tests target runname (args:get-arg "-runtests") - (args:get-arg "-testpatt") + (or (args:get-arg "-testpatt") + (args:get-arg "-runtests")) user args:arg-hash)))) ;;====================================================================== ;; Rollup into a run @@ -797,21 +815,22 @@ (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) (db #f)) (change-directory testpath) ;; (set! *runremote* runremote) (set! *transport-type* (string->symbol transport)) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) - ;; DO NOT remote run - (db:teststep-set-status! db test-id step state status msg logfile) + ;; DO NOT remote run, makes calls to the testdat.db test db. + (db:teststep-set-status! db test-id step state status msg logfile work-area: work-area) (begin (debug:print 0 "ERROR: You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") @@ -823,11 +842,12 @@ (args:get-arg "-setlog") (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) (set! *didsomething* #t))) -(if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status +(if (or (and (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status + (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous (args:get-arg "-set-toplog") (args:get-arg "-test-status") (args:get-arg "-set-values") (args:get-arg "-load-test-data") (args:get-arg "-runstep") @@ -845,10 +865,11 @@ (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) (db #f) ;; (open-db)) (state (args:get-arg ":state")) (status (args:get-arg ":status"))) (change-directory testpath) ;; (set! *runremote* runremote) @@ -862,11 +883,11 @@ ;; (client:setup) (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: ;; DO NOT put this one into either cdb:remote-run or open-run-close - (db:load-test-data db test-id)) + (db:load-test-data db test-id work-area: work-area)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) (cdb:test-set-log! *runremote* test-id logfname))) (if (args:get-arg "-set-toplog") ;; DO NOT run remote @@ -894,11 +915,11 @@ (fullcmd (conc "(" (string-intersperse (cons cmd params) " ") ") " redir " " logfile))) ;; mark the start of the test ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile) + (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile work-area: work-area) ;; run the test step (debug:print-info 2 "Running \"" fullcmd "\"") (change-directory startingdir) (set! exitstat (system fullcmd)) ;; cmd params)) (set! *globalexitstatus* exitstat) @@ -914,11 +935,11 @@ (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) (cdb:test-set-log! *runremote* test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) + (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile work-area: work-area)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) (let ((newstatus (cond ((number? status) (if (equal? status 0) "PASS" "FAIL")) @@ -941,11 +962,11 @@ ;; (sqlite3:finalize! db) (exit 6))) (let* ((msg (args:get-arg "-m")) (numoth (length (hash-table-keys otherdata)))) ;; Convert to rpc inside the tests:test-set-status! call, not here - (tests:test-set-status! test-id state newstatus msg otherdata)))) + (tests:test-set-status! test-id state newstatus msg otherdata work-area: work-area)))) (if db (sqlite3:finalize! db)) (set! *didsomething* #t)))) ;;====================================================================== ;; Various helper commands can go below here ADDED run-tests-queue-classic.scm Index: run-tests-queue-classic.scm ================================================================== --- /dev/null +++ run-tests-queue-classic.scm @@ -0,0 +1,301 @@ + +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) + (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) + (key-vals (cdb:remote-run db:get-key-vals #f run-id)) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (config-lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (if (and mcj (string->number mcj)) + (string->number mcj) + 1)))) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + (if (not (null? sorted-test-names)) + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) + (if m (string->symbol m) 'normal))) + (waitons (tests:testqueue-get-waitons test-record)) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (newtal (append tal (list hed)))) + + (debug:print 6 + "test-name: " test-name + "\n hed: " hed + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n tal: " tal + "\n reruns: " reruns) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond ;; OUTER COND + ((not items) ;; when false the test is ok to be handed off to launch (but not before) + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) + (not (null? tal))) + (loop (car tal)(cdr tal) reruns)) + (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup (list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) ", ") " fails: " fails) + (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 "run-limits-info = " run-limits-info) + (cond ;; INNER COND #1 for a launchable test + ;; Check item path against item-patts + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns))) + ;; Registry has been started for this test but has not yet completed + ;; this should be rare, the case where there are only a couple of tests and the db is slow + ;; delay a short while and continue + ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start) + ;; (thread-sleep! 0.01) + ;; (loop (car newtal)(cdr newtal) reruns)) + ;; count number of 'done, if more than 100 then skip on through. + (;; (and (< (length (filter (lambda (x)(eq? x 'done))(hash-table-values test-registry))) 100) ;; why get more than 200 ahead? + (not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. + (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) + ;; NEED TO THREADIFY THIS + (let ((th (make-thread (lambda () + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registry-mutex) + ;; If haven't done it before register a top level test if this is an itemized test + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) + (cdb:tests-register-test *runremote* run-id test-name "")) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registry-mutex)) + (conc test-name "/" item-path)))) + (thread-start! th)) + ;; TRY (thread-sleep! *global-delta*) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (loop (car newtal)(cdr newtal) reruns)) + ;; At this point *all* test registrations must be completed. + ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registry)))) + (debug:print-info 0 "Waiting on test registrations: " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.1) + (loop hed tal reruns)) + ((not have-resources) ;; simply try again after waiting a second + (debug:print-info 1 "no resources to run new tests, waiting ...") + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) ;; (+ 2 *global-delta*)) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (loop (car newtal)(cdr newtal) reruns)) + ((and have-resources + (or (null? prereqs-not-met) + (and (eq? testmode 'toplevel) + (null? non-completed)))) + (run:test run-id run-info key-vals runname keyvallst test-record flags #f) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns))) + (else ;; must be we have unmet prerequisites + (debug:print 4 "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + (if (null? fails) + (begin + ;; couldn't run, take a breather + (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") + ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient + ;; we made new tal by sticking hed at the back of the list + (loop (car newtal)(cdr newtal) reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (not (null? tal)) + (if (vector? hed) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) + (loop (car tal)(cdr tal) (cons hed reruns))) + (begin + (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop hed tal reruns))))))))) ;; END OF INNER COND + + ;; case where an items came in as a list been processed + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) + (> (length items) 0) + (> (length (car items)) 0)) + (pp items)) + (for-each + (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat))) + (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! + (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath + items) + (if (not (null? tal)) + (begin + (debug:print-info 4 "End of items list, looping with next after short delay") + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop (car tal)(cdr tal) reruns)))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ((or (procedure? items)(eq? items 'have-procedure)) + (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) + (if (and (list? can-run-more) + (car can-run-more)) + (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n num-retries: " num-retries + "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (cond ;; INNER COND #2 + ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + (and (eq? testmode 'toplevel) + (null? non-completed))) + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (set-megatest-env-vars run-id) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (tests:testqueue-set-items! test-record items-list) + ;; (thread-sleep! *global-delta*) + (loop hed tal reruns)) + (begin + (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") + (exit 1)))))) + ((null? fails) + (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") + ;; only increment num-retries when there are no tests runing + (if (eq? 0 (list-ref can-run-more 1)) + (begin + ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost + ;; TRY (thread-sleep! (+ 2 *global-delta*)) + ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) + (set! num-retries (+ num-retries 1)))) + (if (> num-retries max-retries) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns)) + (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? + ((and (not (null? fails))(eq? testmode 'normal)) + (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (if (not (null? tal)) + (begin + ;; (thread-sleep! *global-delta*) + (loop (car tal)(cdr tal)(cons hed reruns))))) + (else + (debug:print 8 "ERROR: No handler for this condition.") + ;; TRY (thread-sleep! (+ 1 *global-delta*)) + (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE + + ;; if can't run more just loop with next possible test + (begin + (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) + ;; (thread-sleep! (+ 2 *global-delta*)) + (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) + (else + (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + )))) ;; LET* ((test-record + + ;; we get here on "drop through" - loop for next test in queue + ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! + + (debug:print-info 1 "All tests launched") + (thread-sleep! 0.5) + ;; FIXME! This harsh exit should not be necessary.... + ;; (if (not *runremote*)(exit)) ;; + #f)) ;; return a #f as a hint that we are done + ;; Here we need to check that all the tests remaining to be run are eligible to run + ;; and are not blocked by failed + + ADDED run-tests-queue-new.scm Index: run-tests-queue-new.scm ================================================================== --- /dev/null +++ run-tests-queue-new.scm @@ -0,0 +1,334 @@ + +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue-new run-id runname test-records keyvallst flags test-patts reglen) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) + (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) + (key-vals (cdb:remote-run db:get-key-vals #f run-id)) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (config-lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (if (and mcj (string->number mcj)) + (string->number mcj) + 1)))) ;; length of the register queue ahead + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + (if (not (null? sorted-test-names)) + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal + (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) + (if m (string->symbol m) 'normal))) + (waitons (tests:testqueue-get-waitons test-record)) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (newtal (append tal (list hed))) + (regfull (> (length reg) reglen))) + ;; (if (> (length reg) 10) + ;; (begin + ;; (set! tal (cons hed tal)) + ;; (set! hed (car reg)) + ;; (set! reg (cdr reg)) + ;; (set! newtal tal))) + (debug:print 6 + "test-name: " test-name + "\n hed: " hed + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n tal: " tal + "\n reruns: " reruns) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond ;; OUTER COND + ((not items) ;; when false the test is ok to be handed off to launch (but not before) + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) + (not (null? tal))) + (loop (car tal)(cdr tal) reg reruns)) + (let* ((run-limits-info (runs:can-run-more-tests test-record max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup (list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) ", ") " fails: " fails) + (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 "run-limits-info = " run-limits-info) + (cond ;; INNER COND #1 for a launchable test + ;; Check item path against item-patts + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) + ;; Registry has been started for this test but has not yet completed + ;; this should be rare, the case where there are only a couple of tests and the db is slow + ;; delay a short while and continue + ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start) + ;; (thread-sleep! 0.01) + ;; (loop (car newtal)(cdr newtal) reruns)) + ;; count number of 'done, if more than 100 then skip on through. + ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. + (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) + (let ((th (make-thread (lambda () + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registry-mutex) + ;; If haven't done it before register a top level test if this is an itemized test + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) + (cdb:tests-register-test *runremote* run-id test-name "")) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registry-mutex)) + (conc test-name "/" item-path)))) + (thread-start! th)) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (loop hed tal reg reruns) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (let ((newl (append reg (list hed)))) + (if regfull + (cdr newl) + newl)) + reruns))) + ;; At this point hed test registration must be completed. + ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) + 'start) + (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.1) + (loop hed tal reg reruns)) + ((not have-resources) ;; simply try again after waiting a second + (debug:print-info 1 "no resources to run new tests, waiting ...") + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) ;; (+ 2 *global-delta*)) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (loop (car newtal)(cdr newtal) reg reruns)) + ((and have-resources + (or (null? prereqs-not-met) + (and (eq? testmode 'toplevel) + (null? non-completed)))) + (run:test run-id run-info key-vals runname keyvallst test-record flags #f) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) + (else ;; must be we have unmet prerequisites + (debug:print 4 "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + (if (null? fails) + (begin + ;; couldn't run, take a breather + (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") + ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient + ;; we made new tal by sticking hed at the back of the list + (loop (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (not (null? tal)) + (if (vector? hed) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))) + (begin + (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop hed tal reg reruns))))))))) ;; END OF INNER COND + + ;; case where an items came in as a list been processed + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) + (> (length items) 0) + (> (length (car items)) 0)) + (pp items)) + (for-each + (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat))) + (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! + (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath + items) + (if (not (null? tal)) + (begin + (debug:print-info 4 "End of items list, looping with next after short delay") + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ((or (procedure? items)(eq? items 'have-procedure)) + (let ((can-run-more (runs:can-run-more-tests test-record max-concurrent-jobs))) + (if (and (list? can-run-more) + (car can-run-more)) + (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n num-retries: " num-retries + "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (cond ;; INNER COND #2 + ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + (and (eq? testmode 'toplevel) + (null? non-completed))) + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (set-megatest-env-vars run-id) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (tests:testqueue-set-items! test-record items-list) + ;; (thread-sleep! *global-delta*) + (loop hed tal reg reruns)) + (begin + (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") + (exit 1)))))) + ((null? fails) + (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") + ;; only increment num-retries when there are no tests runing + (if (eq? 0 (list-ref can-run-more 1)) + (begin + ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost + ;; TRY (thread-sleep! (+ 2 *global-delta*)) + ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) + (set! num-retries (+ num-retries 1)))) + (if (> num-retries max-retries) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)) + (loop (car newtal)(cdr newtal) reg reruns))) ;; an issue with prereqs not yet met? + ((and (not (null? fails))(eq? testmode 'normal)) + (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (if (not (null? tal)) + (begin + ;; (thread-sleep! *global-delta*) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))))) + (else + (debug:print 8 "ERROR: No handler for this condition.") + ;; TRY (thread-sleep! (+ 1 *global-delta*)) + (loop (car newtal)(cdr newtal) reg reruns)))) ;; END OF IF CAN RUN MORE + + ;; if can't run more just loop with next possible test + (begin + (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) + ;; (thread-sleep! (+ 2 *global-delta*)) + (loop (car newtal)(cdr newtal) reg reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? + (debug:print-info 0 "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) + (else + (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + )))) ;; LET* ((test-record + + ;; we get here on "drop through" - loop for next test in queue + ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! + + (debug:print-info 1 "All tests launched") + (thread-sleep! 0.5) + ;; FIXME! This harsh exit should not be necessary.... + ;; (if (not *runremote*)(exit)) ;; + #f)) ;; return a #f as a hint that we are done +;; Here we need to check that all the tests remaining to be run are eligible to run +;; and are not blocked by failed + Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -59,16 +59,16 @@ sections) (debug:print 2 "---") (set! *already-seen-runconfig-info* #t))) finaldat)) -(define (set-run-config-vars db run-id keys keyvals) +(define (set-run-config-vars run-id keys keyvals targ-from-db) (push-directory *toppath*) (let ((runconfigf (conc *toppath* "/runconfigs.config")) (targ (or (args:get-arg "-target") (args:get-arg "-reqtarg") - (db:get-target db run-id)))) + targ-from-db))) (pop-directory) (if (file-exists? runconfigf) (setup-env-defaults runconfigf run-id #t keys keyvals environ-patt: (conc "(default" (if targ ADDED runs-launch-loop-test.scm Index: runs-launch-loop-test.scm ================================================================== --- /dev/null +++ runs-launch-loop-test.scm @@ -0,0 +1,59 @@ +(use srfi-69) + +(define (runs:queue-next-hed tal reg n regful) + (if regful + (car reg) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +(use trace) +(trace runs:queue-next-hed + runs:queue-next-tal + runs:queue-next-reg) + + +(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)) + +(define test-registry (make-hash-table)) + +(define n 3) + +(let loop ((hed (car tests)) + (tal (cdr tests)) + (reg '())) + (let* ((reglen (length reg)) + (regful (> reglen n))) + (print "hed=" hed ", length reg=" (length reg) ", (> lenreg n)=" (> (length reg) n)) + (let ((newtal (append tal (list hed)))) ;; used if we are not done with this test + (cond + ((not (hash-table-ref/default test-registry hed #f)) + (hash-table-set! test-registry hed #t) + (print "Registering #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (let ((newl (append reg (list hed)))) + (if regful + (cdr newl) + newl))))) + (else + (print "Running #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (runs:queue-next-reg tal reg n regful)))))))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -118,34 +118,28 @@ (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) (define *last-num-running-tests* 0) -(define *runs:can-run-more-tests-delay* 0) -(define (runs:shrink-can-run-more-tests-delay) - (set! *runs:can-run-more-tests-delay* 0)) ;; (/ *runs:can-run-more-tests-delay* 2))) + +;; Every time can-run-more-tests is called increment the delay +;; if the cou +(define *runs:can-run-more-tests-count* 0) +(define (runs:shrink-can-run-more-tests-count) + (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) -(define (runs:can-run-more-tests test-record) - (thread-sleep! *runs:can-run-more-tests-delay*) +(define (runs:can-run-more-tests test-record max-concurrent-jobs) + (thread-sleep! (cond + ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while + (else 0))) (let* ((tconfig (tests:testqueue-get-testconfig test-record)) (jobgroup (config-lookup tconfig "requirements" "jobgroup")) - ;; Heuristic fix. These are getting called too rapidly when jobs are running or stuck - ;; so we are going to increment a global delay by 0.1 seconds up to 10 seconds - ;; every time runs:can-run-more-tests is called. - ;; when a test is launched or other activity occurs divide the delay by 2 (num-running (cdb:remote-run db:get-count-tests-running #f)) (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) - (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) - (if (and mcj (string->number mcj)) - (string->number mcj) - 1))) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) - (if (and (> (+ num-running num-running-in-jobgroup) 0) - (< *runs:can-run-more-tests-delay* 1)) - (begin - (set! *runs:can-run-more-tests-delay* (+ *runs:can-run-more-tests-delay* 0.009)) - (debug:print-info 14 "can-run-more-tests-delay: " *runs:can-run-more-tests-delay*))) + (if (> (+ num-running num-running-in-jobgroup) 0) + (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) (if (not (eq? 0 *globalexitstatus*)) @@ -171,40 +165,10 @@ ;; New methodology. These routines will replace the above in time. For ;; now the code is duplicated. This stuff is initially used in the monitor ;; based code. ;;====================================================================== -;; register a test run with the db -(define (runs:register-run db keys keyvallst runname state status user) - (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user) - (let* ((keystr (keys->keystr keys)) - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (keyvals (map cadr keyvallst)) - (allvals (append (list runname state status user) keyvals)) - (qryvals (append (list runname) keyvals)) - (key=?str (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND "))) - (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals) - (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run") - (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (let ((res #f)) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - ;(debug:print 4 "qry: " qry) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) - res) - (begin - (debug:print 0 "ERROR: Called without all necessary keys") - #f)))) ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests. ;; keyvals. ;; ;; test-names: Comma separated patterns same as test-patts but used in selection @@ -214,11 +178,11 @@ (define (runs:run-tests target runname test-names test-patts user flags) (common:clear-caches) ;; clear all caches (let* ((db #f) (keys (cdb:remote-run db:get-keys #f)) (keyvallst (keys:target->keyval keys target)) - (run-id (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) + (run-id (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) (keyvals (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f)) (deferred '()) ;; delay running these since they have a waiton clause ;; keepgoing is the defacto modality now, will add hit-n-run a bit later ;; (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (runconfigf (conc *toppath* "/runconfigs.config")) @@ -329,11 +293,14 @@ (if (not (null? required-tests)) (debug:print-info 1 "Adding " required-tests " to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 "test-records=" (hash-table->alist test-records)) - (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts) + (let ((reglen (any->number (configf:lookup *configdat* "setup" "runqueue")))) + (if reglen + (runs:run-tests-queue-new run-id runname test-records keyvallst flags test-patts reglen) + (runs:run-tests-queue-classic run-id runname test-records keyvallst flags test-patts))) (debug:print-info 4 "All done by here"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) @@ -357,270 +324,38 @@ lst)) (define (runs:make-full-test-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) -;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > -(define (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts) - ;; At this point the list of parent tests is expanded - ;; NB// Should expand items here and then insert into the run queue. - (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) - (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registery (make-hash-table)) - (num-retries 0) - (max-retries (config-lookup *configdat* "setup" "maxretries"))) - (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) - (if (not (null? sorted-test-names)) - (let loop ((hed (car sorted-test-names)) - (tal (cdr sorted-test-names)) - (reruns '())) - (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) - ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) - (let* ((test-record (hash-table-ref test-records hed)) - (test-name (tests:testqueue-get-testname test-record)) - (tconfig (tests:testqueue-get-testconfig test-record)) - (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) - (if m (string->symbol m) 'normal))) - (waitons (tests:testqueue-get-waitons test-record)) - (priority (tests:testqueue-get-priority test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f - (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat)) - (newtal (append tal (list hed)))) - - (debug:print 6 - "test-name: " test-name - "\n hed: " hed - "\n itemdat: " itemdat - "\n items: " items - "\n item-path: " item-path - "\n waitons: " waitons - "\n num-retries: " num-retries - "\n tal: " tal - "\n reruns: " reruns) - - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member test-name waitons) - (begin - (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") - (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) - - (cond ;; OUTER COND - ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((run-limits-info (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup (list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - (prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) - (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) - (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) ", ") " fails: " fails) - (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? - (debug:print-info 4 "run-limits-info = " run-limits-info) - (cond ;; INNER COND #1 for a launchable test - ;; Check item path against item-patts - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - ;; (thread-sleep! *global-delta*) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) - ( ;; (and - (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) - ;; (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) - (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) - (open-run-close db:tests-register-test #f run-id test-name item-path) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) - ;; (thread-sleep! *global-delta*) -(runs:shrink-can-run-more-tests-delay) - (loop (car newtal)(cdr newtal) reruns)) - ((not have-resources) ;; simply try again after waiting a second - (debug:print-info 1 "no resources to run new tests, waiting ...") - ;; (thread-sleep! (+ 2 *global-delta*)) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (loop (car newtal)(cdr newtal) reruns)) - ((and have-resources - (or (null? prereqs-not-met) - (and (eq? testmode 'toplevel) - (null? non-completed)))) - (run:test run-id runname keyvallst test-record flags #f) -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! *global-delta*) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) - (else ;; must be we have unmet prerequisites - (debug:print 4 "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - (if (null? fails) - (begin - ;; couldn't run, take a breather - (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") - ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient - ;; we made new tal by sticking hed at the back of the list - (loop (car newtal)(cdr newtal) reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (if (not (null? tal)) - (if (vector? hed) - (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) - " from the launch list as it has prerequistes that are FAIL") -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal) (cons hed reruns))) - (begin - (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop hed tal reruns))))))))) ;; END OF INNER COND - - ;; case where an items came in as a list been processed - ((and (list? items) ;; thus we know our items are already calculated - (not itemdat)) ;; and not yet expanded into the list of things to be done - (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) - (> (length items) 0) - (> (length (car items)) 0)) - (pp items)) - (for-each - (lambda (my-itemdat) - (let* ((new-test-record (let ((newrec (make-tests:testqueue))) - (vector-copy! test-record newrec) - newrec)) - (my-item-path (item-list->path my-itemdat))) - (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! - (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path - (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath - items) - (if (not (null? tal)) - (begin - (debug:print-info 4 "End of items list, looping with next after short delay") - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop (car tal)(cdr tal) reruns)))) - - ;; if items is a proc then need to run items:get-items-from-config, get the list and loop - ;; - but only do that if resources exist to kick off the job - ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (runs:can-run-more-tests test-record))) - (if (and (list? can-run-more) - (car can-run-more)) - (let* ((prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) - (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) - (debug:print-info 8 "can-run-more: " can-run-more - "\n testname: " hed - "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) - "\n non-completed: " (runs:pretty-string non-completed) - "\n fails: " (runs:pretty-string fails) - "\n testmode: " testmode - "\n num-retries: " num-retries - "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) - "\n (null? non-completed): " (null? non-completed) - "\n reruns: " reruns - "\n items: " items - "\n can-run-more: " can-run-more) - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (cond ;; INNER COND #2 - ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test - ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - (and (eq? testmode 'toplevel) - (null? non-completed))) - (let ((test-name (tests:testqueue-get-testname test-record))) - (setenv "MT_TEST_NAME" test-name) ;; - (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id) ;; these may be needed by the launching process - (let ((items-list (items:get-items-from-config tconfig))) - (if (list? items-list) - (begin - (tests:testqueue-set-items! test-record items-list) - ;; (thread-sleep! *global-delta*) - (loop hed tal reruns)) - (begin - (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") - (exit 1)))))) - ((null? fails) - (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") - ;; only increment num-retries when there are no tests runing - (if (eq? 0 (list-ref can-run-more 1)) - (begin - (if (> num-retries 100) ;; first 100 retries are low time cost - (thread-sleep! (+ 2 *global-delta*)) - (thread-sleep! (+ 0.01 *global-delta*))) - (set! num-retries (+ num-retries 1)))) - (if (> num-retries max-retries) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns)) - (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? - ((and (not (null? fails))(eq? testmode 'normal)) - (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " - (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") - ", removing it from to-do list") - (if (not (null? tal)) - (begin - ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal)(cons hed reruns))))) - (else - (debug:print 8 "ERROR: No handler for this condition.") - (thread-sleep! (+ 1 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE - - ;; if can't run more just loop with next possible test - (begin - (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) - ;; (thread-sleep! (+ 2 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) - - ;; this case should not happen, added to help catch any bugs - ((and (list? items) itemdat) - (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") - (exit 1)) - ((not (null? reruns)) - (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, - (junked (lset-difference equal? tal newlst))) - (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) - (if (< num-retries max-retries) - (set! newlst (append reruns newlst))) - (set! num-retries (+ num-retries 1)) - ;; (thread-sleep! (+ 1 *global-delta*)) - (if (not (null? newlst)) - ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) - ((not (null? tal)) - (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) - (else - (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - )))) ;; LET* ((test-record - - ;; we get here on "drop through" - loop for next test in queue - ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! - - (debug:print-info 1 "All tests launched") - (thread-sleep! 0.5) - ;; FIXME! This harsh exit should not be necessary.... - ;; (if (not *runremote*)(exit)) ;; - #f)) ;; return a #f as a hint that we are done - ;; Here we need to check that all the tests remaining to be run are eligible to run - ;; and are not blocked by failed - +(define (runs:queue-next-hed tal reg n regful) + (if regful + (if (null? reg) ;; doesn't make sense, this is probably NOT the problem of the car + (car tal) + (car reg)) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +(include "run-tests-queue-classic.scm") +(include "run-tests-queue-new.scm") ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step -(define (run:test run-id runname keyvallst test-record flags parent-test) +(define (run:test run-id run-info key-vals runname keyvallst test-record flags parent-test) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) @@ -646,11 +381,11 @@ ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) (begin (hash-table-set! *test-meta-updated* test-name #t) - (open-run-close runs:update-test_meta db test-name test-conf))) + (runs:update-test_meta test-name test-conf))) ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) @@ -667,11 +402,11 @@ ;; (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (open-run-close db:tests-register-test #f run-id test-name item-path) + (cdb:tests-register-test *runremote* run-id test-name item-path) (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) (set! test-id (db:test-get-id testdat)) (change-directory test-path) @@ -719,11 +454,12 @@ (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"" (test:get-state testdat) "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) "\" or -force to override")) ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are ;; already met. - (if (not (launch-test #f run-id runname test-conf keyvallst test-name test-path itemdat flags)) + ;; This would be a great place to do the process-fork + (if (not (launch-test test-id run-id run-info key-vals runname test-conf keyvallst test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! *globalexitstatus* 1) ;; (process-signal (current-process-id) signal/kill)))))) ((KILLED) @@ -957,26 +693,26 @@ ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test -(define (runs:update-test_meta db test-name test-conf) - (let ((currrecord (open-run-close db:testmeta-get-record db test-name))) +(define (runs:update-test_meta test-name test-conf) + (let ((currrecord (cdb:remote-run db:testmeta-get-record #f test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 10 #f)) - (open-run-close db:testmeta-add-record db test-name))) + (cdb:remote-run db:testmeta-add-record #f test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) - (open-run-close db:testmeta-update-field db test-name fld val))))) + (cdb:remote-run db:testmeta-update-field #f test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (get-all-legal-tests))) @@ -986,18 +722,18 @@ (test-configf (conc test-path "/testconfig")) (testexists (and (file-exists? test-configf)(file-read-access? test-configf))) ;; read configs with tricks turned off (i.e. no system) (test-conf (if testexists (read-config test-configf #f #f)(make-hash-table)))) ;; use the open-run-close instead of passing in db - (runs:update-test_meta #f test-name test-conf))) + (runs:update-test_meta test-name test-conf))) test-names))) ;; This could probably be refactored into one complex query ... (define (runs:rollup-run keys keyvallst runname user) ;; was target, now keyvallst (debug:print 4 "runs:rollup-run, keys: " keys " keyvallst: " keyvallst " :runname " runname " user: " user) (let* ((db #f) ;; (keyvalllst (keys:target->keyval keys target)) - (new-run-id (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user)) + (new-run-id (cdb:remote-run db:register-run #f keys keyvallst runname "new" "n/a" user)) (prev-tests (open-run-close test:get-matching-previous-test-run-records db new-run-id "%" "%")) (curr-tests (open-run-close db:get-tests-for-run db new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) (open-run-close db:update-run-event_time db new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -244,11 +244,11 @@ (pop-directory) result))) ;; Do not rpc this one, do the underlying calls!!! -(define (tests:test-set-status! test-id state status comment dat) +(define (tests:test-set-status! test-id state status comment dat #!key (work-area #f)) (debug:print-info 4 "tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat) (let* ((db #f) (real-status status) (otherdat (if dat dat (make-hash-table))) (testdat (cdb:get-test-info-by-id *runremote* test-id)) @@ -290,11 +290,11 @@ (cdb:test-set-status-state *runremote* test-id real-status state (if waived waived comment))) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, it does remote calls under the hood. (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup #f test-id status)) + (db:test-data-rollup #f test-id status work-area: work-area)) ;; add metadata (need to do this way to avoid SQL injection issues) ;; :first_err ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) @@ -324,11 +324,12 @@ expected "," tol "," units "," dcomment ",," ;; extra comma for status type ))) - (cdb:remote-run db:csv->test-data #f test-id + ;; This was run remote, don't think that makes sense. + (db:csv->test-data #f test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path status)) @@ -553,32 +554,41 @@ tdb "SELECT count(id) FROM test_rundat;") res)) 0) -(define (db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname) - (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" - cpuload - diskfree - test-id) - (if minutes (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) - (if (eq? num-records 0) - (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" - uname hostname test-id))) - -(define (test-set-meta-info db test-id run-id testname itemdat minutes) +(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname) + ;; This is a good candidate for threading the requests to enable + ;; transactionized write at the server + (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree) + ;; (let ((db (open-db))) + ;; (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" + ;; cpuload + ;; diskfree + ;; test-id) + (if minutes + (cdb:tests-update-run-duration *runremote* test-id minutes)) + ;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) + (if (eq? num-records 0) + (cdb:tests-update-uname-host *runremote* test-id uname hostname)) + ;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id)) + ;;(sqlite3:finalize! db)) + ) + +(define (tests:set-meta-info db test-id run-id testname itemdat minutes work-area) ;; DOES cdb:remote-run under the hood! - (let* ((tdb (db:open-test-db-by-test-id db test-id)) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (num-records (test:tdb-get-rundat-count tdb)) (cpuload (get-cpu-load)) (diskfree (get-df (current-directory)))) (if (eq? (modulo num-records 10) 0) ;; every ten records update central (let ((uname (get-uname "-srvpio")) (hostname (get-host-name))) - (cdb:remote-run db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname))) + (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname))) (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);" - cpuload diskfree minutes))) + cpuload diskfree minutes) + (sqlite3:finalize! tdb))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -44,15 +44,17 @@ test3 : fullprep cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10 -test4 : fullprep - cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) +test4 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server -test5 : fullprep +test5 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & # cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & @@ -63,11 +65,11 @@ cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 cleanprep : ../*.scm Makefile */*.config - mkdir -p /tmp/mt_runs /tmp/mt_links + mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links cd ..;make;make install rm -f */logging.db touch cleanprep fullprep : cleanprep ADDED tests/fdktestqa/testqa/Makefile Index: tests/fdktestqa/testqa/Makefile ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/Makefile @@ -0,0 +1,3 @@ + +all : + megatest -runtests % -target a/b :runname c Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,5 +1,8 @@ [setup] -testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. +testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log +runqueue 2 [include ../fdk.config] +[server] +timeout 0.01 ADDED tests/fdktestqa/testqa/runsuite.sh Index: tests/fdktestqa/testqa/runsuite.sh ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/runsuite.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +(cd ../../..;make && make install) || exit 1 +export PATH=$PWD/../../../bin:$PATH + +for i in a b c d e f;do + # g h i j k l m n o p q r s t u v w x y z;do + megatest -runtests % -target a/b :runname $i & +done + +echo "" > num-running.log +while true; do + foo=`megatest -list-runs % | grep RUNNING | wc -l` + echo "Num running at `date` $foo" + echo "$foo at `date`" >> num-running.log + # to make the test go at a reasonable clip only gather this info ever minute + sleep 1m +done Index: tests/fdktestqa/testqa/tests/bigrun/step1.sh ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/step1.sh +++ tests/fdktestqa/testqa/tests/bigrun/step1.sh @@ -1,3 +1,8 @@ #!/bin/sh -sleep 10 +if [ $NUMBER -lt 200 ];then + sleep $NUMBER +else + sleep 200 +fi + exit 0 Index: tests/fdktestqa/testqa/tests/bigrun/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/testconfig +++ tests/fdktestqa/testqa/tests/bigrun/testconfig @@ -7,11 +7,11 @@ # waiton setup priority 0 # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a (or (any->number (get-environment-variable "NUMTESTS")) 1100))(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/testconfig +++ tests/fdktestqa/testqa/tests/bigrun2/testconfig @@ -9,11 +9,11 @@ mode itemmatch # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 150)(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,8 +1,8 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 200 +max_concurrent_jobs 150 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links [jobtools] useshell yes Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -9,16 +9,24 @@ area1 /tmp/oldarea/megatest [include config/mt_include_1.config] [setup] +# Set launchwait to yes to use the old launch run code that waits for the launch process to return before +# proceeding. +# launchwait yes + +# If defined the runs:run-tests-queue-new queue code is used with the register test depth +# given. Otherwise the old code is used. The old code will be removed in the future and +# a default of 10 used. +# runqueue 2 # It is possible (but not recommended) to override the rsync command used # to populate the test directories. For test development the following # example can be useful # -testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. +# testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # or for hard links # testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. @@ -49,10 +57,14 @@ WACKYVAR6 #{scheme (args:get-arg "-target")} PREDICTABLE the_ans MRAH MT_RUN_AREA_HOME=#{getenv MT_RUN_AREA_HOME} # The empty var should have a definition with null string EMPTY_VAR + +WRAPPEDVAR This var should have the work blah thrice: \ +blah \ +blah # XTERM [system xterm] # RUNDEAD [system exit 56] [server] @@ -61,11 +73,11 @@ # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours -timeout 0.05 +timeout 0.025 ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area Index: tests/fullrun/runconfigs.config ================================================================== --- tests/fullrun/runconfigs.config +++ tests/fullrun/runconfigs.config @@ -1,5 +1,8 @@ +[default] +SOMEVAR This should show up in SOMEVAR3 + [include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config] # #{system echo 'VACKYVAR #{shell pwd}' > $MT_RUN_AREA_HOME/config/$USER.config} [include ./config/#{getenv USER}.config] @@ -9,5 +12,11 @@ [default/ubuntu/nfs] WACKYVAR2 #{runconfigs-get CURRENT} [ubuntu/nfs/none] WACKYVAR2 #{runconfigs-get CURRENT} +SOMEVAR2 This should show up in SOMEVAR4 if the target is ubuntu/nfs/none + +[default] +SOMEVAR3 #{rget SOMEVAR} +SOMEVAR4 #{rget SOMEVAR2} +SOMEVAR5 #{runconfigs-get SOMEVAR2}