Index: .fossil-settings/ignore-glob ================================================================== --- .fossil-settings/ignore-glob +++ .fossil-settings/ignore-glob @@ -1,13 +1,29 @@ utils/build/* *~ *.o bin/* -tests/megatest.db -tests/monitor.db +megatest.db +monitor.db megatest dboard tests/fullrun/tmp/* tests/simpleruns tests/simplelinks mkdeploy/runs mkdeploy/links +example/linktree +example/runs +*.backup +mkdeploy/linktree +mkdeploy/site.config +mtest +newdboard +*.log +fslsync/fslsynclinks/* +fslsync/fslsyncruns/* +sites.dat +fullrun/config/*.config +fullrun/envfile.txt +*.bak +simplerun/*.scm +simplerun/simpleruns Index: Makefile ================================================================== --- Makefile +++ Makefile @@ -1,6 +1,6 @@ - +# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ @@ -55,10 +55,13 @@ db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm +# Temporary while transitioning to new routine +runs.o : run-tests-queue-classic.scm run-tests-queue-new.scm + megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi $(OFILES) $(GOFILES) : common_records.scm Index: NOTES ================================================================== --- NOTES +++ NOTES @@ -3,5 +3,28 @@ 3. Tests may or may not have file system access to the originating run area. rsync is used to pull the test area to the home host if and only if the originating area can not be seen via file system. NO LONGER TRUE. Rsync is used but file system must be visible. 4. All db access is done via the home host. NOT IMPLEMENTED YET. + +REMOTE ACCESS DB LOADS + +INFO: (0) Max cached queries was 10 +INFO: (0) Number of cached writes 27043 +INFO: (0) Average cached write time 15.0634544983915 ms +INFO: (0) Number non-cached queries 71928 +INFO: (0) Average non-cached time 5.15547491936381 ms +INFO: (0) Server shutdown complete. Exiting + + +fdktestqa on Apr 29, 2013: 1812 tests + +INFO: (0) Max cached queries was 10 +INFO: (0) Number of cached writes 41335 +INFO: (0) Average cached write time 206.081553163179 ms +INFO: (0) Number non-cached queries 74289 +INFO: (0) Average non-cached time 1055.09826488444 ms +INFO: (0) Server shutdown complete. Exiting + +Start: 0 at Sun Apr 28 22:18:25 MST 2013 +Max: 52 at Sun Apr 28 23:06:59 MST 2013 +End: 6 at Sun Apr 28 23:47:51 MST 2013 Index: client.scm ================================================================== --- client.scm +++ client.scm @@ -17,11 +17,11 @@ (use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest) ;; (use zmq) (import (prefix sqlite3 sqlite3:)) -(use spiffy uri-common intarweb http-client spiffy-request-vars) +(use spiffy uri-common intarweb http-client spiffy-request-vars uri-common intarweb) (declare (unit client)) (declare (uses common)) (declare (uses db)) @@ -75,11 +75,20 @@ ;; ;; DEBUG STUFF ;; (if (eq? *transport-type* 'fs)(begin (print "ERROR!!!!!!! refusing to run with transport " *transport-type*)(exit 99))) (debug:print-info 11 "Using transport type of " *transport-type* (if hostinfo (conc " to connect to " hostinfo) "")) (case *transport-type* - ((fs)(if (not *megatest-db*)(set! *megatest-db* (open-db)))) + ((fs) ;; (if (not *megatest-db*)(set! *megatest-db* (open-db)))) + ;; we are not doing fs any longer. let's cheat and start up a server + ;; if we are falling back on fs (not 100% supported) do an about face and start a server + (if (not (equal? (args:get-arg "-transport") "fs")) + (begin + (set! *transport-type* #f) + (system (conc "megatest -list-servers | grep " megatest-version " | grep alive || megatest -server - -daemonize && sleep 3")) + (thread-sleep! 1) + (if (> numtries 0) + (client:setup numtries: (- numtries 1)))))) ((http) (http-transport:client-connect (tasks:hostinfo-get-interface hostinfo) (tasks:hostinfo-get-port hostinfo))) ((zmq) (zmq-transport:client-connect (tasks:hostinfo-get-interface hostinfo) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -53,34 +53,33 @@ (define *server-id* #f) (define *server-info* #f) (define *time-to-exit* #f) (define *received-response* #f) (define *default-numtries* 10) +(define *server-run* #t) +(define *db-write-access* #t) + (define *target* (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN (define *keys* (make-hash-table)) ;; cache the keys here (define *keyvals* (make-hash-table)) (define *toptest-paths* (make-hash-table)) ;; cache toptest path settings here (define *test-paths* (make-hash-table)) ;; cache test-id to test run paths here (define *test-ids* (make-hash-table)) ;; cache run-id, testname, and item-path => test-id (define *test-info* (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db -(define *run-info-cache* (make-hash-table)) ;; run info is stable, no need to reget - ;; Awful. Please FIXME (define *env-vars-by-run-id* (make-hash-table)) (define *current-run-name* #f) (define (common:clear-caches) - (set! *target* (make-hash-table)) (set! *keys* (make-hash-table)) (set! *keyvals* (make-hash-table)) (set! *toptest-paths* (make-hash-table)) (set! *test-paths* (make-hash-table)) (set! *test-ids* (make-hash-table)) (set! *test-info* (make-hash-table)) - (set! *run-info-cache* (make-hash-table)) (set! *env-vars-by-run-id* (make-hash-table)) (set! *test-id-cache* (make-hash-table))) ;; Debugging stuff (define *verbosity* 1) Index: configf.scm ================================================================== --- configf.scm +++ configf.scm @@ -59,11 +59,11 @@ (define configf:comment-rx (regexp "^\\s*#.*")) (define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$")) ;; read a line and process any #{ ... } constructs -(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get)\\s+([^\\}\\{]*)\\}(.*)")) +(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)")) (define (configf:process-line l ht) (let loop ((res l)) (if (string? res) (let ((matchdat (string-search configf:var-expand-regex res))) (if matchdat @@ -81,10 +81,11 @@ (let* ((parts (string-split cmd)) (sect (car parts)) (var (cadr parts))) (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))"))) ((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) + ((rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))")) (else "(lambda (ht)(print \"ERROR\") \"ERROR\")")))) ;; (print "fullcmd=" fullcmd) (with-input-from-string fullcmd (lambda () (set! result ((eval (read)) ht)))) @@ -110,18 +111,26 @@ ;; Lookup a value in runconfigs based on -reqtarg or -target (define (runconfigs-get config var) (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ - (config-lookup config targ var) - #f))) + (or (configf:lookup config targ var) + (configf:lookup config "default" var)) + (configf:lookup config "default" var)))) (define-inline (configf:read-line p ht allow-processing) - (if (and allow-processing - (not (eq? allow-processing 'return-string))) - (configf:process-line (read-line p) ht) - (read-line p))) + (let loop ((inl (read-line p))) + (if (and (string? inl) + (not (string-null? inl)) + (equal? "\\" (string-take-right inl 1))) ;; last character is \ + (let ((nextl (read-line p))) + (if (not (eof-object? nextl)) + (loop (string-append inl nextl)))) + (if (and allow-processing + (not (eq? allow-processing 'return-string))) + (configf:process-line inl ht) + inl)))) ;; read a config file, returns hash table of alists ;; read a config file, returns hash table of alists ;; adds to ht if given (must be #f otherwise) Index: dashboard-tests.scm ================================================================== --- dashboard-tests.scm +++ dashboard-tests.scm @@ -270,13 +270,13 @@ (keydat (if testdat (open-run-close db:get-key-val-pairs #f run-id) #f)) (rundat (if testdat (open-run-close db:get-run-info #f run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-row rundat) (db:get-header rundat) "runname") #f)) - (teststeps (if testdat (db:get-compressed-steps test-id) '())) (logfile "/this/dir/better/not/exist") (rundir logfile) + (teststeps (if testdat (db:get-compressed-steps test-id work-area: rundir) '())) (testfullname (if testdat (db:test-get-fullname testdat) "Gathering data ...")) (testname (if testdat (db:test-get-testname testdat) "n/a")) (testmeta (if testdat (let ((tm (open-run-close db:testmeta-get-record #f testname))) (if tm tm (make-db:testmeta))) @@ -305,15 +305,19 @@ (refreshdat (lambda () (let* ((curr-mod-time (file-modification-time db-path)) (need-update (or (and (> curr-mod-time db-mod-time) (> (current-seconds) (+ last-update 2))) ;; every two seconds if db touched request-update)) - (newtestdat (if need-update (open-run-close db:get-test-info-by-id #f test-id)))) + (newtestdat (if need-update + (handle-exceptions + exn + (debug:print-info 2 "test db access issue: " ((condition-property-accessor 'exn 'message) exn)) + (open-run-close db:get-test-info-by-id #f test-id ))))) (cond ((and need-update newtestdat) (set! testdat newtestdat) - (set! teststeps (db:get-compressed-steps test-id)) + (set! teststeps (db:get-compressed-steps test-id work-area: rundir)) (set! logfile (conc (db:test-get-rundir testdat) "/" (db:test-get-final_logf testdat))) (set! rundir (db:test-get-rundir testdat)) (set! testfullname (db:test-get-fullname testdat)) ;; (debug:print 0 "INFO: teststeps=" (intersperse teststeps "\n ")) ) @@ -349,26 +353,33 @@ (store-button store-label) (command-text-box (iup:textbox #:expand "HORIZONTAL" #:font "Courier New, -10")) (command-launch-button (iup:button "Execute!" #:action (lambda (x) (let ((cmd (iup:attribute command-text-box "VALUE"))) (system (conc cmd " &")))))) + (kill-jobs (lambda (x) + (iup:attribute-set! + command-text-box "VALUE" + (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname + " -set-state-status KILLREQ,n/a -testpatt %/% " + ;; (conc testname "/" (if (equal? item-path "") "%" item-path)) + " :state RUNNING ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) (run-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" (conc "xterm -geometry 180x20 -e \"megatest -target " keystring " :runname " runname " -runtests " (conc testname "/" (if (equal? item-path "") "%" item-path)) - ";echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) + " ;echo Press any key to continue;bash -c 'read -n 1 -s'\"")))) (remove-test (lambda (x) (iup:attribute-set! command-text-box "VALUE" (conc "xterm -geometry 180x20 -e \"megatest -remove-runs -target " keystring " :runname " runname " -testpatt " (conc testname "/" (if (equal? item-path "") "%" item-path)) - " -v;echo Press any key to continue;bash -c 'read -n 1 -s'\""))))) + " -v ;echo Press any key to continue;bash -c 'read -n 1 -s'\""))))) (cond ((not testdat)(begin (print "ERROR: bad test info for " test-id)(exit 1))) ((not rundat)(begin (print "ERROR: found test info but there is a problem with the run info for " run-id)(exit 1))) (else ;; (test-set-status! db run-id test-name state status itemdat) @@ -384,15 +395,16 @@ (host-info-panel testdat store-label) ;; The controls (iup:frame #:title "Actions" (iup:vbox (iup:hbox - (iup:button "View Log" #:action viewlog #:size "80x") - (iup:button "Start Xterm" #:action xterm #:size "80x") - (iup:button "Run Test" #:action run-test #:size "80x") - (iup:button "Clean Test" #:action remove-test #:size "80x") - (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")) + (iup:button "View Log" #:action viewlog #:size "80x") + (iup:button "Start Xterm" #:action xterm #:size "80x") + (iup:button "Run Test" #:action run-test #:size "80x") + (iup:button "Clean Test" #:action remove-test #:size "80x") + (iup:button "Kill All Jobs" #:action kill-jobs #:size "80x") + (iup:button "Close" #:action (lambda (x)(exit)) #:size "80x")) (apply iup:hbox (list command-text-box command-launch-button)))) (set-fields-panel test-id testdat) (let ((tabs @@ -437,11 +449,12 @@ (let ((val (vector-ref hed (- colnum 1)))) (iup:attribute-set! steps-matrix (conc rownum ":" colnum)(if val (conc val) "")) (if (< colnum 6) (loop hed tal rownum (+ colnum 1)) (if (not (null? tal)) - (loop (car tal)(cdr tal)(+ rownum 1) 1))))))))) + (loop (car tal)(cdr tal)(+ rownum 1) 1)))) + (iup:attribute-set! steps-matrix "REDRAW" "ALL")))))) (hash-table-set! widgets "StepsMatrix" proc) (proc testdat)) steps-matrix) ;; populate the Test Data panel (iup:frame Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -98,12 +98,13 @@ (define dlg #f) (define max-test-num 0) ;; (define *keys* (open-run-close db:get-keys #f)) (define *keys* (cdb:remote-run db:get-keys #f)) ;; (define *keys* (db:get-keys *db*)) -(define *dbkeys* (map (lambda (x)(vector-ref x 0)) - (append *keys* (list (vector "runname" "blah"))))) + +(define *dbkeys* (append *keys* (list "runname"))) + (define *header* #f) (define *allruns* '()) (define *allruns-by-id* (make-hash-table)) ;; (define *runchangerate* (make-hash-table)) @@ -197,11 +198,10 @@ (begin (set! *last-update* (current-seconds)) (set! *tot-run-count* (length runs)))) ;; ;; trim runs to only those that are changing often here - ;; (for-each (lambda (run) (let* ((run-id (db:get-value-by-header run header "id")) (tests (let ((tsts (cdb:remote-run db:get-tests-for-run #f run-id testnamepatt states statuses))) (if *tests-sort-reverse* (reverse tsts) tsts))) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -68,14 +68,18 @@ (begin (debug:print 0 "ERROR: Attempted to open db when not in megatest area. Exiting.") (exit)))) (let* ((dbpath (conc *toppath* "/megatest.db")) ;; fname) (dbexists (file-exists? dbpath)) + (write-access (file-write-access? dbpath)) (db (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) ;; 136000))) ;; 136000 = 2.2 minutes + (if (and dbexists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (debug:print-info 11 "open-db, dbpath=" dbpath " argv=" (argv)) (sqlite3:set-busy-handler! db handler) (if (not dbexists) (db:initialize db)) (db:set-sync db) @@ -133,16 +137,16 @@ res)) (define (db:initialize db) (debug:print-info 11 "db:initialize START") (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... - (keys (config-get-fields configdat)) + (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) (keystr (keys->keystr keys)) (fieldstr (keys->key/field keys))) (for-each (lambda (key) - (let ((keyn (vector-ref key 0))) + (let ((keyn key)) (if (member (string-downcase keyn) (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count")) (begin (print "ERROR: your key cannot be named " keyn " as this conflicts with the same named field in the runs table") @@ -151,11 +155,11 @@ keys) ;; (sqlite3:execute db "PRAGMA synchronous = OFF;") (db:set-sync db) (sqlite3:execute db "CREATE TABLE IF NOT EXISTS keys (id INTEGER PRIMARY KEY, fieldname TEXT, fieldtype TEXT, CONSTRAINT keyconstraint UNIQUE (fieldname));") (for-each (lambda (key) - (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" (key:get-fieldname key)(key:get-fieldtype key))) + (sqlite3:execute db "INSERT INTO keys (fieldname,fieldtype) VALUES (?,?);" key "TEXT")) keys) (sqlite3:execute db (conc "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, " fieldstr (if havekeys "," "") "runname TEXT," @@ -241,24 +245,24 @@ ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== ;; Create the sqlite db for the individual test(s) -(define (open-test-db testpath) - (debug:print-info 11 "open-test-db " testpath) - (if (and testpath - (directory? testpath) - (file-read-access? testpath)) - (let* ((dbpath (conc testpath "/testdat.db")) +(define (open-test-db work-area) + (debug:print-info 11 "open-test-db " work-area) + (if (and work-area + (directory? work-area) + (file-read-access? work-area)) + (let* ((dbpath (conc work-area "/testdat.db")) (dbexists (file-exists? dbpath)) (handler (make-busy-timeout (if (args:get-arg "-override-timeout") (string->number (args:get-arg "-override-timeout")) 136000)))) (handle-exceptions exn (begin - (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test" + (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" ((condition-property-accessor 'exn 'message) exn)) #f) (set! db (sqlite3:open-database dbpath))) (sqlite3:set-busy-handler! db handler) (if (not dbexists) @@ -265,29 +269,31 @@ (begin (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (db:testdb-initialize db))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") - (debug:print-info 11 "open-test-db END (sucessful)" testpath) + (debug:print-info 11 "open-test-db END (sucessful)" work-area) ;; now let's test that everything is correct (handle-exceptions exn (begin - (debug:print 0 "ERROR: problem accessing test db " testpath ", you probably should clean and re-run this test" + (debug:print 0 "ERROR: problem accessing test db " work-area ", you probably should clean and re-run this test" ((condition-property-accessor 'exn 'message) exn)) #f) ;; Is there a cheaper single line operation that will check for existance of a table ;; and raise an exception ? (sqlite3:execute db "SELECT id FROM test_data LIMIT 1;")) db) (begin - (debug:print-info 11 "open-test-db END (unsucessful)" testpath) + (debug:print-info 11 "open-test-db END (unsucessful)" work-area) #f))) ;; find and open the testdat.db file for an existing test -(define (db:open-test-db-by-test-id db test-id) - (let* ((test-path (cdb:remote-run db:test-get-rundir-from-test-id db test-id))) +(define (db:open-test-db-by-test-id db test-id #!key (work-area #f)) + (let* ((test-path (if work-area + work-area + (cdb:remote-run db:test-get-rundir-from-test-id db test-id)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path))) (define (db:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") @@ -490,24 +496,26 @@ (define (db:del-var db var) (debug:print-info 11 "db:del-var START " var) (sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var) (debug:print-info 11 "db:del-var END " var)) -;; use a global for some primitive caching, it is just silly to re-read the db -;; over and over again for the keys since they never change +;; use a global for some primitive caching, it is just silly to +;; re-read the db over and over again for the keys since they never +;; change + +;; why get the keys from the db? why not get from the *configdat* +;; using keys:config-get-fields? (define (db:get-keys db) (if *db-keys* *db-keys* (let ((res '())) - (debug:print-info 11 "db:get-keys START (cache miss)") (sqlite3:for-each-row - (lambda (key keytype) - (set! res (cons (vector key keytype) res))) + (lambda (key) + (set! res (cons key res))) db - "SELECT fieldname,fieldtype FROM keys ORDER BY id DESC;") + "SELECT fieldname FROM keys ORDER BY id DESC;") (set! *db-keys* res) - (debug:print-info 11 "db:get-keys END (cache miss)") res))) (define (db:get-value-by-header row header field) (debug:print-info 4 "db:get-value-by-header row: " row " header: " header " field: " field) (if (null? header) #f @@ -519,15 +527,34 @@ (if (null? tal) #f (loop (car tal)(cdr tal)(+ n 1))))))) ;;====================================================================== ;; R U N S ;;====================================================================== + +(define (db:get-run-name-from-id db run-id) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (runname) + (set! res runname)) + db + "SELECT runname FROM runs WHERE id=?;" + run-id) + res)) + +(define (db:get-run-key-val db run-id key) + (let ((res #f)) + (sqlite3:for-each-row + (lambda (val) + (set! res val)) + db + (conc "SELECT " key " FROM runs WHERE id=?;") + run-id) + res)) ;; keys list to key1,key2,key3 ... (define (runs:get-std-run-fields keys remfields) - (let* ((header (append (map key:get-fieldname keys) - remfields)) + (let* ((header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (list keystr header))) ;; make a query (fieldname like 'patt1' OR fieldname @@ -540,10 +567,43 @@ (conc fieldname " " wildtype " '" patt "'"))) (if (null? patts) '("") patts)) comparator))) + + +;; register a test run with the db +(define (db:register-run db keyvals runname state status user) + (debug:print 3 "runs:register-run runname: " runname " state: " state " status: " status " user: " user) + (let* ((keys (map car keyvals)) + (keystr (keys->keystr keys)) + (comma (if (> (length keys) 0) "," "")) + (andstr (if (> (length keys) 0) " AND " "")) + (valslots (keys->valslots keys)) ;; ?,?,? ... + (allvals (append (list runname state status user) (map cadr keyvals))) + (qryvals (append (list runname) (map cadr keyvals))) + (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) + (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) + (debug:print 2 "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") + (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" + (let ((res #f)) + (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") + allvals) + (apply sqlite3:for-each-row + (lambda (id) + (set! res id)) + db + (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) + ;(debug:print 4 "qry: " qry) + qry) + qryvals) + (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) + res) + (begin + (debug:print 0 "ERROR: Called without all necessary keys") + #f)))) + ;; replace header and keystr with a call to runs:get-std-run-fields ;; ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; runpatts: patt1,patt2 ... @@ -551,12 +611,11 @@ (define (db:get-runs db runpatt count offset keypatts) (let* ((res '()) (keys (db:get-keys db)) (runpattstr (db:patt->like "runname" runpatt)) (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append (map key:get-fieldname keys) - remfields)) + (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ","))) (qrystr (conc "SELECT " keystr " FROM runs WHERE (" runpattstr ") " ;; runname LIKE ? " ;; Generate: " AND x LIKE 'keypatt' ..." (if (null? keypatts) "" @@ -566,11 +625,11 @@ (let ((key (car keypatt)) (patt (cadr keypatt))) (db:patt->like key patt))) keypatts) " AND "))) - " ORDER BY event_time DESC " + " AND state != 'deleted' ORDER BY event_time DESC " (if (number? count) (conc " LIMIT " count) "") (if (number? offset) (conc " OFFSET " offset) @@ -591,11 +650,11 @@ (debug:print-info 11 "db:get-num-runs START " runpatt) (sqlite3:for-each-row (lambda (count) (set! numruns count)) db - "SELECT COUNT(id) FROM runs WHERE runname LIKE ?;" runpatt) + "SELECT COUNT(id) FROM runs WHERE runname LIKE ? AND state != 'deleted';" runpatt) (debug:print-info 11 "db:get-num-runs END " runpatt) numruns)) ;; use (get-value-by-header (db:get-header runinfo)(db:get-row runinfo)) (define (db:get-run-info db run-id) @@ -602,20 +661,19 @@ ;;(if (hash-table-ref/default *run-info-cache* run-id #f) ;; (hash-table-ref *run-info-cache* run-id) (let* ((res #f) (keys (db:get-keys db)) (remfields (list "id" "runname" "state" "status" "owner" "event_time")) - (header (append (map key:get-fieldname keys) - remfields)) + (header (append keys remfields)) (keystr (conc (keys->keystr keys) "," (string-intersperse remfields ",")))) (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) (sqlite3:for-each-row (lambda (a . x) (set! res (apply vector a x))) db - (conc "SELECT " keystr " FROM runs WHERE id=?;") + (conc "SELECT " keystr " FROM runs WHERE id=? AND state != 'deleted';") run-id) (debug:print-info 11 "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr) (let ((finalres (vector header res))) ;; (hash-table-set! *run-info-cache* run-id finalres) finalres))) @@ -626,11 +684,14 @@ (debug:print-info 11 "db:set-comment-for-run END run-id: " run-id " comment: " comment)) ;; does not (obviously!) removed dependent data. But why not!!? (define (db:delete-run db run-id) (common:clear-caches) ;; don't trust caches after doing any deletion - (sqlite3:execute db "DELETE FROM runs WHERE id=?;" run-id)) + (sqlite3:execute db "UPDATE runs SET state='deleted' WHERE id=?;" run-id)) +;; (sqlite3:execute db "DELETE FROM runs WHERE id=?;" run-id)) + + (define (db:update-run-event_time db run-id) (debug:print-info 11 "db:update-run-event_time START run-id: " run-id) (sqlite3:execute db "UPDATE runs SET event_time=strftime('%s','now') WHERE id=?;" run-id) (debug:print-info 11 "db:update-run-event_time END run-id: " run-id)) @@ -650,76 +711,61 @@ ;;====================================================================== ;; get key val pairs for a given run-id ;; ( (FIELDNAME1 keyval1) (FIELDNAME2 keyval2) ... ) (define (db:get-key-val-pairs db run-id) - (let* ((keys (get-keys db)) + (let* ((keys (db:get-keys db)) (res '())) (debug:print-info 11 "db:get-key-val-pairs START keys: " keys " run-id: " run-id) (for-each (lambda (key) - (let ((qry (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;"))) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) ;; (debug:print 0 "qry: " qry) (sqlite3:for-each-row (lambda (key-val) - (set! res (cons (list (key:get-fieldname key) key-val) res))) + (set! res (cons (list key key-val) res))) db qry run-id))) keys) (debug:print-info 11 "db:get-key-val-pairs END keys: " keys " run-id: " run-id) (reverse res))) ;; get key vals for a given run-id (define (db:get-key-vals db run-id) - (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f))) + (let ((mykeyvals (hash-table-ref/default *keyvals* run-id #f))) (if mykeyvals mykeyvals - (let* ((keys (get-keys db)) - (res '())) - (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id) - (for-each - (lambda (key) - (let ((qry (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;"))) - ;; (debug:print 0 "qry: " qry) - (sqlite3:for-each-row - (lambda (key-val) - (set! res (cons key-val res))) - db qry run-id))) - keys) - (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id) + (let* ((keys (db:get-keys db)) + (res '())) + (debug:print-info 11 "db:get-key-vals START keys: " keys " run-id: " run-id) + (for-each + (lambda (key) + (let ((qry (conc "SELECT " key " FROM runs WHERE id=?;"))) + ;; (debug:print 0 "qry: " qry) + (sqlite3:for-each-row + (lambda (key-val) + (set! res (cons key-val res))) + db qry run-id))) + keys) + (debug:print-info 11 "db:get-key-vals END keys: " keys " run-id: " run-id) (let ((final-res (reverse res))) (hash-table-set! *keyvals* run-id final-res) final-res))))) ;; The target is keyval1/keyval2..., cached in *target* as it is used often (define (db:get-target db run-id) (let ((mytarg (hash-table-ref/default *target* run-id #f))) (if mytarg mytarg - (let* ((keyvals (db:get-key-vals db run-id)) - (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) + (let* ((keyvals (db:get-key-vals db run-id)) + (thekey (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/"))) (hash-table-set! *target* run-id thekey) thekey)))) ;;====================================================================== ;; T E S T S ;;====================================================================== -(define (db:tests-register-test db run-id test-name item-path) - (debug:print-info 11 "db:tests-register-test START db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (for-each - (lambda (pth) - (sqlite3:execute db "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');" - run-id - test-name - pth)) - item-paths) - (debug:print-info 11 "db:tests-register-test END db=" db ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") - #f)) - ;; states and statuses are lists, turn them into ("PASS","FAIL"...) and use NOT IN ;; i.e. these lists define what to NOT show. ;; states and statuses are required to be lists, empty is ok ;; not-in #t = above behaviour, #f = must match (define (db:get-tests-for-run db run-id testpatt states statuses @@ -744,11 +790,11 @@ " IN ('" (string-intersperse statuses "','") "')"))) (tests-match-qry (tests:match->sqlqry testpatt)) (qry (conc "SELECT " qryvals - " FROM tests WHERE run_id=? " + " FROM tests WHERE run_id=? AND state != 'DELETED' " (if states-qry (conc " AND " states-qry) "") (if statuses-qry (conc " AND " statuses-qry) "") (if tests-match-qry (conc " AND (" tests-match-qry ") ") "") (case sort-by ((rundir) " ORDER BY length(rundir) DESC;") @@ -799,11 +845,11 @@ " IN ('" (string-intersperse statuses "','") "')"))) (tests-match-qry (tests:match->sqlqry testpatt)) (qry (conc "SELECT " qryvals - " FROM tests WHERE " + " FROM tests WHERE state != 'DELETED' AND " (if run-ids (if (list? run-ids) (conc " run_id in (" (string-intersperse (map conc run-ids) ",") ") ") (conc "run_id=" run-ids " ")) " ") ;; #f => run-ids don't filter on run-ids @@ -824,13 +870,13 @@ ) (debug:print-info 11 "db:get-tests-for-run START run-ids=" run-ids ", testpatt=" testpatt ", states=" states ", statuses=" statuses ", not-in=" not-in ", sort-by=" sort-by) res)) ;; this one is a bit broken BUG FIXME -(define (db:delete-test-step-records db test-id) +(define (db:delete-test-step-records db test-id #!key (work-area #f)) ;; Breaking it into two queries for better file access interleaving - (let* ((tdb (db:open-test-db-by-test-id db test-id))) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) ;; test db's can go away - must check every time (if tdb (begin (sqlite3:execute tdb "DELETE FROM test_steps;") (sqlite3:execute tdb "DELETE FROM test_data;") @@ -855,16 +901,18 @@ (define (db:delete-tests-for-run db run-id) (common:clear-caches) (sqlite3:execute db "DELETE FROM tests WHERE run_id=?;" run-id)) (define (db:delete-old-deleted-test-records db) + (common:clear-caches) (let ((targtime (- (current-seconds)(* 30 24 60 60)))) ;; one month in the past (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED' AND event_timeobj rawdat)) - (vector-ref tmp 2)))) + (if rawdat + (begin + (set! tmp (db:string->obj rawdat)) + (vector-ref tmp 2)) + (begin + (debug:print 0 "ERROR: Communication with the server failed. Exiting if possible") + (exit 1)))))) ((zmq) (handle-exceptions exn (begin + (debug:print-info 0 "cdb:client-call timeout or error. Trying again in 5 seconds") (thread-sleep! 5) (if (> numretries 0)(apply cdb:client-call serverdat qtype immediate (- numretries 1) params))) (let* ((push-socket (vector-ref serverdat 0)) (sub-socket (vector-ref serverdat 1)) (client-sig (client:get-signature)) @@ -1217,31 +1314,32 @@ (receive-message* sub-socket) ;; now get the actual message (let ((myres (db:string->obj (receive-message* sub-socket)))) (if (equal? query-sig (vector-ref myres 1)) (set! res (vector-ref myres 2)) - (loop)))))) - (timeout (lambda () - (let loop ((n numretries)) - (thread-sleep! 15) - (if (not res) - (if (> numretries 0) - (begin - (debug:print 2 "WARNING: no reply to query " params ", trying resend") - (debug:print-info 11 "re-sending message") - (send-message push-socket zdat) - (debug:print-info 11 "message re-sent") - (loop (- n 1))) - ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) - (begin - (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") - (exit 5)))))))) + (loop))))))) + ;; (timeout (lambda () + ;; (let loop ((n numretries)) + ;; (thread-sleep! 15) + ;; (if (not res) + ;; (if (> numretries 0) + ;; (begin + ;; (debug:print 2 "WARNING: no reply to query " params ", trying resend") + ;; (debug:print-info 11 "re-sending message") + ;; (send-message push-socket zdat) + ;; (debug:print-info 11 "message re-sent") + ;; (loop (- n 1))) + ;; ;; (apply cdb:client-call *runremote* qtype immediate (- numretries 1) params)) + ;; (begin + ;; (debug:print 0 "ERROR: cdb:client-call timed out " params ", exiting.") + ;; (exit 5)))))))) (debug:print-info 11 "Starting threads") (let ((th1 (make-thread send-receive "send receive")) - (th2 (make-thread timeout "timeout"))) + ;; (th2 (make-thread timeout "timeout")) + ) (thread-start! th1) - (thread-start! th2) + ;; (thread-start! th2) (thread-join! th1) (debug:print-info 11 "cdb:client-call returning res=" res) res)))))) (define (cdb:set-verbosity serverdat val) @@ -1266,20 +1364,17 @@ (define (cdb:pass-fail-counts serverdat test-id fail-count pass-count) (cdb:client-call serverdat 'pass-fail-counts #t *default-numtries* fail-count pass-count test-id)) (define (cdb:tests-register-test serverdat run-id test-name item-path) - (let ((item-paths (if (equal? item-path "") - (list item-path) - (list item-path "")))) - (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))) + (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path)) (define (cdb:flush-queue serverdat) (cdb:client-call serverdat 'flush #f *default-numtries*)) -(define (cdb:kill-server serverdat) - (cdb:client-call serverdat 'killserver #t *default-numtries*)) +(define (cdb:kill-server serverdat pid) + (cdb:client-call serverdat 'killserver #t *default-numtries* pid)) (define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status) (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status)) (define (cdb:get-test-info serverdat run-id test-name item-path) @@ -1304,10 +1399,14 @@ db "SELECT rundir,final_logf FROM tests WHERE run_id=? AND testname=? AND item_path='';" run-id test-name) res)) +;;====================================================================== +;; A G R E G A T E D T R A N S A C T I O N D B W R I T E S +;;====================================================================== + (define db:queries (list '(register-test "INSERT OR IGNORE INTO tests (run_id,testname,event_time,item_path,state,status) VALUES (?,?,strftime('%s','now'),?,'NOT_STARTED','n/a');") '(state-status "UPDATE tests SET state=?,status=? WHERE id=?;") '(state-status-msg "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;") '(pass-fail-counts "UPDATE tests SET fail_count=?,pass_count=? WHERE id=?;") @@ -1323,10 +1422,15 @@ '(test-set-log "UPDATE tests SET final_logf=? WHERE id=?;") '(test-set-rundir-by-test-id "UPDATE tests SET rundir=? WHERE id=?") '(test-set-rundir "UPDATE tests SET rundir=? WHERE run_id=? AND testname=? AND item_path=?;") '(delete-tests-in-state "DELETE FROM tests WHERE state=? AND run_id=?;") '(tests:test-set-toplog "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';") + '(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") + '(update-run-duration "UPDATE tests SET run_duration=? WHERE id=?;") + '(update-uname-host "UPDATE tests SET uname=?,host=? WHERE id=?;") + '(update-test-state "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") + '(update-test-status "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));") )) ;; do not run these as part of the transaction (define db:special-queries '(rollup-tests-pass-fail db:roll-up-pass-fail-counts @@ -1502,18 +1606,24 @@ (server:reply return-address qry-sig #f (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*))))))) ((flush sync) (server:reply return-address qry-sig #t 1)) ;; (length data))) ((set-verbosity) (set! *verbosity* (car params)) - (server:reply return-address qry-sig #t '(#t *verbosity*))) + (server:reply return-address qry-sig #t (list #t *verbosity*))) ((killserver) - (debug:print 0 "WARNING: Server going down in 15 seconds by user request!") - (open-run-close tasks:server-deregister tasks:open-db - (car *runremote*) - pullport: (cadr *runremote*)) - (thread-start! (make-thread (lambda ()(thread-sleep! 15)(exit)))) - (server:reply return-address qry-sig #t '(#t "exit process started"))) + (let ((hostname (car *runremote*)) + (port (cadr *runremote*)) + (pid (car params))) + (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!") + (debug:print-info 1 "current pid=" (current-process-id)) + (open-run-close tasks:server-deregister tasks:open-db + hostname + port: port) + (set! *server-run* #f) + (thread-sleep! 3) + (process-signal pid signal/kill) + (server:reply return-address qry-sig #t '(#t "exit process started")))) (else ;; not a command, i.e. is a query (debug:print 0 "ERROR: Unrecognised query/command " stmt-key) (server:reply return-address qry-sig #f 'failed))))) (else (debug:print-info 11 "Executing " stmt-key " for " params) @@ -1593,13 +1703,13 @@ ;;====================================================================== ;; T E S T D A T A ;;====================================================================== -(define (db:csv->test-data db test-id csvdata) +(define (db:csv->test-data db test-id csvdata #!key (work-area #f)) (debug:print 4 "test-id " test-id ", csvdata: " csvdata) - (let ((tdb (db:open-test-db-by-test-id db test-id))) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) (if tdb (let ((csvlist (csv->list (make-csv-reader (open-input-string csvdata) '((strip-leading-whitespace? #t) (strip-trailing-whitespace? #t)) )))) ;; (csv->list csvdata))) @@ -1649,17 +1759,17 @@ ((<=) (if (<= value expected) "pass" "fail")) (else (conc "ERROR: bad tol comparator " tol)))))) (debug:print 4 "AFTER2: category: " category " variable: " variable " value: " value ", expected: " expected " tol: " tol " units: " units " status: " status " comment: " comment) (sqlite3:execute tdb "INSERT OR REPLACE INTO test_data (test_id,category,variable,value,expected,tol,units,comment,status,type) VALUES (?,?,?,?,?,?,?,?,?,?);" - test-id category variable value expected tol units (if comment comment "") status type) - (sqlite3:finalize! tdb))) - csvlist))))) + test-id category variable value expected tol units (if comment comment "") status type))) + csvlist) + (sqlite3:finalize! tdb))))) ;; get a list of test_data records matching categorypatt -(define (db:read-test-data db test-id categorypatt) - (let ((tdb (db:open-test-db-by-test-id db test-id))) +(define (db:read-test-data db test-id categorypatt #!key (work-area #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area))) (if tdb (let ((res '())) (sqlite3:for-each-row (lambda (id test_id category variable value expected tol units comment status type) (set! res (cons (vector id test_id category variable value expected tol units comment status type) res))) @@ -1668,28 +1778,28 @@ (sqlite3:finalize! tdb) (reverse res)) '()))) ;; NOTE: Run this local with #f for db !!! -(define (db:load-test-data db test-id) +(define (db:load-test-data db test-id #!key (work-area #f)) (let loop ((lin (read-line))) (if (not (eof-object? lin)) (begin (debug:print 4 lin) - (db:csv->test-data db test-id lin) + (db:csv->test-data db test-id lin work-area: work-area) (loop (read-line))))) ;; roll up the current results. ;; FIXME: Add the status to - (db:test-data-rollup db test-id #f)) + (db:test-data-rollup db test-id #f work-area: work-area)) ;; WARNING: Do NOT call this for the parent test on an iterated test ;; Roll up test_data pass/fail results ;; look at the test_data status field, ;; if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS. ;; if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored -(define (db:test-data-rollup db test-id status) - (let ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:test-data-rollup db test-id status #!key (work-area #f)) + (let ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (fail-count 0) (pass-count 0)) (if tdb (begin (sqlite3:for-each-row @@ -1738,12 +1848,12 @@ (define (db:step-get-time-as-string vec) (seconds->time-string (db:step-get-event_time vec))) ;; db-get-test-steps-for-run -(define (db:get-steps-for-test db test-id) - (let* ((tdb (db:open-test-db-by-test-id db test-id)) +(define (db:get-steps-for-test db test-id #!key (work-area #f)) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (res '())) (if tdb (begin (sqlite3:for-each-row (lambda (id test-id stepname state status event-time logfile) @@ -1755,12 +1865,12 @@ (reverse res)) '()))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table db test-id #!key (work-area #f)) + (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1815,12 +1925,12 @@ (else #f))))) res))) ;; get a pretty table to summarize steps ;; -(define (db:get-steps-table-list db test-id) - (let ((steps (db:get-steps-for-test db test-id))) +(define (db:get-steps-table-list db test-id #!key (work-area #f)) + (let ((steps (db:get-steps-for-test db test-id work-area: work-area))) ;; organise the steps for better readability (let ((res (make-hash-table))) (for-each (lambda (step) (debug:print 6 "step=" step) @@ -1873,35 +1983,38 @@ ((eq? (db:step-get-event_time a)(db:step-get-event_time b)) (< (db:step-get-id a) (db:step-get-id b))) (else #f))))) res))) -(define (db:get-compressed-steps test-id) - (let* ((comprsteps (open-run-close db:get-steps-table #f test-id))) - (map (lambda (x) - ;; take advantage of the \n on time->string - (vector - (vector-ref x 0) - (let ((s (vector-ref x 1))) - (if (number? s)(seconds->time-string s) s)) - (let ((s (vector-ref x 2))) - (if (number? s)(seconds->time-string s) s)) - (vector-ref x 3) ;; status - (vector-ref x 4) - (vector-ref x 5))) ;; time delta - (sort (hash-table-values comprsteps) - (lambda (a b) - (let ((time-a (vector-ref a 1)) - (time-b (vector-ref b 1))) - (if (and (number? time-a)(number? time-b)) - (if (< time-a time-b) - #t - (if (eq? time-a time-b) - (stringstring + (vector + (vector-ref x 0) + (let ((s (vector-ref x 1))) + (if (number? s)(seconds->time-string s) s)) + (let ((s (vector-ref x 2))) + (if (number? s)(seconds->time-string s) s)) + (vector-ref x 3) ;; status + (vector-ref x 4) + (vector-ref x 5))) ;; time delta + (sort (hash-table-values comprsteps) + (lambda (a b) + (let ((time-a (vector-ref a 1)) + (time-b (vector-ref b 1))) + (if (and (number? time-a)(number? time-b)) + (if (< time-a time-b) + #t + (if (eq? time-a time-b) + (stringobj dat)) @@ -120,10 +119,24 @@ (debug:print-info 11 "Return value from db:process-queue-item is " res) (send-response body: (conc "ctrl data\n" res "") headers: '((content-type text/plain))))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "runs")) + (send-response body: (http-transport:main-page))) + ((equal? (uri-path (request-uri (current-request))) + '(/ any)) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) + ((equal? (uri-path (request-uri (current-request))) + '(/ "hey")) + (send-response body: "hey there!\n" + headers: '((content-type text/plain)))) (else (continue)))))))) (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; @@ -144,76 +157,106 @@ (set! *runremote* (list ipaddrstr portnum)) ;; (open-run-close tasks:remove-server-records tasks:open-db) (open-run-close tasks:server-register tasks:open-db (current-process-id) - ipaddrstr portnum 0 'live 'http) - (print "INFO: Trying to start server on " ipaddrstr ":" portnum) + ipaddrstr portnum 0 'startup 'http) + (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum) - (print "INFO: server has been stopped"))) + (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== ;; C L I E N T S ;;====================================================================== +(define *http-mutex* (make-mutex)) + +;; (system "megatest -list-servers | grep alive || megatest -server - -daemonize && sleep 4") + ;; ;; ;; 1 Hello, world! Goodbye Dolly ;; Send msg to serverdat and receive result -(define (http-transport:client-send-receive serverdat msg) - (let* ((url (http-transport:make-server-url serverdat)) - (fullurl (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) - (numretries 0)) +(define (http-transport:client-send-receive serverdat msg #!key (numretries 30)) + (let* (;; (url (http-transport:make-server-url serverdat)) + (fullurl (caddr serverdat)) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) + (res #f)) (handle-exceptions exn - (if (< numretries 200) - (http-transport:client-send-receive serverdat msg)) + (begin + (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) + (thread-sleep! 2) + (if (> numretries 0) + (http-transport:client-send-receive serverdat msg numretries: (- numretries 1)))) (begin (debug:print-info 11 "fullurl=" fullurl "\n") ;; set up the http-client here - (max-retry-attempts 100) + (max-retry-attempts 5) + ;; consider all requests indempotent (retry-request? (lambda (request) - (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) - (set! numretries (+ numretries 1)) - #t)) + #t)) ;; (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) + ;; (set! numretries (- numretries 1)) + ;; #t)) ;; send the data and get the response ;; extract the needed info from the http data and ;; process and return it. - (let* ((res (with-input-from-request fullurl - ;; #f - ;; msg - (list (cons 'dat msg)) - read-string))) + (let* ((send-recieve (lambda () + (mutex-lock! *http-mutex*) + (set! res (with-input-from-request + fullurl + (list (cons 'dat msg)) + read-string)) + (close-all-connections!) + (mutex-unlock! *http-mutex*))) + (time-out (lambda () + (thread-sleep! 45) + (if (not res) + (begin + (debug:print 0 "WARNING: communication with the server timed out.") + (mutex-unlock! *http-mutex*) + (http-transport:client-send-receive serverdat msg numretries: (- numretries 1)) + (if (< numretries 3) ;; on last try just exit + (begin + (debug:print 0 "ERROR: communication with the server timed out. Giving up.") + (exit 1))))))) + (th1 (make-thread send-recieve "with-input-from-request")) + (th2 (make-thread time-out "time out"))) + (thread-start! th1) + (thread-start! th2) + (thread-join! th1) + (thread-terminate! th2) (debug:print-info 11 "got res=" res) (let ((match (string-search (regexp "(.*)<.body>") res))) (debug:print-info 11 "match=" match) (let ((final (cadr match))) (debug:print-info 11 "final=" final) final))))))) (define (http-transport:client-connect iface port) (let* ((login-res #f) - (serverdat (list iface port))) + (uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) + (serverdat (list iface port uri-dat))) (set! login-res (client:login serverdat)) (if (and (not (null? login-res)) (car login-res)) (begin - (debug:print-info 0 "Logged in and connected to " iface ":" port) + (debug:print-info 2 "Logged in and connected to " iface ":" port) (set! *runremote* serverdat) serverdat) (begin - (debug:print-info 0 "Failed to login or connect to " iface ":" port) - (set! *runremote* #f) - (set! *transport-type* 'fs) - #f)))) + (debug:print-info 0 "ERROR: Failed to login or connect to " iface ":" port) + (exit 1))))) +;; (set! *runremote* #f) +;; (set! *transport-type* 'fs) +;; #f)))) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; @@ -224,11 +267,12 @@ (let* ((server-info (let loop () (let ((sdat #f)) (mutex-lock! *heartbeat-mutex*) (set! sdat *runremote*) (mutex-unlock! *heartbeat-mutex*) - (if sdat sdat + (if sdat + sdat (begin (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) @@ -271,14 +315,15 @@ ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout) - (if (> (+ last-access server-timeout) - (current-seconds)) + (if (and *server-run* + (> (+ last-access server-timeout) + (current-seconds))) (begin - (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) + (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) @@ -366,5 +411,64 @@ (exit 4)) "exit on ^C timer"))) (thread-start! th2) (thread-start! th1) (thread-join! th2)))) + +;;====================================================================== +;; web pages +;;====================================================================== + +(define (http-transport:main-page) + (let ((linkpath (root-path))) + (conc "

" (pathname-strip-directory *toppath*) "

" + "" + "Run area: " *toppath* + "

Server Stats

" + (http-transport:stats-table) + "
" + (http-transport:runs linkpath) + "
" + (http-transport:run-stats) + "" + ))) + +(define (http-transport:stats-table) + (mutex-lock! *heartbeat-mutex*) + (let ((res + (conc "" + "" + "" + "" + "" + "" + "" + "
Max cached queries " *max-cache-size* "
Number of cached writes " *number-of-writes* "
Average cached write time " (if (eq? *number-of-writes* 0) + "n/a (no writes)" + (/ *writes-total-delay* + *number-of-writes*)) + " ms
Number non-cached queries " *number-non-write-queries* "
Average non-cached time " (if (eq? *number-non-write-queries* 0) + "n/a (no queries)" + (/ *total-non-write-delay* + *number-non-write-queries*)) + " ms
Last access" (seconds->time-string *last-db-access*) "
"))) + (mutex-unlock! *heartbeat-mutex*) + res)) + +(define (http-transport:runs linkpath) + (conc "

Runs

" + (string-intersperse + (let ((files (map pathname-strip-directory (glob (conc linkpath "/*"))))) + (map (lambda (p) + (conc "" p "
")) + files)) + " "))) + +(define (http-transport:run-stats) + (let ((stats (open-run-close db:get-running-stats #f))) + (conc "" + (string-intersperse + (map (lambda (stat) + (conc "")) + stats) + " ") + "
" (car stat) "" (cadr stat) "
"))) Index: key_records.scm ================================================================== --- key_records.scm +++ key_records.scm @@ -7,19 +7,15 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== -(define-inline (key:get-fieldname key)(vector-ref key 0)) -(define-inline (key:get-fieldtype key)(vector-ref key 1)) - (define-inline (keys->valslots keys) ;; => ?,?,? .... (string-intersperse (map (lambda (x) "?") keys) ",")) (define-inline (keys->key/field keys . additional) - (string-join (map (lambda (k)(conc (key:get-fieldname k) " " - (key:get-fieldtype k))) + (string-join (map (lambda (k)(conc k " TEXT")) (append keys additional)) ",")) (define-inline (item-list->path itemdat) (if (list? itemdat) (string-intersperse (map cadr itemdat) "/") Index: keys.scm ================================================================== --- keys.scm +++ keys.scm @@ -19,103 +19,52 @@ (declare (uses common)) (include "key_records.scm") (include "common_records.scm") -(define (get-keys db) - (let ((keys '())) ;; keys are vectors - (sqlite3:for-each-row (lambda (fieldname fieldtype) - (set! keys (cons (vector fieldname fieldtype) keys))) - db - "SELECT fieldname,fieldtype FROM keys ORDER BY id ASC;") - (reverse keys))) ;; could just sort desc? - (define (keys->keystr keys) ;; => key1,key2,key3,additiona1, ... - (string-intersperse (map key:get-fieldname keys) ",")) + (string-intersperse keys ",")) (define (args:usage . a) #f) -;; keys->vallist is called several times (quite unnecessarily), use this hash to suppress multiple -;; reporting of missing keys on the command line. -(define keys:warning-suppress-hash (make-hash-table)) - ;;====================================================================== ;; key <=> target routines ;;====================================================================== -;; this now invalidates using "/" in item names +;; This invalidates using "/" in item names. Every key will be +;; available via args:get-arg as :keyfield. Since this only needs to +;; be called once let's use it to set the environment vars +;; +;; The setting of :keyfield in args should be turned off ASAP +;; (define (keys:target-set-args keys target ht) (let ((vals (string-split target "/"))) (if (eq? (length vals)(length keys)) (for-each (lambda (key val) - (hash-table-set! ht (conc ":" (vector-ref key 0)) val)) + (setenv key val) + (hash-table-set! ht (conc ":" key) val)) keys vals) (debug:print 0 "ERROR: wrong number of values in " target ", should match " keys)) vals)) -;; given the keys (a list of vectors ) and a target return a keyval list +;; given the keys (a list of vectors or a list of keys) and a target return a keyval list ;; keyval list ( (key1 val1) (key2 val2) ...) (define (keys:target->keyval keys target) (let* ((targlist (string-split target "/")) (numkeys (length keys)) (numtarg (length targlist)) (targtweaked (if (> numkeys numtarg) (append targlist (make-list (- numkeys numtarg) "")) targlist))) (map (lambda (key targ) - (list (vector-ref key 0) targ)) - keys targtweaked))) - - -;;====================================================================== -;; key <=> args routines -;;====================================================================== - -;; Using the keys pulled from the database (initially set from the megatest.config file) -;; look for the equivalent value on the command line and add it to a list, or #f if not found. -;; default => (val1 val2 val3 ...) -;; withkey => (:key1 val1 :key2 val2 :key3 val3 ...) -(define (keys->vallist keys . withkey) ;; ORDERING IS VERY IMPORTANT, KEEP PROPER ORDER HERE! - (let* ((keynames (map key:get-fieldname keys)) - (argkeys (map (lambda (k)(conc ":" k)) keynames)) - (withkey (not (null? withkey))) - (newremargs (args:get-args - (cons "blah" remargs) ;; the cons blah works around a bug in args [args assumes ("calling-prog-name" .... ) ] - argkeys - '() - args:arg-hash - 0))) - ;;(debug:print 0 "remargs: " remargs " newremargs: " newremargs) - (apply append (map (lambda (x) - (let ((val (args:get-arg x))) - ;; (debug:print 0 "x: " x " val: " val) - (if (not val) - (begin - (if (not (hash-table-ref/default keys:warning-suppress-hash x #f)) - (begin - (debug:print 0 "WARNING: missing key " x ". Specified in database but not on command line, using \"unk\"") - (hash-table-set! keys:warning-suppress-hash x #t))) - (set! val "default"))) - (if withkey (list x val) (list val)))) - argkeys)))) - -;; Given a list of keys (list of vectors) return an alist ((key argval) ...) -(define (keys->alist keys defaultval) - (let* ((keynames (map key:get-fieldname keys)) - (newremargs (args:get-args (cons "blah" remargs) (map (lambda (k)(conc ":" k)) keynames) '() args:arg-hash 0))) ;; the cons blah works around a bug in args - (map (lambda (key) - (let ((val (args:get-arg (conc ":" key)))) - (list key (if val val defaultval)))) - keynames))) - -(define (keystring->keys keystring) - (map (lambda (x) - (let ((xlst (string-split x ":"))) - (list->vector (if (> (length xlst) 1) xlst (append (car xlst)(list "TEXT")))))) - (delete-duplicates (string-split keystring ",")))) - -(define (config-get-fields confdat) - (let ((fields (hash-table-ref/default confdat "fields" '()))) - (map (lambda (x)(vector (car x)(cadr x))) - fields))) + (list key targ)) + keys targtweaked))) + +;;====================================================================== +;; config file related routines +;;====================================================================== + +(define (keys:config-get-fields confdat) + (let ((fields (hash-table-ref/default confdat "fields" '()))) + (map car fields))) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -53,13 +53,13 @@ (define (launch:execute encoded-cmd) (let* ((cmdinfo (read (open-input-string (base64:base64-decode encoded-cmd))))) (setenv "MT_CMDINFO" encoded-cmd) (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) - (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; How is testpath different from work-area ?? + (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area (top-path (assoc/default 'toppath cmdinfo)) - (work-area (assoc/default 'work-area cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (ezsteps (assoc/default 'ezsteps cmdinfo)) ;; (runremote (assoc/default 'runremote cmdinfo)) (transport (assoc/default 'transport cmdinfo)) @@ -91,11 +91,11 @@ ;; Setup the *runremote* global var (if *runremote* (debug:print 2 "ERROR: I'm not expecting *runremote* to be set at this time")) ;; (set! *runremote* runremote) (set! *transport-type* (string->symbol transport)) (set! keys (cdb:remote-run db:get-keys #f)) - (set! keyvals (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f)) + (set! keyvals (keys:target->keyval keys target)) ;; apply pre-overrides before other variables. The pre-override vars must not ;; clobbers things from the official sources such as megatest.config and runconfigs.config (if (string? set-vars) (let ((varpairs (string-split set-vars ","))) (debug:print 4 "varpairs: " varpairs) @@ -126,18 +126,18 @@ (change-directory *toppath*) (set-megatest-env-vars run-id) ;; these may be needed by the launching process (change-directory work-area) - (open-run-close set-run-config-vars #f run-id keys keyvals) + (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id)) ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) (set-megatest-env-vars run-id) (set-item-env-vars itemdat) (save-environment-as-files "megatest") ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat 0) + (tests:set-meta-info #f test-id run-id test-name itemdat 0 work-area) (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f) (if (args:get-arg "-xterm") (set! fullrunscript "xterm") (if (and fullrunscript (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) @@ -208,11 +208,11 @@ ;; call the command using mt_ezstep (set! script (conc "mt_ezstep " stepname " " (if prevstep prevstep "-") " " stepcmd)) (debug:print 4 "script: " script) ;; DO NOT remote - (db:teststep-set-status! #f test-id stepname "start" "-" #f #f) + (db:teststep-set-status! #f test-id stepname "start" "-" #f #f work-area: work-area) ;; now launch (let ((pid (process-run script))) (let processloop ((i 0)) (let-values (((pid-val exit-status exit-code)(process-wait pid #t))) (mutex-lock! m) @@ -226,11 +226,11 @@ (processloop (+ i 1)))) )) (let ((exinfo (vector-ref exit-info 2)) (logfna (if logpro-used (conc stepname ".html") ""))) ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect) - (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna)) + (db:teststep-set-status! #f test-id stepname "end" exinfo #f logfna work-area: work-area)) (if logpro-used (cdb:test-set-log! *runremote* test-id (conc stepname ".html"))) ;; set the test final status (let* ((this-step-status (cond ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn) @@ -276,11 +276,11 @@ (kill-tries 0)) (let loop ((minutes (calc-minutes))) (begin (set! kill-job? (test-get-kill-request test-id)) ;; run-id test-name itemdat)) ;; open-run-close not needed for test-set-meta-info - (test-set-meta-info #f test-id run-id test-name itemdat minutes) + (tests:set-meta-info #f test-id run-id test-name itemdat minutes work-area) (if kill-job? (begin (mutex-lock! m) (let* ((pid (vector-ref exit-info 0))) (if (number? pid) @@ -336,11 +336,11 @@ (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN")) (else "FAIL")) (args:get-arg "-m") #f))) ;; for automated creation of the rollup html file this is a good place... (if (not (equal? item-path "")) - (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no + (tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no ) (mutex-unlock! m) ;; (exec-results (cmd-run->list fullrunscript)) ;; (list ">" (conc test-name "-run.log")))) ;; (success exec-results)) ;; (eq? (cadr exec-results) 0))) (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " @@ -406,19 +406,17 @@ ;; ;; All log file links should be stored relative to the top of link path ;; ;; - [ - ] ;; -(define (create-work-area db run-id test-id test-src-path disk-path testname itemdat) - (let* ((run-info (cdb:remote-run db:get-run-info #f run-id)) - (item-path (item-list->path itemdat)) +(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat) + (let* ((item-path (item-list->path itemdat)) (runname (db:get-value-by-header (db:get-row run-info) (db:get-header run-info) "runname")) ;; convert back to db: from rdb: - this is always run at server end - (key-vals (cdb:remote-run db:get-key-vals #f run-id)) - (target (string-intersperse key-vals "/")) + (target (string-intersperse (map cadr keyvals) "/")) (not-iterated (equal? "" item-path)) ;; all tests are found at /test-base or /test-base (testtop-base (conc target "/" runname "/" testname)) @@ -537,15 +535,16 @@ (begin (let* ((ovrcmd (let ((cmd (config-lookup *configdat* "setup" "testcopycmd"))) (if cmd ;; substitute the TEST_SRC_PATH and TEST_TARG_PATH (string-substitute "TEST_TARG_PATH" test-path - (string-substitute "TEST_SRC_PATH" test-src-path cmd)) + (string-substitute "TEST_SRC_PATH" test-src-path cmd #t) #t) #f))) (cmd (if ovrcmd ovrcmd - (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/"))) + (conc "rsync -av" (if (debug:debug-mode 1) "" "q") " " test-src-path "/ " test-path "/" + " >> " test-path "/mt_launch.log 2>> " test-path "/mt_launch.log"))) (status (system cmd))) (if (not (eq? status 0)) (debug:print 2 "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) (list #f #f)))) @@ -555,11 +554,11 @@ ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) -(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params) +(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (change-directory *toppath*) (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" (list ;; (list "MT_TEST_RUN_DIR" work-area) (list "MT_RUN_AREA_HOME" *toppath*) (list "MT_TEST_NAME" test-name) @@ -594,13 +593,13 @@ (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) - (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) + ;; (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (testinfo (cdb:get-test-info-by-id *runremote* test-id)) - (mt_target (string-intersperse (map cadr keyvallst) "/")) + (mt_target (string-intersperse (map cadr keyvals) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '())))) (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) @@ -607,11 +606,11 @@ (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! launcher (string-split launcher))) ;; set up the run work area for this test (set! diskpath (get-best-disk *configdat*)) (if diskpath - (let ((dat (open-run-close create-work-area db run-id test-id test-path diskpath test-name itemdat))) + (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat))) (set! work-area (car dat)) (set! toptest-work-area (cadr dat)) (debug:print-info 2 "Using work area " work-area)) (begin (set! work-area (conc test-path "/tmp_run")) @@ -635,11 +634,11 @@ (list 'ezsteps ezsteps) (list 'target mt_target) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) - (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " ")))) + (list 'mt-bindir-path mt-bindir-path))))))) ;; clean out step records from previous run if they exist ;; (debug:print-info 4 "FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?") ;; (open-run-close db:delete-test-step-records db test-id) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) @@ -668,34 +667,37 @@ (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_RUNNAME" runname) (list "MT_TARGET" mt_target) ) itemdat))) - (launch-results (apply cmd-run-with-stderr->list ;; cmd-run-proc-each-line + (launch-results (apply (if (equal? (configf:lookup *configdat* "setup" "launchwait") "yes") + cmd-run-with-stderr->list + process-run) (if useshell (string-intersperse fullcmd " ") (car fullcmd)) - ;; conc (if useshell '() - (cdr fullcmd))))) ;; launcher fullcmd)));; (apply cmd-run-proc-each-line launcher print fullcmd))) ;; (cmd-run->list fullcmd)) - (with-output-to-file "mt_launch.log" - (lambda () - (apply print launch-results))) + (cdr fullcmd))))) + (if (list? launch-results) + (with-output-to-file "mt_launch.log" + (lambda () + (apply print launch-results)) + #:append)) (debug:print 2 "Launching completed, updating db") (debug:print 2 "Launch results: " launch-results) (if (not launch-results) - (begin - (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") - ;; (sqlite3:finalize! db) - ;; good ole "exit" seems not to work - ;; (_exit 9) - ;; but this hack will work! Thanks go to Alan Post of the Chicken email list - ;; NB// Is this still needed? Should be safe to go back to "exit" now? - (process-signal (current-process-id) signal/kill) - )) + (begin + (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now") + ;; (sqlite3:finalize! db) + ;; good ole "exit" seems not to work + ;; (_exit 9) + ;; but this hack will work! Thanks go to Alan Post of the Chicken email list + ;; NB// Is this still needed? Should be safe to go back to "exit" now? + (process-signal (current-process-id) signal/kill) + )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) launch-results)) (change-directory *toppath*)) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -1,7 +1,7 @@ ;; Always use two digit decimal ;; 1.01, 1.02...1.10,1.11 ... 1.99,2.00.. (declare (unit megatest-version)) -(define megatest-version 1.5417) +(define megatest-version 1.5431) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -8,11 +8,11 @@ ;; PURPOSE. ;; (include "common.scm") ;; (include "megatest-version.scm") -(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json) ;; (srfi 18) extras) +(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client) ;; (srfi 18) extras) (import (prefix sqlite3 sqlite3:)) (import (prefix base64 base64:)) ;; (use zmq) @@ -24,40 +24,24 @@ (declare (uses server)) (declare (uses client)) (declare (uses tests)) (declare (uses genexample)) (declare (uses daemon)) +(declare (uses db)) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") +(include "run_records.scm") (include "megatest-fossil-hash.scm") -;; (use trace dot-locking) -;; (trace -;; cdb:client-call -;; cdb:remote-run -;; cdb:test-set-status-state -;; change-directory -;; db:process-queue-item -;; db:test-get-logfile-info -;; db:teststep-set-status! -;; nice-path -;; obtain-dot-lock -;; open-run-close -;; read-config -;; runs:can-run-more-tests -;; sqlite3:execute -;; sqlite3:for-each-row -;; tests:check-waiver-eligibility -;; tests:summarize-items -;; tests:test-set-status! -;; thread-sleep! -;;) - +(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) + (if (file-exists? debugcontrolf) + (load debugcontrolf))) + (define help (conc " Megatest, documentation at http://www.kiatoa.com/fossils/megatest version " megatest-version " license GPL, Copyright Matt Welland 2006-2012 @@ -76,10 +60,11 @@ -rerun FAIL,WARN... : force re-run for tests with specificed status(s) -rollup : (currently disabled) fill run (set by :runname) with latest test(s) from prior runs with same keys -lock : lock run specified by target and runname -unlock : unlock run specified by target and runname + -run-wait : wait on run specified by target and runname Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.) -target key1/key2/... : run for key1, key2, etc. -reqtarg key1/key2/... : run for key1, key2, etc. but key1/key2 must be in runconfig -testpatt patt1/patt2,patt3/... : % is wildcard @@ -108,21 +93,22 @@ from standard in. Each line is comma delimited with four fields category,variable,value,comment Queries -list-runs patt : list runs matching pattern \"patt\", % is the wildcard - -showkeys : show the keys used in this megatest setup - -test-files targpatt : get the most recent test path/file matching targpatt e.g. %/%... + -show-keys : show the keys used in this megatest setup + -test-files targpatt : get the most recent test path/file matching targpatt e.g. %/%... returns list sorted by age ascending, see examples below -test-paths : get the test paths matching target, runname, item and test patterns. -list-disks : list the disks available for storing runs -list-targets : list the targets in runconfigs.config -list-db-targets : list the target combinations used in the db -show-config : dump the internal representation of the megatest.config file -show-runconfig : dump the internal representation of the runconfigs.config file -dumpmode json : dump in json format instead of sexpr + -show-cmdinfo : dump the command info for a test (run in test environment) Misc -rebuild-db : bring the database schema up to date -update-meta : update the tests metadata for all tests -env2file fname : write the environment to fname.csh and fname.sh @@ -131,11 +117,12 @@ -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|fs : use http or direct access for transport (default is http) -daemonize : fork into background and disconnect from stdin/out -list-servers : list the servers - -stop-server id : stop server specified by id (see output of -list-servers) + -stop-server id : stop server specified by id (see output of -list-servers), use + 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm Spreadsheet generation -extract-ods fname.ods : extract an open document spreadsheet from the database @@ -213,10 +200,11 @@ (list "-h" "-version" "-force" "-xterm" "-showkeys" + "-show-keys" "-test-status" "-set-values" "-load-test-data" "-summarize-items" "-gui" @@ -225,16 +213,19 @@ "-archive" "-repl" "-lock" "-unlock" "-list-servers" - ;; mist queries + "-run-wait" ;; wait on a run to complete (i.e. no RUNNING) + + ;; misc queries "-list-disks" "-list-targets" "-list-db-targets" "-show-runconfig" "-show-config" + "-show-cmdinfo" ;; queries "-test-paths" ;; get path(s) to a test, ordered by youngest first "-runall" ;; run all tests "-remove-runs" @@ -260,10 +251,15 @@ (print megatest-version) (exit))) (define *didsomething* #f) +(if (and (or (args:get-arg "-list-targets") + (args:get-arg "-list-db-targets")) + (not (args:get-arg "-transport"))) + (hash-table-set! args:arg-hash "-transport" "fs")) + ;;====================================================================== ;; Misc setup stuff ;;====================================================================== (debug:setup) @@ -319,22 +315,31 @@ (let loop ((servers (open-run-close tasks:get-best-server tasks:open-db)) (trycount 0)) (if (or (not servers) (null? servers)) (begin - (if (eq? trycount 0) ;; just do the server start once + (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds) (begin (debug:print 0 "INFO: Starting server as none running ...") ;; (server:launch (string->symbol (args:get-arg "-transport" "http")))) + ;; no need to use fork, no need to do the list-servers trick. Just start the damn server, it will exit on it's own + ;; if there is an existing server + (system "megatest -server - -daemonize") + (thread-sleep! 3) ;; (process-run (car (argv)) (list "-server" "-" "-daemonize" "-transport" (args:get-arg "-transport" "http"))) - (process-fork (lambda () - (daemon:ize) - (server:launch (string->symbol (args:get-arg "-transport" "http"))))) - (thread-sleep! 3)) - (debug:print-info 0 "Waiting for server to start")) - (loop (open-run-close tasks:get-best-server tasks:open-db) - (+ trycount 1))) + ;; (system (conc "megatest -list-servers | egrep '" megatest-version ".*alive' || megatest -server - -daemonize && sleep 3")) + ;; (process-fork (lambda () + ;; (daemon:ize) + ;; (server:launch (string->symbol (args:get-arg "-transport" "http"))))) + ) + (begin + (debug:print-info 0 "Waiting for server to start") + (thread-sleep! 4))) + (if (< trycount 10) + (loop (open-run-close tasks:get-best-server tasks:open-db) + (+ trycount 1)) + (debug:print 0 "WARNING: Couldn't start or find a server."))) (debug:print 0 "INFO: Server(s) running " servers) ))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) @@ -372,11 +377,12 @@ (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid action: 'delete)) (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds (open-run-close tasks:server-deregister tasks:open-db hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname interface pullport pubport last-update (if status "alive" "dead") transport) - (if (equal? id sid) + (if (or (equal? id sid) + (equal? sid 0)) ;; kill all/any (begin (debug:print-info 0 "Attempting to stop server with pid " pid) (tasks:kill-server status hostname pullport pid transport))))) servers) (debug:print-info 1 "Done with listservers") @@ -400,20 +406,32 @@ (for-each (lambda (x) ;; (print "[" x "]")) (print x)) targets) (set! *didsomething* #t))) + +(define (full-runconfigs-read) + (let* ((keys (cdb:remote-run get-keys #f)) + (target (if (args:get-arg "-reqtarg") + (args:get-arg "-reqtarg") + (if (args:get-arg "-target") + (args:get-arg "-target") + #f))) + (key-vals (if target (keys:target->keyval keys target) #f)) + (sections (if target (list "default" target) #f)) + (data (begin + (setenv "MT_RUN_AREA_HOME" *toppath*) + (if key-vals + (for-each (lambda (kt) + (setenv (car kt) (cadr kt))) + key-vals)) + (read-config "runconfigs.config" #f #t sections: sections)))) + data)) + (if (args:get-arg "-show-runconfig") - (let* ((target (if (args:get-arg "-reqtarg") - (args:get-arg "-reqtarg") - (if (args:get-arg "-target") - (args:get-arg "-target") - #f))) - (sections (if target (list "default" target) #f)) - (data (read-config "runconfigs.config" #f #t sections: sections))) - + (let ((data (full-runconfigs-read))) ;; keep this one local (cond ((not (args:get-arg "-dumpmode")) (pp (hash-table->alist data))) ((string=? (args:get-arg "-dumpmode") "json") @@ -431,51 +449,65 @@ ((string=? (args:get-arg "-dumpmode") "json") (json-write data)) (else (debug:print 0 "ERROR: -dumpmode of " (args:get-arg "-dumpmode") " not recognised"))) (set! *didsomething* #t))) + +(if (args:get-arg "-show-cmdinfo") + (let ((data (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO")))))) + (if (equal? (args:get-arg "-dumpmode") "json") + (json-write data) + (pp data)) + (set! *didsomething* #t))) ;;====================================================================== ;; Remove old run(s) ;;====================================================================== ;; since several actions can be specified on the command line the removal ;; is done first (define (operate-on action) - (cond - ((not (args:get-arg ":runname")) - (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the run name pattern with :runname patt") - (exit 2)) - ((not (args:get-arg "-testpatt")) - (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the test pattern with -testpatt") - (exit 3)) - (else - (if (not (car *configinfo*)) - (begin - (debug:print 0 "ERROR: Attempted " action "on test(s) but run area config file not found") - (exit 1)) - ;; put test parameters into convenient variables - (runs:operate-on action - (args:get-arg ":runname") - (args:get-arg "-testpatt") - state: (args:get-arg ":state") - status: (args:get-arg ":status") - new-state-status: (args:get-arg "-set-state-status"))) - (set! *didsomething* #t)))) + (let* ((runrec (runs:runrec-make-record)) + (target (or (args:get-arg "-reqtarg") + (args:get-arg "-target")))) + (cond + ((not target) + (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify -target or -reqtarg") + (exit 1)) + ((not (args:get-arg ":runname")) + (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the run name pattern with :runname patt") + (exit 2)) + ((not (args:get-arg "-testpatt")) + (debug:print 0 "ERROR: Missing required parameter for " action ", you must specify the test pattern with -testpatt") + (exit 3)) + (else + (if (not (car *configinfo*)) + (begin + (debug:print 0 "ERROR: Attempted " action "on test(s) but run area config file not found") + (exit 1)) + ;; put test parameters into convenient variables + (runs:operate-on action + target + (args:get-arg ":runname") + (args:get-arg "-testpatt") + state: (args:get-arg ":state") + status: (args:get-arg ":status") + new-state-status: (args:get-arg "-set-state-status"))) + (set! *didsomething* #t))))) (if (args:get-arg "-remove-runs") (general-run-call "-remove-runs" "remove runs" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (operate-on 'remove-runs)))) (if (args:get-arg "-set-state-status") (general-run-call "-set-state-status" "set state and status" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (operate-on 'set-state-status)))) ;;====================================================================== ;; Query runs ;;====================================================================== @@ -490,19 +522,18 @@ "%")) (runsdat (cdb:remote-run db:get-runs #f runpatt #f #f '())) (runs (db:get-rows runsdat)) (header (db:get-header runsdat)) (keys (cdb:remote-run db:get-keys #f)) - (keynames (map key:get-fieldname keys)) (db-targets (args:get-arg "-list-db-targets")) (seen (make-hash-table))) ;; Each run (for-each (lambda (run) (let ((targetstr (string-intersperse (map (lambda (x) (db:get-value-by-header run header x)) - keynames) "/"))) + keys) "/"))) (if db-targets (if (not (hash-table-ref/default seen targetstr #f)) (begin (hash-table-set! seen targetstr #t) ;; (print "[" targetstr "]")))) @@ -572,14 +603,13 @@ ;; run all tests are are Not COMPLETED and PASS or CHECK (if (args:get-arg "-runall") (general-run-call "-runall" "run all tests" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (runs:run-tests target runname - "%" (args:get-arg "-testpatt") user args:arg-hash)))) ;;====================================================================== @@ -601,44 +631,40 @@ (if (args:get-arg "-runtests") (general-run-call "-runtests" "run a test" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (runs:run-tests target runname (args:get-arg "-runtests") - (args:get-arg "-testpatt") user args:arg-hash)))) ;;====================================================================== ;; Rollup into a run ;;====================================================================== (if (args:get-arg "-rollup") - (begin - (debug:print 0 "ERROR: Rollup is currently not working. If you need it please submit a ticket at http://www.kiatoa.com/fossils/megatest") - (exit 4))) -;; (general-run-call -;; "-rollup" -;; "rollup tests" -;; (lambda (target runname keys keynames keyvallst) -;; (runs:rollup-run keys -;; (keys->alist keys "na") -;; (args:get-arg ":runname") -;; user)))) + (general-run-call + "-rollup" + "rollup tests" + (lambda (target runname keys keyvals) + (runs:rollup-run keys + keyvals + (args:get-arg ":runname") + user)))) ;;====================================================================== ;; Lock or unlock a run ;;====================================================================== (if (or (args:get-arg "-lock")(args:get-arg "-unlock")) (general-run-call (if (args:get-arg "-lock") "-lock" "-unlock") "lock/unlock tests" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (runs:handle-locking target keys (args:get-arg ":runname") (args:get-arg "-lock") @@ -677,25 +703,24 @@ (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -test-paths or -test-files, exiting") (exit 1))) (let* ((keys (cdb:remote-run db:get-keys db)) - (keynames (map key:get-fieldname keys)) ;; db:test-get-paths must not be run remote - (paths (db:test-get-paths-matching db keynames target (args:get-arg "-test-files")))) + (paths (db:test-get-paths-matching db keys target (args:get-arg "-test-files")))) (set! *didsomething* #t) (for-each (lambda (path) (print path)) paths))) ;; else do a general-run-call (general-run-call "-test-files" "Get paths to test" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (let* ((db #f) ;; DO NOT run remote - (paths (db:test-get-paths-matching db keynames target (args:get-arg "-test-files")))) + (paths (db:test-get-paths-matching db keys target (args:get-arg "-test-files")))) (for-each (lambda (path) (print path)) paths)))))) ;;====================================================================== @@ -729,25 +754,24 @@ (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -archive, exiting") (exit 1))) (let* ((keys (cdb:remote-run db:get-keys db)) - (keynames (map key:get-fieldname keys)) ;; DO NOT run remote - (paths (db:test-get-paths-matching db keynames target))) + (paths (db:test-get-paths-matching db keys target))) (set! *didsomething* #t) (for-each (lambda (path) (print path)) paths))) ;; else do a general-run-call (general-run-call "-test-paths" "Get paths to tests" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (let* ((db #f) ;; DO NOT run remote - (paths (db:test-get-paths-matching db keynames target))) + (paths (db:test-get-paths-matching db keys target))) (for-each (lambda (path) (print path)) paths)))))) ;;====================================================================== @@ -756,17 +780,17 @@ (if (args:get-arg "-extract-ods") (general-run-call "-extract-ods" "Make ods spreadsheet" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvals) (let ((db #f) (outputfile (args:get-arg "-extract-ods")) (runspatt (args:get-arg ":runname")) - (pathmod (args:get-arg "-pathmod")) - (keyvalalist (keys->alist keys "%"))) - (debug:print 2 "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvalalist: " keyvalalist) + (pathmod (args:get-arg "-pathmod"))) + ;; (keyvalalist (keys->alist keys "%"))) + (debug:print 2 "Extract ods, outputfile: " outputfile " runspatt: " runspatt " keyvalalist: " keyvals) (cdb:remote-run db:extract-ods-file db outputfile keyvalalist (if runspatt runspatt "%") pathmod))))) ;;====================================================================== ;; execute the test ;; - gets called on remote host @@ -797,21 +821,22 @@ (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) (db #f)) (change-directory testpath) ;; (set! *runremote* runremote) (set! *transport-type* (string->symbol transport)) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) - ;; DO NOT remote run - (db:teststep-set-status! db test-id step state status msg logfile) + ;; DO NOT remote run, makes calls to the testdat.db test db. + (db:teststep-set-status! db test-id step state status msg logfile work-area: work-area) (begin (debug:print 0 "ERROR: You must specify :state and :status with every call to -step") (exit 6)))))) (if (args:get-arg "-step") @@ -824,10 +849,12 @@ (args:get-arg "-m")) ;; (if db (sqlite3:finalize! db)) (set! *didsomething* #t))) (if (or (args:get-arg "-setlog") ;; since setting up is so costly lets piggyback on -test-status + ;; (not (args:get-arg "-step"))) ;; -setlog may have been processed already in the "-step" previous + ;; NEW POLICY - -setlog sets test overall log on every call. (args:get-arg "-set-toplog") (args:get-arg "-test-status") (args:get-arg "-set-values") (args:get-arg "-load-test-data") (args:get-arg "-runstep") @@ -845,10 +872,11 @@ (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) + (work-area (assoc/default 'work-area cmdinfo)) (db #f) ;; (open-db)) (state (args:get-arg ":state")) (status (args:get-arg ":status"))) (change-directory testpath) ;; (set! *runremote* runremote) @@ -862,11 +890,11 @@ ;; (client:setup) (if (args:get-arg "-load-test-data") ;; has sub commands that are rdb: ;; DO NOT put this one into either cdb:remote-run or open-run-close - (db:load-test-data db test-id)) + (db:load-test-data db test-id work-area: work-area)) (if (args:get-arg "-setlog") (let ((logfname (args:get-arg "-setlog"))) (cdb:test-set-log! *runremote* test-id logfname))) (if (args:get-arg "-set-toplog") ;; DO NOT run remote @@ -894,11 +922,11 @@ (fullcmd (conc "(" (string-intersperse (cons cmd params) " ") ") " redir " " logfile))) ;; mark the start of the test ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile) + (db:teststep-set-status! db test-id stepname "start" "n/a" (args:get-arg "-m") logfile work-area: work-area) ;; run the test step (debug:print-info 2 "Running \"" fullcmd "\"") (change-directory startingdir) (set! exitstat (system fullcmd)) ;; cmd params)) (set! *globalexitstatus* exitstat) @@ -914,11 +942,11 @@ (set! *globalexitstatus* exitstat) ;; no necessary (change-directory testpath) (cdb:test-set-log! *runremote* test-id htmllogfile))) (let ((msg (args:get-arg "-m"))) ;; DO NOT run remote - (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile)) + (db:teststep-set-status! db test-id stepname "end" exitstat msg logfile work-area: work-area)) ))) (if (or (args:get-arg "-test-status") (args:get-arg "-set-values")) (let ((newstatus (cond ((number? status) (if (equal? status 0) "PASS" "FAIL")) @@ -941,27 +969,28 @@ ;; (sqlite3:finalize! db) (exit 6))) (let* ((msg (args:get-arg "-m")) (numoth (length (hash-table-keys otherdata)))) ;; Convert to rpc inside the tests:test-set-status! call, not here - (tests:test-set-status! test-id state newstatus msg otherdata)))) + (tests:test-set-status! test-id state newstatus msg otherdata work-area: work-area)))) (if db (sqlite3:finalize! db)) (set! *didsomething* #t)))) ;;====================================================================== ;; Various helper commands can go below here ;;====================================================================== -(if (args:get-arg "-showkeys") +(if (or (args:get-arg "-showkeys") + (args:get-arg "-show-keys")) (let ((db #f) (keys #f)) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) - (set! keys (cbd:remote-run db:get-keys db)) - (debug:print 1 "Keys: " (string-intersperse (map key:get-fieldname keys) ", ")) + (set! keys (cdb:remote-run db:get-keys db)) + (debug:print 1 "Keys: " (string-intersperse keys ", ")) (if db (sqlite3:finalize! db)) (set! *didsomething* #t))) (if (args:get-arg "-gui") (begin @@ -990,10 +1019,23 @@ (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; keep this one local (open-run-close patch-db #f) (set! *didsomething* #t))) + +;;====================================================================== +;; Wait on a run to complete +;;====================================================================== + +(if (args:get-arg "-run-wait") + (begin + (if (not (setup-for-run)) + (begin + (debug:print 0 "Failed to setup, exiting") + (exit 1))) + (operate-on 'run-wait) + (set! *didsomething* #t))) ;;====================================================================== ;; Update the tests meta data from the testconfig files ;;====================================================================== @@ -1035,10 +1077,12 @@ (set! *didsomething* #t))) ;;====================================================================== ;; Exit and clean up ;;====================================================================== + +(if *runremote* (close-all-connections!)) ;; this is the socket if we are a client ;; (if (and *runremote* ;; (socket? *runremote*)) ;; (close-socket *runremote*)) Index: newdashboard.scm ================================================================== --- newdashboard.scm +++ newdashboard.scm @@ -776,11 +776,11 @@ ;; ;; Each run is unique on its keys and runname or run-id, store in hash on colnum (for-each (lambda (run-id) (let* ((run-record (hash-table-ref/default runs-hash run-id #f)) (key-vals (map (lambda (key)(db:get-value-by-header run-record header key)) - (map key:get-fieldname keys))) + keys)) (run-name (db:get-value-by-header run-record header "runname")) (col-name (conc (string-intersperse key-vals "\n") "\n" run-name)) (run-path (append key-vals (list run-name)))) (hash-table-set! (dboard:data-get-run-keys *data*) run-id run-path) (iup:attribute-set! (dboard:data-get-runs-matrix *data*) ADDED run-tests-queue-classic.scm Index: run-tests-queue-classic.scm ================================================================== --- /dev/null +++ run-tests-queue-classic.scm @@ -0,0 +1,300 @@ + +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue-classic run-id runname test-records keyvals flags test-patts required-tests) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) + (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (config-lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (if (and mcj (string->number mcj)) + (string->number mcj) + 1)))) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + (if (not (null? sorted-test-names)) + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (config-lookup tconfig "requirements" "jobgroup")) + (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) + (if m (string->symbol m) 'normal))) + (waitons (tests:testqueue-get-waitons test-record)) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (newtal (append tal (list hed)))) + + (debug:print 6 + "test-name: " test-name + "\n hed: " hed + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n tal: " tal + "\n reruns: " reruns) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond ;; OUTER COND + ((not items) ;; when false the test is ok to be handed off to launch (but not before) + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) + (not (null? tal))) + (loop (car newtal)(cdr newtal) reruns)) + (let* ((run-limits-info ;; (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup (list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) ", ") " fails: " fails) + (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 "run-limits-info = " run-limits-info) + (cond ;; INNER COND #1 for a launchable test + ;; Check item path against item-patts + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns))) + ;; Registry has been started for this test but has not yet completed + ;; this should be rare, the case where there are only a couple of tests and the db is slow + ;; delay a short while and continue + ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start) + ;; (thread-sleep! 0.01) + ;; (loop (car newtal)(cdr newtal) reruns)) + ;; count number of 'done, if more than 100 then skip on through. + ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. + (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) + (let ((th (make-thread (lambda () + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registry-mutex) + ;; If haven't done it before register a top level test if this is an itemized test + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) + (cdb:tests-register-test *runremote* run-id test-name "")) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registry-mutex)) + (conc test-name "/" item-path)))) + (thread-start! th)) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + (loop (car newtal)(cdr newtal) reruns)) + ;; At this point *all* test registrations must be completed. + ((not (null? (filter (lambda (x)(eq? 'start x))(hash-table-values test-registry)))) + (debug:print-info 0 "Waiting on test registrations: " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.1) + (loop hed tal reruns)) + ((not have-resources) ;; simply try again after waiting a second + (debug:print-info 1 "no resources to run new tests, waiting ...") + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) ;; (+ 2 *global-delta*)) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (loop (car newtal)(cdr newtal) reruns)) + ((and have-resources + (or (null? prereqs-not-met) + (and (eq? testmode 'toplevel) + (null? non-completed)))) + (run:test run-id run-info keyvals runname test-record flags #f) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns))) + (else ;; must be we have unmet prerequisites + (debug:print 4 "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + (if (null? fails) + (begin + ;; couldn't run, take a breather + (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") + ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient + ;; we made new tal by sticking hed at the back of the list + (loop (car newtal)(cdr newtal) reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (not (null? tal)) + (if (vector? hed) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) + (loop (car tal)(cdr tal) (cons hed reruns))) + (begin + (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (runs:shrink-can-run-more-tests-count) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop hed tal reruns))))))))) ;; END OF INNER COND + + ;; case where an items came in as a list been processed + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) + (> (length items) 0) + (> (length (car items)) 0)) + (pp items)) + (for-each + (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat))) + (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! + (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath + items) + (if (not (null? tal)) + (begin + (debug:print-info 4 "End of items list, looping with next after short delay") + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop (car tal)(cdr tal) reruns)))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ((or (procedure? items)(eq? items 'have-procedure)) + (let ((can-run-more ;; (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) + (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) + (if (and (list? can-run-more) + (car can-run-more)) + (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n num-retries: " num-retries + "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (cond ;; INNER COND #2 + ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + (and (eq? testmode 'toplevel) + (null? non-completed))) + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (tests:testqueue-set-items! test-record items-list) + ;; (thread-sleep! *global-delta*) + (loop hed tal reruns)) + (begin + (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") + (exit 1)))))) + ((null? fails) + (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") + ;; only increment num-retries when there are no tests runing + (if (eq? 0 (list-ref can-run-more 1)) + (begin + ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost + ;; TRY (thread-sleep! (+ 2 *global-delta*)) + ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) + (set! num-retries (+ num-retries 1)))) + (if (> num-retries max-retries) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reruns)) + (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? + ((and (not (null? fails))(eq? testmode 'normal)) + (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (if (not (null? tal)) + (begin + ;; (thread-sleep! *global-delta*) + (loop (car tal)(cdr tal)(cons hed reruns))))) + (else + (debug:print 8 "ERROR: No handler for this condition.") + ;; TRY (thread-sleep! (+ 1 *global-delta*)) + (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE + + ;; if can't run more just loop with next possible test + (begin + (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) + ;; (thread-sleep! (+ 2 *global-delta*)) + (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) + (else + (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + )))) ;; LET* ((test-record + + ;; we get here on "drop through" - loop for next test in queue + ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! + + (debug:print-info 1 "All tests launched") + (thread-sleep! 0.5) + ;; FIXME! This harsh exit should not be necessary.... + ;; (if (not *runremote*)(exit)) ;; + #f)) ;; return a #f as a hint that we are done + ;; Here we need to check that all the tests remaining to be run are eligible to run + ;; and are not blocked by failed + + ADDED run-tests-queue-new.scm Index: run-tests-queue-new.scm ================================================================== --- /dev/null +++ run-tests-queue-new.scm @@ -0,0 +1,352 @@ + +;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > +(define (runs:run-tests-queue-new run-id runname test-records keyvals flags test-patts required-tests reglen) + ;; At this point the list of parent tests is expanded + ;; NB// Should expand items here and then insert into the run queue. + (debug:print 5 "test-records: " test-records ", flags: " (hash-table->alist flags)) + (let ((run-info (cdb:remote-run db:get-run-info #f run-id)) + (tests-info (cdb:remote-run db:get-tests-for-run #f run-id #f '() '())) ;; qryvals: "id,testname,item_path")) + (sorted-test-names (tests:sort-by-priority-and-waiton test-records)) + (test-registry (make-hash-table)) + (registry-mutex (make-mutex)) + (num-retries 0) + (max-retries (config-lookup *configdat* "setup" "maxretries")) + (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) + (if (and mcj (string->number mcj)) + (string->number mcj) + 1)))) ;; length of the register queue ahead + ;; Initialize the test-registery hash with tests that already have a record + (for-each (lambda (trec) + (let ((id (db:test-get-id trec)) + (tn (db:test-get-testname trec)) + (ip (db:test-get-item-path trec)) + (st (db:test-get-state trec))) + (hash-table-set! test-registry (runs:make-full-test-name tn ip) (string->symbol st)))) + tests-info) + (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) + (if (not (null? sorted-test-names)) + (let loop ((hed (car sorted-test-names)) + (tal (cdr sorted-test-names)) + (reg '()) ;; registered, put these at the head of tal + (reruns '())) + (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) + ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) + (let* ((test-record (hash-table-ref test-records hed)) + (test-name (tests:testqueue-get-testname test-record)) + (tconfig (tests:testqueue-get-testconfig test-record)) + (jobgroup (config-lookup tconfig "requirements" "jobgroup")) + (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) + (if m (string->symbol m) 'normal))) + (waitons (tests:testqueue-get-waitons test-record)) + (priority (tests:testqueue-get-priority test-record)) + (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f + (items (tests:testqueue-get-items test-record)) + (item-path (item-list->path itemdat)) + (tfullname (runs:make-full-test-name test-name item-path)) + (newtal (append tal (list hed))) + (regfull (> (length reg) reglen))) + + ;; Fast skip of tests that are already "COMPLETED" + (if (equal? (hash-table-ref/default test-registry tfullname #f) 'COMPLETED) + (begin + (debug:print-info 0 "Skipping COMPLETED test " tfullname) + (if (not (null? tal)) + (loop (car tal)(cdr tal) reg reruns)))) + ;; (if (> (length reg) 10) + ;; (begin + ;; (set! tal (cons hed tal)) + ;; (set! hed (car reg)) + ;; (set! reg (cdr reg)) + ;; (set! newtal tal))) + (debug:print 6 + "test-name: " test-name + "\n hed: " hed + "\n itemdat: " itemdat + "\n items: " items + "\n item-path: " item-path + "\n waitons: " waitons + "\n num-retries: " num-retries + "\n tal: " tal + "\n reruns: " reruns) + + ;; check for hed in waitons => this would be circular, remove it and issue an + ;; error + (if (member test-name waitons) + (begin + (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") + (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) + + (cond ;; OUTER COND + ((not items) ;; when false the test is ok to be handed off to launch (but not before) + (if (and (not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) + (not (null? tal))) + (loop (car tal)(cdr tal) reg reruns)) + (let* ((run-limits-info (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + ;; (open-run-close runs:can-run-more-tests #f jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running + (have-resources (car run-limits-info)) + (num-running (list-ref run-limits-info 1)) + (num-running-in-jobgroup (list-ref run-limits-info 2)) + (max-concurrent-jobs (list-ref run-limits-info 3)) + (job-group-limit (list-ref run-limits-info 4)) + (prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " + (string-intersperse + (map (lambda (t) + (if (vector? t) + (conc (db:test-get-state t) "/" (db:test-get-status t)) + (conc " WARNING: t is not a vector=" t ))) + prereqs-not-met) ", ") " fails: " fails) + (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) + + ;; Don't know at this time if the test have been launched at some time in the past + ;; i.e. is this a re-launch? + (debug:print-info 4 "run-limits-info = " run-limits-info) + (cond ;; INNER COND #1 for a launchable test + ;; Check item path against item-patts + ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path required: required-tests)) ;; This test/itempath is not to be run + ;; else the run is stuck, temporarily or permanently + ;; but should check if it is due to lack of resources vs. prerequisites + (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) + ;; Registry has been started for this test but has not yet completed + ;; this should be rare, the case where there are only a couple of tests and the db is slow + ;; delay a short while and continue + ;; ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) 'start) + ;; (thread-sleep! 0.01) + ;; (loop (car newtal)(cdr newtal) reruns)) + ;; count number of 'done, if more than 100 then skip on through. + ((not (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f)) ;; ) ;; too many changes required. Implement later. + (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) + (let ((th (make-thread (lambda () + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'start) + (mutex-unlock! registry-mutex) + ;; If haven't done it before register a top level test if this is an itemized test + (if (not (eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name "") #f) 'done)) + (cdb:tests-register-test *runremote* run-id test-name "")) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (mutex-lock! registry-mutex) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'done) + (mutex-unlock! registry-mutex)) + (conc test-name "/" item-path)))) + (thread-start! th)) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) + (if (and (null? tal)(null? reg)) + (loop hed tal reg reruns) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (let ((newl (append reg (list hed)))) + (if regfull + (cdr newl) + newl)) + reruns))) + ;; At this point hed test registration must be completed. + ((eq? (hash-table-ref/default test-registry (runs:make-full-test-name test-name item-path) #f) + 'start) + (debug:print-info 0 "Waiting on test registration(s): " (string-intersperse + (filter (lambda (x) + (eq? (hash-table-ref/default test-registry x #f) 'start)) + (hash-table-keys test-registry)) + ", ")) + (thread-sleep! 0.1) + (loop hed tal reg reruns)) + ((not have-resources) ;; simply try again after waiting a second + (debug:print-info 1 "no resources to run new tests, waiting ...") + ;; Have gone back and forth on this but db starvation is an issue. + ;; wait one second before looking again to run jobs. + (thread-sleep! 1) ;; (+ 2 *global-delta*)) + ;; could have done hed tal here but doing car/cdr of newtal to rotate tests + (loop (car newtal)(cdr newtal) reg reruns)) + ((and have-resources + (or (null? prereqs-not-met) + (and (eq? testmode 'toplevel) + (null? non-completed)))) + (run:test run-id run-info keyvals runname test-record flags #f) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'running) + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns))) + (else ;; must be we have unmet prerequisites + (debug:print 4 "FAILS: " fails) + ;; If one or more of the prereqs-not-met are FAIL then we can issue + ;; a message and drop hed from the items to be processed. + (if (null? fails) + (begin + ;; couldn't run, take a breather + (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") + ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient + ;; we made new tal by sticking hed at the back of the list + (loop (car newtal)(cdr newtal) reg reruns)) + ;; the waiton is FAIL so no point in trying to run hed ever again + (if (not (null? tal)) + (if (vector? hed) + (begin + (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) + " from the launch list as it has prerequistes that are FAIL") + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! *global-delta*) + (hash-table-set! test-registry (runs:make-full-test-name test-name item-path) 'removed) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))) + (begin + (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") + (cdb:remote-run runs:shrink-can-run-more-tests-count #f) ;; DELAY TWEAKER (still needed?) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop hed tal reg reruns))))))))) ;; END OF INNER COND + + ;; case where an items came in as a list been processed + ((and (list? items) ;; thus we know our items are already calculated + (not itemdat)) ;; and not yet expanded into the list of things to be done + (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) + (> (length items) 0) + (> (length (car items)) 0)) + (pp items)) + (for-each + (lambda (my-itemdat) + (let* ((new-test-record (let ((newrec (make-tests:testqueue))) + (vector-copy! test-record newrec) + newrec)) + (my-item-path (item-list->path my-itemdat))) + (if (tests:match test-patts hed my-item-path required: required-tests) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! + (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path + (tests:testqueue-set-items! new-test-record #f) + (tests:testqueue-set-itemdat! new-test-record my-itemdat) + (tests:testqueue-set-item_path! new-test-record my-item-path) + (hash-table-set! test-records newtestname new-test-record) + (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath + items) + (if (not (null? tal)) + (begin + (debug:print-info 4 "End of items list, looping with next after short delay") + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)))) + + ;; if items is a proc then need to run items:get-items-from-config, get the list and loop + ;; - but only do that if resources exist to kick off the job + ((or (procedure? items)(eq? items 'have-procedure)) + (let ((can-run-more (cdb:remote-run runs:can-run-more-tests #f jobgroup max-concurrent-jobs))) + (if (and (list? can-run-more) + (car can-run-more)) + (let* ((prereqs-not-met (db:get-prereqs-not-met run-id waitons item-path mode: testmode)) + (fails (runs:calc-fails prereqs-not-met)) + (non-completed (runs:calc-not-completed prereqs-not-met))) + (debug:print-info 8 "can-run-more: " can-run-more + "\n testname: " hed + "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) + "\n non-completed: " (runs:pretty-string non-completed) + "\n fails: " (runs:pretty-string fails) + "\n testmode: " testmode + "\n num-retries: " num-retries + "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) + "\n (null? non-completed): " (null? non-completed) + "\n reruns: " reruns + "\n items: " items + "\n can-run-more: " can-run-more) + ;; (thread-sleep! (+ 0.01 *global-delta*)) + (cond ;; INNER COND #2 + ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test + ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch + (and (eq? testmode 'toplevel) + (null? non-completed))) + (let ((test-name (tests:testqueue-get-testname test-record))) + (setenv "MT_TEST_NAME" test-name) ;; + (setenv "MT_RUNNAME" runname) + (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process + (let ((items-list (items:get-items-from-config tconfig))) + (if (list? items-list) + (begin + (tests:testqueue-set-items! test-record items-list) + ;; (thread-sleep! *global-delta*) + (loop hed tal reg reruns)) + (begin + (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") + (exit 1)))))) + ((null? fails) + (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") + ;; only increment num-retries when there are no tests runing + (if (eq? 0 (list-ref can-run-more 1)) + (begin + ;; TRY (if (> num-retries 100) ;; first 100 retries are low time cost + ;; TRY (thread-sleep! (+ 2 *global-delta*)) + ;; TRY (thread-sleep! (+ 0.01 *global-delta*))) + (set! num-retries (+ num-retries 1)))) + (if (> num-retries max-retries) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + reruns)) + (loop (car newtal)(cdr newtal) reg reruns))) ;; an issue with prereqs not yet met? + ((and (not (null? fails))(eq? testmode 'normal)) + (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " + (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") + ", removing it from to-do list") + (if (not (null? tal)) + (begin + ;; (thread-sleep! *global-delta*) + (loop (runs:queue-next-hed tal reg reglen regfull) + (runs:queue-next-tal tal reg reglen regfull) + (runs:queue-next-reg tal reg reglen regfull) + (cons hed reruns))))) + (else + (debug:print 8 "ERROR: No handler for this condition.") + ;; TRY (thread-sleep! (+ 1 *global-delta*)) + (loop (car newtal)(cdr newtal) reg reruns)))) ;; END OF IF CAN RUN MORE + + ;; if can't run more just loop with next possible test + (begin + (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) + ;; (thread-sleep! (+ 2 *global-delta*)) + (loop (car newtal)(cdr newtal) reg reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) + + ;; this case should not happen, added to help catch any bugs + ((and (list? items) itemdat) + (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") + (exit 1)) + ((not (null? reruns)) + (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, + (junked (lset-difference equal? tal newlst))) + (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) + (if (< num-retries max-retries) + (set! newlst (append reruns newlst))) + (set! num-retries (+ num-retries 1)) + ;; (thread-sleep! (+ 1 *global-delta*)) + (if (not (null? newlst)) + ;; since reruns have been tacked on to newlst create new reruns from junked + (loop (car newlst)(cdr newlst) reg (delete-duplicates junked))))) + ((not (null? tal)) + (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) + ((not (null? reg)) ;; could we get here with leftovers? + (debug:print-info 0 "Have leftovers!") + (loop (car reg)(cdr reg) '() reruns)) + (else + (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) + )))) ;; LET* ((test-record + + ;; we get here on "drop through" - loop for next test in queue + ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! + + (debug:print-info 1 "All tests launched") + (thread-sleep! 0.5) + ;; FIXME! This harsh exit should not be necessary.... + ;; (if (not *runremote*)(exit)) ;; + #f)) ;; return a #f as a hint that we are done +;; Here we need to check that all the tests remaining to be run are eligible to run +;; and are not blocked by failed + Index: run_records.scm ================================================================== --- run_records.scm +++ run_records.scm @@ -7,10 +7,25 @@ ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;;====================================================================== +(define-inline (runs:runrec-make-record) (make-vector 13)) +(define-inline (runs:runrec-get-target vec)(vector-ref vec 0)) ;; a/b/c +(define-inline (runs:runrec-get-runname vec)(vector-ref vec 1)) ;; string +(define-inline (runs:runrec-testpatt vec)(vector-ref vec 2)) ;; a,b/c,d% +(define-inline (runs:runrec-keys vec)(vector-ref vec 3)) ;; (key1 key2 ...) +(define-inline (runs:runrec-keyvals vec)(vector-ref vec 4)) ;; ((key1 val1)(key2 val2) ...) +(define-inline (runs:runrec-environment vec)(vector-ref vec 5)) ;; environment, alist key val +(define-inline (runs:runrec-mconfig vec)(vector-ref vec 6)) ;; megatest.config +(define-inline (runs:runrec-runconfig vec)(vector-ref vec 7)) ;; runconfigs.config +(define-inline (runs:runrec-serverdat vec)(vector-ref vec 8)) ;; (host port) +(define-inline (runs:runrec-transport vec)(vector-ref vec 9)) ;; 'http +(define-inline (runs:runrec-db vec)(vector-ref vec 10)) ;; (if 'fs) +(define-inline (runs:runrec-top-path vec)(vector-ref vec 11)) ;; *toppath* +(define-inline (runs:runrec-run_id vec)(vector-ref vec 12)) ;; run-id + (define-inline (test:get-id vec) (vector-ref vec 0)) (define-inline (test:get-run_id vec) (vector-ref vec 1)) (define-inline (test:get-test-name vec)(vector-ref vec 2)) (define-inline (test:get-state vec) (vector-ref vec 3)) (define-inline (test:get-status vec) (vector-ref vec 4)) Index: runconfig.scm ================================================================== --- runconfig.scm +++ runconfig.scm @@ -8,24 +8,19 @@ (declare (unit runconfig)) (declare (uses common)) (include "common_records.scm") - - -;; (define (setup-env-defaults db fname run-id already-seen #!key (environ-patt #f)(change-env #t)) -(define (setup-env-defaults fname run-id already-seen keys keyvals #!key (environ-patt #f)(change-env #t)) - (let* (;; (keys (db:get-keys db)) - ;; (keyvals (if run-id (db:get-key-vals db run-id) #f)) - (thekey (if keyvals (string-intersperse (map (lambda (x)(if x x "-na-")) keyvals) "/") - (if (args:get-arg "-reqtarg") - (args:get-arg "-reqtarg") - (if (args:get-arg "-target") - (args:get-arg "-target") - (begin - (debug:print 0 "ERROR: setup-env-defaults called with no run-id or -target or -reqtarg") - "nothing matches this I hope"))))) +(define (setup-env-defaults fname run-id already-seen keyvals #!key (environ-patt #f)(change-env #t)) + (let* ((keys (map car keyvals)) + (thekey (if keyvals (string-intersperse (map (lambda (x)(if x x "-na-")) (map cadr keyvals)) "/") + (or (args:get-arg "-reqtarg") + (args:get-arg "-target") + (get-environment-variable "MT_TARGET") + (begin + (debug:print 0 "ERROR: setup-env-defaults called with no run-id or -target or -reqtarg") + "nothing matches this I hope")))) ;; Why was system disallowed in the reading of the runconfigs file? ;; NOTE: Should be setting env vars based on (target|default) (confdat (read-config fname #f #t environ-patt: environ-patt sections: (list "default" thekey))) (whatfound (make-hash-table)) (finaldat (make-hash-table)) @@ -32,14 +27,14 @@ (sections (list "default" thekey))) (if (not *target*)(set! *target* thekey)) ;; may save a db access or two but repeats db:get-target code (debug:print 4 "Using key=\"" thekey "\"") (if change-env - (for-each - (lambda (key val) - (setenv (vector-ref key 0) val)) - keys keyvals)) + (for-each ;; NB// This can be simplified with new content of keyvals having all that is needed. + (lambda (keyval) + (setenv (car keyval)(cadr keyval))) + keyvals)) (for-each (lambda (section) (let ((section-dat (hash-table-ref/default confdat section #f))) (if section-dat @@ -59,20 +54,21 @@ sections) (debug:print 2 "---") (set! *already-seen-runconfig-info* #t))) finaldat)) -(define (set-run-config-vars db run-id keys keyvals) - (push-directory *toppath*) +(define (set-run-config-vars run-id keyvals targ-from-db) + (push-directory *toppath*) ;; the push/pop doesn't appear to do anything ... (let ((runconfigf (conc *toppath* "/runconfigs.config")) (targ (or (args:get-arg "-target") (args:get-arg "-reqtarg") - (db:get-target db run-id)))) + targ-from-db + (get-environment-variable "MT_TARGET")))) (pop-directory) (if (file-exists? runconfigf) - (setup-env-defaults runconfigf run-id #t keys keyvals + (setup-env-defaults runconfigf run-id #t keyvals environ-patt: (conc "(default" (if targ (conc "|" targ ")") ")"))) (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)))) ADDED runs-launch-loop-test.scm Index: runs-launch-loop-test.scm ================================================================== --- /dev/null +++ runs-launch-loop-test.scm @@ -0,0 +1,59 @@ +(use srfi-69) + +(define (runs:queue-next-hed tal reg n regful) + (if regful + (car reg) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +(use trace) +(trace runs:queue-next-hed + runs:queue-next-tal + runs:queue-next-reg) + + +(define tests '(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)) + +(define test-registry (make-hash-table)) + +(define n 3) + +(let loop ((hed (car tests)) + (tal (cdr tests)) + (reg '())) + (let* ((reglen (length reg)) + (regful (> reglen n))) + (print "hed=" hed ", length reg=" (length reg) ", (> lenreg n)=" (> (length reg) n)) + (let ((newtal (append tal (list hed)))) ;; used if we are not done with this test + (cond + ((not (hash-table-ref/default test-registry hed #f)) + (hash-table-set! test-registry hed #t) + (print "Registering #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (let ((newl (append reg (list hed)))) + (if regful + (cdr newl) + newl))))) + (else + (print "Running #" hed) + (if (not (null? tal)) + (loop (runs:queue-next-hed tal reg n regful) + (runs:queue-next-tal tal reg n regful) + (runs:queue-next-reg tal reg n regful)))))))) Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -32,30 +32,30 @@ ;; register a test run with the db ;; ;; Use: (db-get-value-by-header (db:get-header runinfo)(db:get-row runinfo)) ;; to extract info from the structure returned ;; -(define (runs:get-runs-by-patt db keys runnamepatt) ;; test-name) - (let* ((keyvallst (keys->vallist keys)) - (tmp (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time"))) +(define (runs:get-runs-by-patt db keys runnamepatt targpatt) ;; test-name) + (let* ((tmp (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time"))) (keystr (car tmp)) (header (cadr tmp)) (res '()) (key-patt "") (runwildtype (if (substring-index "%" runnamepatt) "like" "glob")) - (qry-str #f)) + (qry-str #f) + (keyvals (keys:target->keyval keys targpatt))) (for-each (lambda (keyval) - (let* ((key (vector-ref keyval 0)) + (let* ((key (car keyval)) + (patt (cadr keyval)) (fulkey (conc ":" key)) - (patt (args:get-arg fulkey)) (wildtype (if (substring-index "%" patt) "like" "glob"))) (if patt (set! key-patt (conc key-patt " AND " key " " wildtype " '" patt "'")) (begin (debug:print 0 "ERROR: searching for runs with no pattern set for " fulkey) (exit 6))))) - keys) + keyvals) (set! qry-str (conc "SELECT " keystr " FROM runs WHERE runname " runwildtype " ? " key-patt ";")) (debug:print-info 4 "runs:get-runs-by-patt qry=" qry-str " " runnamepatt) (sqlite3:for-each-row (lambda (a . r) (set! res (cons (list->vector (cons a r)) res))) @@ -67,85 +67,124 @@ (define (runs:test-get-full-path test) (let* ((testname (db:test-get-testname test)) (itempath (db:test-get-item-path test))) (conc testname (if (equal? itempath "") "" (conc "(" itempath ")"))))) -(define (db:get-run-key-val db run-id key) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (val) - (set! res val)) - db - (conc "SELECT " (key:get-fieldname key) " FROM runs WHERE id=?;") - run-id) - res)) - -(define (db:get-run-name-from-id db run-id) - (let ((res #f)) - (sqlite3:for-each-row - (lambda (runname) - (set! res runname)) - db - "SELECT runname FROM runs WHERE id=?;" - run-id) - res)) - -(define (set-megatest-env-vars run-id) - (let ((keys (cdb:remote-run db:get-keys #f)) - (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))) +;; This is the *new* methodology. One record to inform them and in the chaos, organise them. +;; +(define (runs:create-run-record) + (let* ((mconfig (if *configdat* + *configdat* + (if (setup-for-run) + *configdat* + (begin + (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting") + (exit 1))))) + (runrec (runs:runrec-make-record)) + (target (or (args:get-arg "-reqtarg") + (args:get-arg "-target"))) + (runname (or (args:get-arg ":runname") + (args:get-arg "-runname"))) + (testpatt (or (args:get-arg "-testpatt") + (args:get-arg "-runtests"))) + (keys (keys:config-get-fields mconfig)) + (keyvals (keys:target->keyval keys target)) + (toppath *toppath*) + (envdat keyvals) ;; initial values start with keyvals + (runconfig #f) + (serverdat (if (args:get-arg "-server") + *runremote* + #f)) ;; to be used later + (transport (or (args:get-arg "-transport") 'http)) + (db (if (and mconfig + (or (args:get-arg "-server") + (eq? transport 'fs))) + (open-db) + #f)) + (run-id #f)) + ;; Set all the environment vars we know so far, start with keys + (for-each (lambda (keyval) + (setenv (car keyval)(cadr keyval))) + keyvals) + ;; Set up various and sundry known vars here + (setenv "MT_RUN_AREA_HOME" toppath) + (setenv "MT_RUNNAME" runname) + (setenv "MT_TARGET" target) + (set! envdat (append + envdat + (list (list "MT_RUN_AREA_HOME" toppath) + (list "MT_RUNNAME" runname) + (list "MT_TARGET" target)))) + ;; Now can read the runconfigs file + ;; + (set! runconfig (read-config (conc *toppath* "/runconfigs.config") #f #t sections: (list "default" target))) + (if (not (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)) + (begin + (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) + (if db (sqlite3:finalize! db)) + (exit 1))) + ;; Now have runconfigs data loaded, set environment vars + (for-each (lambda (section) + (for-each (lambda (varval) + (set! envdat (append envdat (list varval))) + (setenv (car varval)(cadr varval))) + (configf:get-section runconfig section))) + (list "default" target)) + (vector target runname testpatt keys keyvals envdat mconfig runconfig serverdat transport db toppath run-id))) + + +(define (set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)) + (let* ((target (or (args:get-arg "-reqtarg") + (args:get-arg "-target") + (get-environment-variable "MT_TARGET"))) + (keys (if inkeys inkeys (cdb:remote-run db:get-keys #f))) + (keyvals (if inkeyvals inkeyvals (keys:target->keyval keys target))) + (vals (hash-table-ref/default *env-vars-by-run-id* run-id #f))) ;; get the info from the db and put it in the cache (if (not vals) (let ((ht (make-hash-table))) (hash-table-set! *env-vars-by-run-id* run-id ht) (set! vals ht) (for-each (lambda (key) - (hash-table-set! vals key (cdb:remote-run db:get-run-key-val #f run-id key))) - keys))) + (hash-table-set! vals (car key) (cadr key))) ;; (cdb:remote-run db:get-run-key-val #f run-id (car key)))) + keyvals))) ;; from the cached data set the vars (hash-table-for-each vals (lambda (key val) - (debug:print 2 "setenv " (key:get-fieldname key) " " val) - (setenv (key:get-fieldname key) val))) + (debug:print 2 "setenv " key " " val) + (setenv key val))) (alist->env-vars (hash-table-ref/default *configdat* "env-override" '())) ;; Lets use this as an opportunity to put MT_RUNNAME in the environment - (setenv "MT_RUNNAME" (cdb:remote-run db:get-run-name-from-id #f run-id)) - (setenv "MT_RUN_AREA_HOME" *toppath*) - )) + (setenv "MT_RUNNAME" (if inrunname inrunname (cdb:remote-run db:get-run-name-from-id #f run-id))) + (setenv "MT_RUN_AREA_HOME" *toppath*))) (define (set-item-env-vars itemdat) (for-each (lambda (item) (debug:print 2 "setenv " (car item) " " (cadr item)) (setenv (car item) (cadr item))) itemdat)) +;; Every time can-run-more-tests is called increment the delay +;; +;; NOTE: We run this server-side!! Do not use this global except in the runs:can-run-more-tests routine +;; (define *last-num-running-tests* 0) -(define *runs:can-run-more-tests-delay* 0) -(define (runs:shrink-can-run-more-tests-delay) - (set! *runs:can-run-more-tests-delay* 0)) ;; (/ *runs:can-run-more-tests-delay* 2))) - -(define (runs:can-run-more-tests test-record) - (thread-sleep! *runs:can-run-more-tests-delay*) - (let* ((tconfig (tests:testqueue-get-testconfig test-record)) - (jobgroup (config-lookup tconfig "requirements" "jobgroup")) - ;; Heuristic fix. These are getting called too rapidly when jobs are running or stuck - ;; so we are going to increment a global delay by 0.1 seconds up to 10 seconds - ;; every time runs:can-run-more-tests is called. - ;; when a test is launched or other activity occurs divide the delay by 2 - (num-running (cdb:remote-run db:get-count-tests-running #f)) - (num-running-in-jobgroup (cdb:remote-run db:get-count-tests-running-in-jobgroup #f jobgroup)) - (max-concurrent-jobs (let ((mcj (config-lookup *configdat* "setup" "max_concurrent_jobs"))) - (if (and mcj (string->number mcj)) - (string->number mcj) - 1))) +(define *runs:can-run-more-tests-count* 0) +(define (runs:shrink-can-run-more-tests-count db) ;; the db is just so we can use cdb:remote-run + (set! *runs:can-run-more-tests-count* 0)) ;; (/ *runs:can-run-more-tests-count* 2))) + +(define (runs:can-run-more-tests db jobgroup max-concurrent-jobs) + (thread-sleep! (cond + ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while + (else 0))) + (let* ((num-running (db:get-count-tests-running db)) + (num-running-in-jobgroup (db:get-count-tests-running-in-jobgroup db jobgroup)) (job-group-limit (config-lookup *configdat* "jobgroups" jobgroup))) - (if (and (> (+ num-running num-running-in-jobgroup) 0) - (< *runs:can-run-more-tests-delay* 1)) - (begin - (set! *runs:can-run-more-tests-delay* (+ *runs:can-run-more-tests-delay* 0.009)) - (debug:print-info 14 "can-run-more-tests-delay: " *runs:can-run-more-tests-delay*))) + (if (> (+ num-running num-running-in-jobgroup) 0) + (set! *runs:can-run-more-tests-count* (+ *runs:can-run-more-tests-count* 1))) (if (not (eq? *last-num-running-tests* num-running)) (begin (debug:print 2 "max-concurrent-jobs: " max-concurrent-jobs ", num-running: " num-running) (set! *last-num-running-tests* num-running))) (if (not (eq? 0 *globalexitstatus*)) @@ -171,72 +210,42 @@ ;; New methodology. These routines will replace the above in time. For ;; now the code is duplicated. This stuff is initially used in the monitor ;; based code. ;;====================================================================== -;; register a test run with the db -(define (runs:register-run db keys keyvallst runname state status user) - (debug:print 3 "runs:register-run, keys: " keys " keyvallst: " keyvallst " runname: " runname " state: " state " status: " status " user: " user) - (let* ((keystr (keys->keystr keys)) - (comma (if (> (length keys) 0) "," "")) - (andstr (if (> (length keys) 0) " AND " "")) - (valslots (keys->valslots keys)) ;; ?,?,? ... - (keyvals (map cadr keyvallst)) - (allvals (append (list runname state status user) keyvals)) - (qryvals (append (list runname) keyvals)) - (key=?str (string-intersperse (map (lambda (k)(conc (key:get-fieldname k) "=?")) keys) " AND "))) - (debug:print 3 "keys: " keys " allvals: " allvals " keyvals: " keyvals) - (debug:print 2 "NOTE: using target " (string-intersperse keyvals "/") " for this run") - (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" - (let ((res #f)) - (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time" comma keystr ") VALUES (?,?,?,?,strftime('%s','now')" comma valslots ");") - allvals) - (apply sqlite3:for-each-row - (lambda (id) - (set! res id)) - db - (let ((qry (conc "SELECT id FROM runs WHERE (runname=? " andstr key=?str ");"))) - ;(debug:print 4 "qry: " qry) - qry) - qryvals) - (sqlite3:execute db "UPDATE runs SET state=?,status=? WHERE id=?;" state status res) - res) - (begin - (debug:print 0 "ERROR: Called without all necessary keys") - #f)))) ;; This is a duplicate of run-tests (which has been deprecated). Use this one instead of run tests. ;; keyvals. ;; ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; -(define (runs:run-tests target runname test-names test-patts user flags) +(define (runs:run-tests target runname test-patts user flags) ;; test-names (common:clear-caches) ;; clear all caches (let* ((db #f) - (keys (cdb:remote-run db:get-keys #f)) - (keyvallst (keys:target->keyval keys target)) - (run-id (cdb:remote-run runs:register-run #f keys keyvallst runname "new" "n/a" user)) ;; test-name))) - (keyvals (if run-id (cdb:remote-run db:get-key-vals #f run-id) #f)) + (keys (keys:config-get-fields *configdat*)) + (keyvals (keys:target->keyval keys target)) + (run-id (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user)) ;; test-name))) (deferred '()) ;; delay running these since they have a waiton clause ;; keepgoing is the defacto modality now, will add hit-n-run a bit later ;; (keepgoing (hash-table-ref/default flags "-keepgoing" #f)) (runconfigf (conc *toppath* "/runconfigs.config")) (required-tests '()) - (test-records (make-hash-table))) + (test-records (make-hash-table)) + (all-test-names (tests:get-valid-tests *toppath* "%"))) ;; we need a list of all valid tests to check waiton names - (set-megatest-env-vars run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id inkeys: keys) ;; these may be needed by the launching process (if (file-exists? runconfigf) - (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keys keyvals "pre-launch-env-vars") + (setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals "pre-launch-env-vars") (debug:print 0 "WARNING: You do not have a run config file: " runconfigf)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) - (set! test-names (tests:get-valid-tests *toppath* test-names)) + (set! test-names (tests:get-valid-tests *toppath* test-patts)) (set! test-names (delete-duplicates test-names)) (debug:print-info 0 "test names " test-names) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if @@ -254,28 +263,35 @@ ;; (sqlite3:finalize! db) ;; now add non-directly referenced dependencies (i.e. waiton) (if (not (null? test-names)) (let loop ((hed (car test-names)) (tal (cdr test-names))) ;; 'return-procs tells the config reader to prep running system but return a proc - (debug:print-info 4 "hed=" hed " at top of loop") (let* ((config (tests:get-testconfig hed 'return-procs)) (waitons (let ((instr (if config (config-lookup config "requirements" "waiton") (begin ;; No config means this is a non-existant test (debug:print 0 "ERROR: non-existent required test \"" hed "\"") (if db (sqlite3:finalize! db)) (exit 1))))) (debug:print-info 8 "waitons string is " instr) - (string-split (cond - ((procedure? instr) - (let ((res (instr))) - (debug:print-info 8 "waiton procedure results in string " res " for test " hed) - res)) - ((string? instr) instr) - (else - ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print 0 "ERROR: something went wrong in processing waitons for test " hed) - "")))))) + (let ((newwaitons + (string-split (cond + ((procedure? instr) + (let ((res (instr))) + (debug:print-info 8 "waiton procedure results in string " res " for test " hed) + res)) + ((string? instr) instr) + (else + ;; NOTE: This is actually the case of *no* waitons! ;; (debug:print 0 "ERROR: something went wrong in processing waitons for test " hed) + ""))))) + (filter (lambda (x) + (if (member x all-test-names) + #t + (begin + (debug:print 0 "ERROR: test " hed " has unrecognised waiton testname " x) + #f))) + newwaitons))))) (debug:print-info 8 "waitons: " waitons) ;; check for hed in waitons => this would be circular, remove it and issue an ;; error (if (member hed waitons) (begin @@ -308,11 +324,11 @@ (append (if (list? items) items '()) (if (list? itemstable) itemstable '()))) 'have-procedure) ((or (list? items)(list? itemstable)) ;; calc now (debug:print-info 4 "items and itemstable are lists, calc now\n" - " items: " items " itemstable: " itemstable) + " items: " items " itemstable: " itemstable) (items:get-items-from-config config)) (else #f))) ;; not iterated #f ;; itemsdat 5 #f ;; spare - used for item-path ))) @@ -329,11 +345,14 @@ (if (not (null? required-tests)) (debug:print-info 1 "Adding " required-tests " to the run queue")) ;; NOTE: these are all parent tests, items are not expanded yet. (debug:print-info 4 "test-records=" (hash-table->alist test-records)) - (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts) + (let ((reglen (any->number (configf:lookup *configdat* "setup" "runqueue")))) + (if reglen + (runs:run-tests-queue-new run-id runname test-records keyvals flags test-patts required-tests reglen) + (runs:run-tests-queue-classic run-id runname test-records keyvals flags test-patts required-tests))) (debug:print-info 4 "All done by here"))) (define (runs:calc-fails prereqs-not-met) (filter (lambda (test) (and (vector? test) ;; not (string? test)) @@ -357,270 +376,38 @@ lst)) (define (runs:make-full-test-name testname itempath) (if (equal? itempath "") testname (conc testname "/" itempath))) -;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... > -(define (runs:run-tests-queue run-id runname test-records keyvallst flags test-patts) - ;; At this point the list of parent tests is expanded - ;; NB// Should expand items here and then insert into the run queue. - (debug:print 5 "test-records: " test-records ", keyvallst: " keyvallst " flags: " (hash-table->alist flags)) - (let ((sorted-test-names (tests:sort-by-priority-and-waiton test-records)) - (test-registery (make-hash-table)) - (num-retries 0) - (max-retries (config-lookup *configdat* "setup" "maxretries"))) - (set! max-retries (if (and max-retries (string->number max-retries))(string->number max-retries) 100)) - (if (not (null? sorted-test-names)) - (let loop ((hed (car sorted-test-names)) - (tal (cdr sorted-test-names)) - (reruns '())) - (if (not (null? reruns))(debug:print-info 4 "reruns=" reruns)) - ;; (print "Top of loop, hed=" hed ", tal=" tal " ,reruns=" reruns) - (let* ((test-record (hash-table-ref test-records hed)) - (test-name (tests:testqueue-get-testname test-record)) - (tconfig (tests:testqueue-get-testconfig test-record)) - (testmode (let ((m (config-lookup tconfig "requirements" "mode"))) - (if m (string->symbol m) 'normal))) - (waitons (tests:testqueue-get-waitons test-record)) - (priority (tests:testqueue-get-priority test-record)) - (itemdat (tests:testqueue-get-itemdat test-record)) ;; itemdat can be a string, list or #f - (items (tests:testqueue-get-items test-record)) - (item-path (item-list->path itemdat)) - (newtal (append tal (list hed)))) - - (debug:print 6 - "test-name: " test-name - "\n hed: " hed - "\n itemdat: " itemdat - "\n items: " items - "\n item-path: " item-path - "\n waitons: " waitons - "\n num-retries: " num-retries - "\n tal: " tal - "\n reruns: " reruns) - - ;; check for hed in waitons => this would be circular, remove it and issue an - ;; error - (if (member test-name waitons) - (begin - (debug:print 0 "ERROR: test " test-name " has listed itself as a waiton, please correct this!") - (set! waiton (filter (lambda (x)(not (equal? x hed))) waitons)))) - - (cond ;; OUTER COND - ((not items) ;; when false the test is ok to be handed off to launch (but not before) - (let* ((run-limits-info (open-run-close runs:can-run-more-tests test-record)) ;; look at the test jobgroup and tot jobs running - (have-resources (car run-limits-info)) - (num-running (list-ref run-limits-info 1)) - (num-running-in-jobgroup (list-ref run-limits-info 2)) - (max-concurrent-jobs (list-ref run-limits-info 3)) - (job-group-limit (list-ref run-limits-info 4)) - (prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) - (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) - (debug:print-info 8 "have-resources: " have-resources " prereqs-not-met: " - (string-intersperse - (map (lambda (t) - (if (vector? t) - (conc (db:test-get-state t) "/" (db:test-get-status t)) - (conc " WARNING: t is not a vector=" t ))) - prereqs-not-met) ", ") " fails: " fails) - (debug:print-info 4 "hed=" hed "\n test-record=" test-record "\n test-name: " test-name "\n item-path: " item-path "\n test-patts: " test-patts) - - ;; Don't know at this time if the test have been launched at some time in the past - ;; i.e. is this a re-launch? - (debug:print-info 4 "run-limits-info = " run-limits-info) - (cond ;; INNER COND #1 for a launchable test - ;; Check item path against item-patts - ((not (tests:match test-patts (tests:testqueue-get-testname test-record) item-path)) ;; This test/itempath is not to be run - ;; else the run is stuck, temporarily or permanently - ;; but should check if it is due to lack of resources vs. prerequisites - (debug:print-info 1 "Skipping " (tests:testqueue-get-testname test-record) " " item-path " as it doesn't match " test-patts) - ;; (thread-sleep! *global-delta*) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) - ( ;; (and - (not (hash-table-ref/default test-registery (runs:make-full-test-name test-name item-path) #f)) - ;; (and max-concurrent-jobs (> (- max-concurrent-jobs num-running) 5))) - (debug:print-info 4 "Pre-registering test " test-name "/" item-path " to create placeholder" ) - (open-run-close db:tests-register-test #f run-id test-name item-path) - (hash-table-set! test-registery (runs:make-full-test-name test-name item-path) #t) - ;; (thread-sleep! *global-delta*) -(runs:shrink-can-run-more-tests-delay) - (loop (car newtal)(cdr newtal) reruns)) - ((not have-resources) ;; simply try again after waiting a second - (debug:print-info 1 "no resources to run new tests, waiting ...") - ;; (thread-sleep! (+ 2 *global-delta*)) - ;; could have done hed tal here but doing car/cdr of newtal to rotate tests - (loop (car newtal)(cdr newtal) reruns)) - ((and have-resources - (or (null? prereqs-not-met) - (and (eq? testmode 'toplevel) - (null? non-completed)))) - (run:test run-id runname keyvallst test-record flags #f) -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! *global-delta*) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns))) - (else ;; must be we have unmet prerequisites - (debug:print 4 "FAILS: " fails) - ;; If one or more of the prereqs-not-met are FAIL then we can issue - ;; a message and drop hed from the items to be processed. - (if (null? fails) - (begin - ;; couldn't run, take a breather - (debug:print-info 4 "Shouldn't really get here, race condition? Unable to launch more tests at this moment, killing time ...") - ;; (thread-sleep! (+ 0.01 *global-delta*)) ;; long sleep here - no resources, may as well be patient - ;; we made new tal by sticking hed at the back of the list - (loop (car newtal)(cdr newtal) reruns)) - ;; the waiton is FAIL so no point in trying to run hed ever again - (if (not (null? tal)) - (if (vector? hed) - (begin (debug:print 1 "WARN: Dropping test " (db:test-get-testname hed) "/" (db:test-get-item-path hed) - " from the launch list as it has prerequistes that are FAIL") -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal) (cons hed reruns))) - (begin - (debug:print 1 "WARN: Test not processed correctly. Could be a race condition in your test implementation? " hed) ;; " as it has prerequistes that are FAIL. (NOTE: hed is not a vector)") -(runs:shrink-can-run-more-tests-delay) - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop hed tal reruns))))))))) ;; END OF INNER COND - - ;; case where an items came in as a list been processed - ((and (list? items) ;; thus we know our items are already calculated - (not itemdat)) ;; and not yet expanded into the list of things to be done - (if (and (debug:debug-mode 1) ;; (>= *verbosity* 1) - (> (length items) 0) - (> (length (car items)) 0)) - (pp items)) - (for-each - (lambda (my-itemdat) - (let* ((new-test-record (let ((newrec (make-tests:testqueue))) - (vector-copy! test-record newrec) - newrec)) - (my-item-path (item-list->path my-itemdat))) - (if (tests:match test-patts hed my-item-path) ;; (patt-list-match my-item-path item-patts) ;; yes, we want to process this item, NOTE: Should not need this check here! - (let ((newtestname (runs:make-full-test-name hed my-item-path))) ;; test names are unique on testname/item-path - (tests:testqueue-set-items! new-test-record #f) - (tests:testqueue-set-itemdat! new-test-record my-itemdat) - (tests:testqueue-set-item_path! new-test-record my-item-path) - (hash-table-set! test-records newtestname new-test-record) - (set! tal (cons newtestname tal)))))) ;; since these are itemized create new test names testname/itempath - items) - (if (not (null? tal)) - (begin - (debug:print-info 4 "End of items list, looping with next after short delay") - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (loop (car tal)(cdr tal) reruns)))) - - ;; if items is a proc then need to run items:get-items-from-config, get the list and loop - ;; - but only do that if resources exist to kick off the job - ((or (procedure? items)(eq? items 'have-procedure)) - (let ((can-run-more (runs:can-run-more-tests test-record))) - (if (and (list? can-run-more) - (car can-run-more)) - (let* ((prereqs-not-met (open-run-close db:get-prereqs-not-met #f run-id waitons item-path mode: testmode)) - (fails (runs:calc-fails prereqs-not-met)) - (non-completed (runs:calc-not-completed prereqs-not-met))) - (debug:print-info 8 "can-run-more: " can-run-more - "\n testname: " hed - "\n prereqs-not-met: " (runs:pretty-string prereqs-not-met) - "\n non-completed: " (runs:pretty-string non-completed) - "\n fails: " (runs:pretty-string fails) - "\n testmode: " testmode - "\n num-retries: " num-retries - "\n (eq? testmode 'toplevel): " (eq? testmode 'toplevel) - "\n (null? non-completed): " (null? non-completed) - "\n reruns: " reruns - "\n items: " items - "\n can-run-more: " can-run-more) - ;; (thread-sleep! (+ 0.01 *global-delta*)) - (cond ;; INNER COND #2 - ((or (null? prereqs-not-met) ;; all prereqs met, fire off the test - ;; or, if it is a 'toplevel test and all prereqs not met are COMPLETED then launch - (and (eq? testmode 'toplevel) - (null? non-completed))) - (let ((test-name (tests:testqueue-get-testname test-record))) - (setenv "MT_TEST_NAME" test-name) ;; - (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id) ;; these may be needed by the launching process - (let ((items-list (items:get-items-from-config tconfig))) - (if (list? items-list) - (begin - (tests:testqueue-set-items! test-record items-list) - ;; (thread-sleep! *global-delta*) - (loop hed tal reruns)) - (begin - (debug:print 0 "ERROR: The proc from reading the setup did not yield a list - please report this") - (exit 1)))))) - ((null? fails) - (debug:print-info 4 "fails is null, moving on in the queue but keeping " hed " for now") - ;; only increment num-retries when there are no tests runing - (if (eq? 0 (list-ref can-run-more 1)) - (begin - (if (> num-retries 100) ;; first 100 retries are low time cost - (thread-sleep! (+ 2 *global-delta*)) - (thread-sleep! (+ 0.01 *global-delta*))) - (set! num-retries (+ num-retries 1)))) - (if (> num-retries max-retries) - (if (not (null? tal)) - (loop (car tal)(cdr tal) reruns)) - (loop (car newtal)(cdr newtal) reruns))) ;; an issue with prereqs not yet met? - ((and (not (null? fails))(eq? testmode 'normal)) - (debug:print-info 1 "test " hed " (mode=" testmode ") has failed prerequisite(s); " - (string-intersperse (map (lambda (t)(conc (db:test-get-testname t) ":" (db:test-get-state t)"/"(db:test-get-status t))) fails) ", ") - ", removing it from to-do list") - (if (not (null? tal)) - (begin - ;; (thread-sleep! *global-delta*) - (loop (car tal)(cdr tal)(cons hed reruns))))) - (else - (debug:print 8 "ERROR: No handler for this condition.") - (thread-sleep! (+ 1 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns)))) ;; END OF IF CAN RUN MORE - - ;; if can't run more just loop with next possible test - (begin - (debug:print-info 4 "processing the case with a lambda for items or 'have-procedure. Moving through the queue without dropping " hed) - ;; (thread-sleep! (+ 2 *global-delta*)) - (loop (car newtal)(cdr newtal) reruns))))) ;; END OF (or (procedure? items)(eq? items 'have-procedure)) - - ;; this case should not happen, added to help catch any bugs - ((and (list? items) itemdat) - (debug:print 0 "ERROR: Should not have a list of items in a test and the itemspath set - please report this") - (exit 1)) - ((not (null? reruns)) - (let* ((newlst (tests:filter-non-runnable run-id tal test-records)) ;; i.e. not FAIL, WAIVED, INCOMPLETE, PASS, KILLED, - (junked (lset-difference equal? tal newlst))) - (debug:print-info 4 "full drop through, if reruns is less than 100 we will force retry them, reruns=" reruns ", tal=" tal) - (if (< num-retries max-retries) - (set! newlst (append reruns newlst))) - (set! num-retries (+ num-retries 1)) - ;; (thread-sleep! (+ 1 *global-delta*)) - (if (not (null? newlst)) - ;; since reruns have been tacked on to newlst create new reruns from junked - (loop (car newlst)(cdr newlst)(delete-duplicates junked))))) - ((not (null? tal)) - (debug:print-info 4 "I'm pretty sure I shouldn't get here.")) - (else - (debug:print-info 4 "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) - )))) ;; LET* ((test-record - - ;; we get here on "drop through" - loop for next test in queue - ;; FIXME!!!! THIS SHOULD NOT REQUIRE AN EXIT!!!!!!! - - (debug:print-info 1 "All tests launched") - (thread-sleep! 0.5) - ;; FIXME! This harsh exit should not be necessary.... - ;; (if (not *runremote*)(exit)) ;; - #f)) ;; return a #f as a hint that we are done - ;; Here we need to check that all the tests remaining to be run are eligible to run - ;; and are not blocked by failed - +(define (runs:queue-next-hed tal reg n regful) + (if regful + (if (null? reg) ;; doesn't make sense, this is probably NOT the problem of the car + (car tal) + (car reg)) + (car tal))) + +(define (runs:queue-next-tal tal reg n regful) + (if regful + tal + (let ((newtal (cdr tal))) + (if (null? newtal) + reg + newtal + )))) + +(define (runs:queue-next-reg tal reg n regful) + (if regful + (cdr reg) + (if (eq? (length tal) 1) + '() + reg))) + +(include "run-tests-queue-classic.scm") +(include "run-tests-queue-new.scm") ;; parent-test is there as a placeholder for when parent-tests can be run as a setup step -(define (run:test run-id runname keyvallst test-record flags parent-test) +(define (run:test run-id run-info keyvals runname test-record flags parent-test) ;; All these vars might be referenced by the testconfig file reader (let* ((test-name (tests:testqueue-get-testname test-record)) (test-waitons (tests:testqueue-get-waitons test-record)) (test-conf (tests:testqueue-get-testconfig test-record)) (itemdat (tests:testqueue-get-itemdat test-record)) @@ -638,19 +425,19 @@ (if (not itemdat)(set! itemdat '())) (set! item-path (item-list->path itemdat)) (debug:print 2 "Attempting to launch test " test-name (if (equal? item-path "/") "/" item-path)) (setenv "MT_TEST_NAME" test-name) ;; (setenv "MT_RUNNAME" runname) - (set-megatest-env-vars run-id) ;; these may be needed by the launching process + (set-megatest-env-vars run-id inrunname: runname) ;; these may be needed by the launching process (change-directory *toppath*) ;; Here is where the test_meta table is best updated ;; Yes, another use of a global for caching. Need a better way? (if (not (hash-table-ref/default *test-meta-updated* test-name #f)) (begin (hash-table-set! *test-meta-updated* test-name #t) - (open-run-close runs:update-test_meta db test-name test-conf))) + (runs:update-test_meta test-name test-conf))) ;; (lambda (itemdat) ;;; ((ripeness "overripe") (temperature "cool") (season "summer")) (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/")) (new-test-name (if (equal? item-path "") test-name (conc test-name "/" item-path))) ;; just need it to be unique (test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) @@ -663,18 +450,20 @@ ;; ;; (open-run-close tests:register-test db run-id test-name item-path) ;; ;; NB// for the above line. I want the test to be registered long before this routine gets called! ;; - (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)) + (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)) (if (not test-id) (begin (debug:print 2 "WARN: Test not pre-created? test-name=" test-name ", item-path=" item-path ", run-id=" run-id) - (open-run-close db:tests-register-test #f run-id test-name item-path) - (set! test-id (open-run-close db:get-test-id db run-id test-name item-path)))) + (cdb:tests-register-test *runremote* run-id test-name item-path) + (set! test-id (cdb:remote-run db:get-test-id #f run-id test-name item-path)))) (debug:print-info 4 "test-id=" test-id ", run-id=" run-id ", test-name=" test-name ", item-path=\"" item-path "\"") (set! testdat (cdb:get-test-info-by-id *runremote* test-id)))) + (if (not testdat) ;; should NOT happen + (debug:print 0 "ERROR: failed to get test record for test-id " test-id)) (set! test-id (db:test-get-id testdat)) (change-directory test-path) (case (if force ;; (args:get-arg "-force") 'NOT_STARTED (if testdat @@ -719,11 +508,12 @@ (debug:print 1 "NOTE: Not starting test " new-test-name " as it is state \"" (test:get-state testdat) "\" and status \"" (test:get-status testdat) "\", use -rerun \"" (test:get-status testdat) "\" or -force to override")) ;; NOTE: No longer be checking prerequisites here! Will never get here unless prereqs are ;; already met. - (if (not (launch-test #f run-id runname test-conf keyvallst test-name test-path itemdat flags)) + ;; This would be a great place to do the process-fork + (if (not (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat flags)) (begin (print "ERROR: Failed to launch the test. Exiting as soon as possible") (set! *globalexitstatus* 1) ;; (process-signal (current-process-id) signal/kill)))))) ((KILLED) @@ -754,15 +544,15 @@ ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? ;; -(define (runs:operate-on action runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) +(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)) (common:clear-caches) ;; clear all caches (let* ((db #f) - (keys (open-run-close db:get-keys db)) - (rundat (open-run-close runs:get-runs-by-patt db keys runnamepatt)) + (keys (cdb:remote-run db:get-keys db)) + (rundat (cdb:remote-run runs:get-runs-by-patt db keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) @@ -772,21 +562,23 @@ (debug:print 0 "ERROR: the parameter to -set-state-status is a comma delimited string. E.g. COMPLETED,FAIL") (exit))) (for-each (lambda (run) (let ((runkey (string-intersperse (map (lambda (k) - (db:get-value-by-header run header (vector-ref k 0))) keys) "/")) - (dirs-to-remove (make-hash-table))) + (db:get-value-by-header run header k)) keys) "/")) + (dirs-to-remove (make-hash-table)) + (proc-get-tests (lambda (run-id) + (cdb:remote-run db:get-tests-for-run db run-id + testpatt states statuses + not-in: #f + sort-by: (case action + ((remove-runs) 'rundir) + (else 'event_time)))))) (let* ((run-id (db:get-value-by-header run header "id")) (run-state (db:get-value-by-header run header "state")) (tests (if (not (equal? run-state "locked")) - (open-run-close db:get-tests-for-run db run-id - testpatt states statuses - not-in: #f - sort-by: (case action - ((remove-runs) 'rundir) - (else 'event_time))) + (proc-get-tests run-id) '())) (lasttpath "/does/not/exist/I/hope")) (debug:print-info 4 "runs:operate-on run=" run ", header=" header) (if (not (null? tests)) (begin @@ -796,79 +588,115 @@ ((set-state-status) (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) + ((run-wait) + (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete")) (else (debug:print-info 0 "action not recognised " action))) - (for-each - (lambda (test) - (let* ((item-path (db:test-get-item-path test)) - (test-name (db:test-get-testname test)) - (run-dir (db:test-get-rundir test)) ;; run dir is from the link tree - (real-dir (if (file-exists? run-dir) - (resolve-pathname run-dir) - #f)) - (test-id (db:test-get-id test))) - ;; (tdb (db:open-test-db run-dir))) - (debug:print-info 4 "test=" test) ;; " (db:test-get-testname test) " id: " (db:test-get-id test) " " item-path " action: " action) - (case action - ((remove-runs) ;; the tdb is for future possible. - (open-run-close db:delete-test-records db #f (db:test-get-id test)) - (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) - (if (and real-dir - (> (string-length real-dir) 5) - (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. - (begin ;; let* ((realpath (resolve-pathname run-dir))) - (debug:print-info 1 "Recursively removing " real-dir) - (if (file-exists? real-dir) - (if (> (system (conc "rm -rf " real-dir)) 0) - (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f")) - (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) - (if real-dir - (debug:print 0 "WARNING: directory " real-dir " does not exist") - (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) - (if (symbolic-link? run-dir) - (begin - (debug:print-info 1 "Removing symlink " run-dir) - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-file run-dir))) - (if (directory? run-dir) - (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) - (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") - (handle-exceptions - exn - (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") - (delete-directory run-dir))) - (if run-dir - (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") - (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) - ))) - ((set-state-status) - (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) - (open-run-close db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f))))) - (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a)) - (dirb (db:test-get-rundir b))) - (if (and (string? dira)(string? dirb)) - (> (string-length dira)(string-length dirb)) - #f))))))) + (let ((sorted-tests (sort tests (lambda (a b)(let ((dira (db:test-get-rundir a)) + (dirb (db:test-get-rundir b))) + (if (and (string? dira)(string? dirb)) + (> (string-length dira)(string-length dirb)) + #f))))) + (test-retry-time (make-hash-table)) + (allow-run-time 10)) ;; seconds to allow for killing tests before just brutally killing 'em + (let loop ((test (car sorted-tests)) + (tal (cdr sorted-tests))) + (let* ((test-id (db:test-get-id test)) + (new-test-dat (cdb:remote-run db:get-test-info-by-id #f test-id)) + (item-path (db:test-get-item-path new-test-dat)) + (test-name (db:test-get-testname new-test-dat)) + (run-dir (db:test-get-rundir new-test-dat)) ;; run dir is from the link tree + (real-dir (if (file-exists? run-dir) + (resolve-pathname run-dir) + #f)) + (test-state (db:test-get-state new-test-dat)) + (test-fulln (db:test-get-fullname new-test-dat))) + (case action + ((remove-runs) + (debug:print-info 0 "test: " test-name " itest-state: " test-state) + (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) + (begin + (if (not (hash-table-ref/default test-retry-time test-fulln #f)) + (hash-table-set! test-retry-time test-fulln (current-seconds))) + (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) + ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first + ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give + ;; up and blow it away. + (begin + (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") + (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "FAILEDKILL" "n/a" #f) + (thread-sleep! 1)) + (begin + (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "KILLREQ" "n/a" #f) + (thread-sleep! 1))) + ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... + (if (null? tal) + (loop new-test-dat tal) + (loop (car tal)(append tal (list new-test-dat))))) + (begin + (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) + (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) + (if (and real-dir + (> (string-length real-dir) 5) + (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. + (begin ;; let* ((realpath (resolve-pathname run-dir))) + (debug:print-info 1 "Recursively removing " real-dir) + (if (file-exists? real-dir) + (if (> (system (conc "rm -rf " real-dir)) 0) + (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f")) + (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) + (if real-dir + (debug:print 0 "WARNING: directory " real-dir " does not exist") + (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) + (if (symbolic-link? run-dir) + (begin + (debug:print-info 1 "Removing symlink " run-dir) + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove symlink " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-file run-dir))) + (if (directory? run-dir) + (if (> (directory-fold (lambda (f x)(+ 1 x)) 0 run-dir) 0) + (debug:print 0 "WARNING: refusing to remove " run-dir " as it is not empty") + (handle-exceptions + exn + (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") + (delete-directory run-dir))) + (if run-dir + (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") + (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) + )) + (if (not (null? tal)) + (loop (car tal)(cdr tal)))))) + ((set-state-status) + (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) + (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f)) + ((run-wait) + (debug:print-info 2 "still waiting, " (length tests) " tests still running") + (thread-sleep! 10) + (let ((new-tests (proc-get-tests run-id))) + (if (null? new-tests) + (debug:print-info 1 "Run completed according to zero tests matching provided criteria.") + (loop (car new-tests)(cdr new-tests)))))))) + ))) ;; remove the run if zero tests remain (if (eq? action 'remove-runs) - (let ((remtests (open-run-close db:get-tests-for-run db (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) + (let ((remtests (cdb:remote-run db:get-tests-for-run db (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t))) (if (null? remtests) ;; no more tests remaining (let* ((dparts (string-split lasttpath "/")) (runpath (conc "/" (string-intersperse (take dparts (- (length dparts) 1)) "/")))) (debug:print 1 "Removing run: " runkey " " (db:get-value-by-header run header "runname") " and related record") - (open-run-close db:delete-run db run-id) + (cdb:remote-run db:delete-run db run-id) ;; This is a pretty good place to purge old DELETED tests - (open-run-close db:delete-tests-for-run db run-id) - (open-run-close db:delete-old-deleted-test-records db) - (open-run-close db:set-var db "DELETED_TESTS" (current-seconds)) + (cdb:remote-run db:delete-tests-for-run db run-id) + (cdb:remote-run db:delete-old-deleted-test-records db) + (cdb:remote-run db:set-var db "DELETED_TESTS" (current-seconds)) ;; need to figure out the path to the run dir and remove it if empty ;; (if (null? (glob (conc runpath "/*"))) ;; (begin ;; (debug:print 1 "Removing run dir " runpath) ;; (system (conc "rmdir -p " runpath)))) @@ -885,39 +713,38 @@ ;; this wrapper is used to reduce the replication of code (define (general-run-call switchname action-desc proc) (let ((runname (args:get-arg ":runname")) (target (if (args:get-arg "-target") (args:get-arg "-target") - (args:get-arg "-reqtarg"))) - (th1 #f)) + (args:get-arg "-reqtarg")))) + ;; (th1 #f)) (cond ((not target) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target") (exit 3)) ((not runname) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with :runname runname") (exit 3)) (else (let ((db #f) - (keys #f)) + (keys #f) + (target (or (args:get-arg "-reqtarg") + (args:get-arg "-target")))) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) - (if (args:get-arg "-server") - (open-run-close server:start db (args:get-arg "-server"))) - ;; (if (not (or (args:get-arg "-runall") ;; runall and runtests are allowed to be servers - ;; (args:get-arg "-runtests"))) - ;; (client:setup) ;; This is a duplicate startup!!!??? BUG? - ;; )) - (set! keys (open-run-close db:get-keys db)) + ;; (if (args:get-arg "-server") + ;; (cdb:remote-run server:start db (args:get-arg "-server"))) + (set! keys (keys:config-get-fields *configdat*)) ;; have enough to process -target or -reqtarg here (if (args:get-arg "-reqtarg") (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL - (runconfig (read-config runconfigf #f #t environ-patt: #f))) + (runconfig (read-config runconfigf #f #t environ-patt: #f))) (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) + (begin (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) (if db (sqlite3:finalize! db)) (exit 1)))) (if (args:get-arg "-target") @@ -926,57 +753,55 @@ (begin (debug:print 0 "ERROR: Attempted to " action-desc " but run area config file not found") (exit 1)) ;; Extract out stuff needed in most or many calls ;; here then call proc - (let* ((keynames (map key:get-fieldname keys)) - (keyvallst (keys->vallist keys #t))) - (proc target runname keys keynames keyvallst))) - (if th1 (thread-join! th1)) + (let* ((keyvals (keys:target->keyval keys target))) + (proc target runname keys keyvals))) (if db (sqlite3:finalize! db)) (set! *didsomething* #t)))))) ;;====================================================================== ;; Lock/unlock runs ;;====================================================================== (define (runs:handle-locking target keys runname lock unlock user) (let* ((db #f) - (rundat (open-run-close runs:get-runs-by-patt db keys runname)) + (rundat (cdb:remote-run runs:get-runs-by-patt db keys runname target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1))) (for-each (lambda (run) (let ((run-id (db:get-value-by-header run header "id"))) (if (or lock (and unlock (begin (print "Do you really wish to unlock run " run-id "?\n y/n: ") (equal? "y" (read-line))))) - (open-run-close db:lock/unlock-run db run-id lock unlock user) + (cdb:remote-run db:lock/unlock-run db run-id lock unlock user) (debug:print-info 0 "Skipping lock/unlock on " run-id)))) runs))) ;;====================================================================== ;; Rollup runs ;;====================================================================== ;; Update the test_meta table for this test -(define (runs:update-test_meta db test-name test-conf) - (let ((currrecord (open-run-close db:testmeta-get-record db test-name))) +(define (runs:update-test_meta test-name test-conf) + (let ((currrecord (cdb:remote-run db:testmeta-get-record #f test-name))) (if (not currrecord) (begin (set! currrecord (make-vector 10 #f)) - (open-run-close db:testmeta-add-record db test-name))) + (cdb:remote-run db:testmeta-add-record #f test-name))) (for-each (lambda (key) (let* ((idx (cadr key)) (fld (car key)) (val (config-lookup test-conf "test_meta" fld))) ;; (debug:print 5 "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin (print "Updating " test-name " " fld " to " val) - (open-run-close db:testmeta-update-field db test-name fld val))))) + (cdb:remote-run db:testmeta-update-field #f test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9))))) ;; Update test_meta for all tests (define (runs:update-all-test_meta db) (let ((test-names (get-all-legal-tests))) @@ -985,23 +810,23 @@ (let* ((test-path (conc *toppath* "/tests/" test-name)) (test-configf (conc test-path "/testconfig")) (testexists (and (file-exists? test-configf)(file-read-access? test-configf))) ;; read configs with tricks turned off (i.e. no system) (test-conf (if testexists (read-config test-configf #f #f)(make-hash-table)))) - ;; use the open-run-close instead of passing in db - (runs:update-test_meta #f test-name test-conf))) + ;; use the cdb:remote-run instead of passing in db + (runs:update-test_meta test-name test-conf))) test-names))) ;; This could probably be refactored into one complex query ... -(define (runs:rollup-run keys keyvallst runname user) ;; was target, now keyvallst - (debug:print 4 "runs:rollup-run, keys: " keys " keyvallst: " keyvallst " :runname " runname " user: " user) - (let* ((db #f) ;; (keyvalllst (keys:target->keyval keys target)) - (new-run-id (open-run-close runs:register-run db keys keyvallst runname "new" "n/a" user)) - (prev-tests (open-run-close test:get-matching-previous-test-run-records db new-run-id "%" "%")) - (curr-tests (open-run-close db:get-tests-for-run db new-run-id "%/%" '() '())) +(define (runs:rollup-run keys runname user keyvals) + (debug:print 4 "runs:rollup-run, keys: " keys " :runname " runname " user: " user) + (let* ((db #f) + (new-run-id (cdb:remote-run db:register-run #f keyvals runname "new" "n/a" user)) + (prev-tests (cdb:remote-run test:get-matching-previous-test-run-records db new-run-id "%" "%")) + (curr-tests (cdb:remote-run db:get-tests-for-run db new-run-id "%/%" '() '())) (curr-tests-hash (make-hash-table))) - (open-run-close db:update-run-event_time db new-run-id) + (cdb:remote-run db:update-run-event_time db new-run-id) ;; index the already saved tests by testname and itemdat in curr-tests-hash (for-each (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) @@ -1015,23 +840,23 @@ (lambda (testdat) (let* ((testname (db:test-get-testname testdat)) (item-path (db:test-get-item-path testdat)) (full-name (conc testname "/" item-path)) (prev-test-dat (hash-table-ref/default curr-tests-hash full-name #f)) - (test-steps (open-run-close db:get-steps-for-test db (db:test-get-id testdat))) + (test-steps (cdb:remote-run db:get-steps-for-test db (db:test-get-id testdat))) (new-test-record #f)) ;; replace these with insert ... select (apply sqlite3:execute db (conc "INSERT OR REPLACE INTO tests (run_id,testname,state,status,event_time,host,cpuload,diskfree,uname,rundir,item_path,run_duration,final_logf,comment) " "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?);") new-run-id (cddr (vector->list testdat))) - (set! new-testdat (car (open-run-close db:get-tests-for-run db new-run-id (conc testname "/" item-path) '() '()))) + (set! new-testdat (car (cdb:remote-run db:get-tests-for-run db new-run-id (conc testname "/" item-path) '() '()))) (hash-table-set! curr-tests-hash full-name new-testdat) ;; this could be confusing, which record should go into the lookup table? ;; Now duplicate the test steps (debug:print 4 "Copying records in test_steps from test_id=" (db:test-get-id testdat) " to " (db:test-get-id new-testdat)) - (open-run-close + (cdb:remote-run (lambda () (sqlite3:execute db (conc "INSERT OR REPLACE INTO test_steps (test_id,stepname,state,status,event_time,comment) " "SELECT " (db:test-get-id new-testdat) ",stepname,state,status,event_time,comment FROM test_steps WHERE test_id=?;") Index: tasks.scm ================================================================== --- tasks.scm +++ tasks.scm @@ -21,16 +21,20 @@ ;;====================================================================== ;; Tasks db ;;====================================================================== (define (tasks:open-db) - (let* ((dbpath (conc *toppath* "/monitor.db")) - (exists (file-exists? dbpath)) - (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) - (handler (make-busy-timeout 36000))) + (let* ((dbpath (conc *toppath* "/monitor.db")) + (exists (file-exists? dbpath)) + (write-access (file-write-access? dbpath)) + (mdb (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath)) + (handler (make-busy-timeout 36000))) + (if (and exists + (not write-access)) + (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) - (sqlite3:execute mdb (conc "PRAGMA synchronous = 1;")) + (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) (if (not exists) (begin (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY, action TEXT DEFAULT '', owner TEXT, @@ -103,21 +107,22 @@ pubport transport )) ;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used! -(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'markdead)) +(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete)) (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid) - (if pid - (case action - ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid)) - (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid))) - (if port - (case action - ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE hostname=? AND port=?;" hostname port)) - (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port))) - (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified")))) + (if *db-write-access* + (if pid + (case action + ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid)) + (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid))) + (if port + (case action + ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)) + (else (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))) + (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))) (define (tasks:server-deregister-self mdb hostname) (tasks:server-deregister mdb hostname pid: (current-process-id))) ;; need a simple call for robustly removing records given host and port @@ -141,12 +146,18 @@ "SELECT id FROM servers WHERE pid=-999;"))) (if hostname hostname iface)(if pid pid port)) res)) (define (tasks:server-update-heartbeat mdb server-id) - (debug:print-info 0 "Heart beat update of server id=" server-id) - (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id)) + (debug:print-info 1 "Heart beat update of server id=" server-id) + (handle-exceptions + exn + (begin + (debug:print 0 "WARNING: probable timeout on monitor.db access") + (thread-sleep! 1) + (tasks:server-update-heartbeat mdb server-id)) + (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))) ;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds (define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f)) (let* ((server-id (if server-id server-id @@ -250,17 +261,17 @@ (process-signal pid signal/term) (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill ;;(process-signal pid signal/kill) ) ;; local machine, send sig term (begin - (debug:print-info 1 "Stopping remote servers not yet supported.")))) - ;; (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") - ;; (let ((serverdat (list hostname port))) - ;; (case (string->symbol transport) - ;; ((http)(http-transport:client-connect hostname port)) - ;; (else (debug:print "ERROR: remote stopping servers of type " transport " not supported yet"))) - ;; (cdb:kill-server serverdat))))) ;; remote machine, try telling server to commit suicide + ;;(debug:print-info 1 "Stopping remote servers not yet supported.")))) + (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") + (let ((serverdat (list hostname port))) + (case (if (string? transport) (string->symbol transport) transport) + ((http)(http-transport:client-connect hostname port)) + (else (debug:print "ERROR: remote stopping servers of type " transport " not supported yet"))) + (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide (begin (if status (if (equal? hostname (get-host-name)) (begin (debug:print-info 1 "Sending signal/term to " pid " on " hostname) @@ -531,15 +542,15 @@ (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) (define (tasks:rollup-runs db mdb task) (let* ((flags (make-hash-table)) (keys (db:get-keys db)) - (keyvallst (keys:target->keyval keys (tasks:task-get-target task)))) + (keyvals (keys:target-keyval keys (tasks:task-get-target task)))) ;; (hash-table-set! flags "-rerun" "NOT_STARTED") (print "Starting rollup " task) ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY (runs:rollup-run db keys - keyvallst + keyvals (tasks:task-get-name task) (tasks:task-get-owner task)) (tasks:set-state mdb (tasks:task-get-id task) "waiting"))) Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -51,13 +51,13 @@ (set! res (string-match (regexp finpatt (if like #t #f)) str)) (if notpatt (not res) res)))) ;; if itempath is #f then look only at the testname part ;; -(define (tests:match patterns testname itempath) +(define (tests:match patterns testname itempath #!key (required '())) (if (string? patterns) - (let ((patts (string-split patterns ","))) + (let ((patts (append (string-split patterns ",") required))) (if (null? patts) ;;; no pattern(s) means no match #f (let loop ((patt (car patts)) (tal (cdr patts))) ;; (print "loop: patt: " patt ", tal " tal) @@ -107,12 +107,12 @@ ;; get the previous record for when this test was run where all keys match but runname ;; returns #f if no such test found, returns a single test record if found (define (test:get-previous-test-run-record db run-id test-name item-path) (let* ((keys (cdb:remote-run db:get-keys #f)) - (selstr (string-intersperse (map (lambda (x)(vector-ref x 0)) keys) ",")) - (qrystr (string-intersperse (map (lambda (x)(conc (vector-ref x 0) "=?")) keys) " AND ")) + (selstr (string-intersperse keys ",")) + (qrystr (string-intersperse (map (lambda (x)(conc x "=?")) keys) " AND ")) (keyvals #f)) ;; first look up the key values from the run selected by run-id (sqlite3:for-each-row (lambda (a . b) (set! keyvals (cons a b))) @@ -244,11 +244,11 @@ (pop-directory) result))) ;; Do not rpc this one, do the underlying calls!!! -(define (tests:test-set-status! test-id state status comment dat) +(define (tests:test-set-status! test-id state status comment dat #!key (work-area #f)) (debug:print-info 4 "tests:test-set-status! test-id=" test-id ", state=" state ", status=" status ", dat=" dat) (let* ((db #f) (real-status status) (otherdat (if dat dat (make-hash-table))) (testdat (cdb:get-test-info-by-id *runremote* test-id)) @@ -290,11 +290,11 @@ (cdb:test-set-status-state *runremote* test-id real-status state (if waived waived comment))) ;; if status is "AUTO" then call rollup (note, this one modifies data in test ;; run area, it does remote calls under the hood. (if (and test-id state status (equal? status "AUTO")) - (db:test-data-rollup #f test-id status)) + (db:test-data-rollup #f test-id status work-area: work-area)) ;; add metadata (need to do this way to avoid SQL injection issues) ;; :first_err ;; (let ((val (hash-table-ref/default otherdat ":first_err" #f))) @@ -324,11 +324,12 @@ expected "," tol "," units "," dcomment ",," ;; extra comma for status type ))) - (cdb:remote-run db:csv->test-data #f test-id + ;; This was run remote, don't think that makes sense. + (db:csv->test-data #f test-id dat)))) ;; need to update the top test record if PASS or FAIL and this is a subtest (if (not (equal? item-path "")) (cdb:roll-up-pass-fail-counts *runremote* run-id test-name item-path status)) @@ -538,13 +539,13 @@ ;;====================================================================== ;; teststep-set-status! used to be here (define (test-get-kill-request test-id) ;; run-id test-name itemdat) - (let* (;; (item-path (item-list->path itemdat)) - (testdat (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path))) - (equal? (test:get-state testdat) "KILLREQ"))) + (let* ((testdat (cdb:get-test-info-by-id *runremote* test-id))) ;; run-id test-name item-path))) + (and testdat + (equal? (test:get-state testdat) "KILLREQ")))) (define (test:tdb-get-rundat-count tdb) (if tdb (let ((res 0)) (sqlite3:for-each-row @@ -553,32 +554,41 @@ tdb "SELECT count(id) FROM test_rundat;") res)) 0) -(define (db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname) - (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" - cpuload - diskfree - test-id) - (if minutes (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) - (if (eq? num-records 0) - (sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" - uname hostname test-id))) - -(define (test-set-meta-info db test-id run-id testname itemdat minutes) +(define (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname) + ;; This is a good candidate for threading the requests to enable + ;; transactionized write at the server + (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree) + ;; (let ((db (open-db))) + ;; (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;" + ;; cpuload + ;; diskfree + ;; test-id) + (if minutes + (cdb:tests-update-run-duration *runremote* test-id minutes)) + ;; (sqlite3:execute db "UPDATE tests SET run_duration=? WHERE id=?;" minutes test-id)) + (if (eq? num-records 0) + (cdb:tests-update-uname-host *runremote* test-id uname hostname)) + ;;(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE id=?;" uname hostname test-id)) + ;;(sqlite3:finalize! db)) + ) + +(define (tests:set-meta-info db test-id run-id testname itemdat minutes work-area) ;; DOES cdb:remote-run under the hood! - (let* ((tdb (db:open-test-db-by-test-id db test-id)) + (let* ((tdb (db:open-test-db-by-test-id db test-id work-area: work-area)) (num-records (test:tdb-get-rundat-count tdb)) (cpuload (get-cpu-load)) (diskfree (get-df (current-directory)))) (if (eq? (modulo num-records 10) 0) ;; every ten records update central (let ((uname (get-uname "-srvpio")) (hostname (get-host-name))) - (cdb:remote-run db:update-central-meta-info db test-id cpuload diskfree minutes num-records uname hostname))) + (tests:update-central-meta-info test-id cpuload diskfree minutes num-records uname hostname))) (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);" - cpuload diskfree minutes))) + cpuload diskfree minutes) + (sqlite3:finalize! tdb))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== Index: tests/Makefile ================================================================== --- tests/Makefile +++ tests/Makefile @@ -1,8 +1,8 @@ # run some tests -BINPATH=$(PWD)/../bin +BINPATH=$(shell readlink -m $(PWD)/../bin) MEGATEST=$(BINPATH)/megatest PATH := $(BINPATH):$(PATH) RUNNAME := $(shell date +w%V.%u.%H.%M) IPADDR := "-" # Set SERVER to "-server -" @@ -20,11 +20,11 @@ all : test1 test2 test3 test4 test5 server : (cd ..;make;make install) && \ - (cd fullrun;../../bin/megatest -server - -debug 22) + (cd fullrun;../../bin/megatest -server - -debug 22 &) test0 : cleanprep cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG) test1 : cleanprep @@ -44,30 +44,53 @@ test3 : fullprep cd fullrun;$(MEGATEST) -runtests runfirst -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -debug 10 -test4 : fullprep - cd fullrun;$(MEGATEST) -debug $(DEBUG) -runall -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) +test4 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" + cd fullrun;time $(MEGATEST) -debug $(DEBUG) -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING) # NOTE: Only one instance can be a server -test5 : fullprep +test5 : cleanprep + @echo "WARNING: No longer running fullprep, test converage may be lessened" cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log & cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log & # cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log & # cd fullrun;sleep 0;$(MEGATEST) -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log & + cd fullrun;sleep 10;$(MEGATEST) -run-wait -target $(TARGET) :runname % -testpatt % :state RUNNING,LAUNCHED,NOT_STARTED,REMOTEHOSTSTART;echo ALL DONE test6: fullprep cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %/1 -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -v cd fullrun;$(MEGATEST) -runtests runfirst -testpatt %blahha% -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_itempatt -debug 10 cd fullrun;$(MEGATEST) -rollup :runname newrun -target ubuntu/nfs/none -debug 10 +test7: + @echo Only a/c testname c should remain. If there is a run a/b/c then there is a cache issue. + (cd simplerun; \ + $(MEGATEST) -server - -daemonize; \ + $(MEGATEST) -remove-runs -target %/% :runname % -testpatt %; \ + $(MEGATEST) -runtests % -target a/b :runname c; sleep 5; \ + $(MEGATEST) -remove-runs -target a/c :runname c; \ + $(MEGATEST) -runtests % -target a/c :runname c; \ + $(MEGATEST) -remove-runs -target a/b :runname c -testpatt % ; \ + $(MEGATEST) -runtests % -target a/d :runname c;$(MEGATEST) -list-runs %|egrep ^Run:) > test7.log 2> test7.log + logpro test7.logpro test7.html < test7.log + @echo + @echo Run \"firefox test7.html\" to see the results. + +# This one failed with v1.55 +test8 : + cd fullrun;$(MEGATEST) -runtests priority_10_waiton_1 -target ubuntu/nfs/none :runname $(RUNNAME)_waiton_single + cd fullrun;$(MEGATEST) -runtests lineitem_fail 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singletest + cd fullrun;$(MEGATEST) -runtests runfirst/fall 1 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem + cd fullrun;$(MEGATEST) -runtests test_mt_vars/2 -target ubuntu/nfs/none :runname $(RUNNAME)_singleitem_waiton cleanprep : ../*.scm Makefile */*.config - mkdir -p /tmp/mt_runs /tmp/mt_links + mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links cd ..;make;make install rm -f */logging.db touch cleanprep fullprep : cleanprep @@ -90,9 +113,9 @@ hardkill : kill sleep 5;killall -v mtest main.sh dboard -9 listservers : - cd fullrun;$(MEGATEST) -listservers + cd fullrun;$(MEGATEST) -list-servers runforever : while(ls); do runname=`date +%F-%R:%S`;(cd fullrun;$(MEGATEST) -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname;/home/matt/data/megatest/megatest -runall -target ubuntu/nfs/none :runname $$runname);done ADDED tests/fdktestqa/testqa/Makefile Index: tests/fdktestqa/testqa/Makefile ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/Makefile @@ -0,0 +1,25 @@ +BINDIR=$(PWD)/../../../bin +MEGATEST=$(BINDIR)/megatest +DASHBOARD=$(BINDIR)/dashboard +all : + $(MEGATEST) -runtests % -target a/b :runname c + +bigbig : + for tn in a b c d;do \ + ($(MEGATEST) -runtests % -target a/b :runname $tn & ) ; \ + done + +bigrun : + $(MEGATEST) -runtests bigrun -target a/bigrun :runname a + +bigrun2 : + $(MEGATEST) -runtests bigrun2 -target a/bigrun2 :runname a + +dashboard : + $(DASHBOARD) -rows 20 & + +compile : + (cd ../../..;make && make install) + +clean : + rm -rf ../simple*/*/* megatest.db Index: tests/fdktestqa/testqa/megatest.config ================================================================== --- tests/fdktestqa/testqa/megatest.config +++ tests/fdktestqa/testqa/megatest.config @@ -1,5 +1,8 @@ [setup] -testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. +testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log +runqueue 2 [include ../fdk.config] +[server] +timeout 0.01 ADDED tests/fdktestqa/testqa/runsuite.sh Index: tests/fdktestqa/testqa/runsuite.sh ================================================================== --- /dev/null +++ tests/fdktestqa/testqa/runsuite.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +(cd ../../..;make && make install) || exit 1 +export PATH=$PWD/../../../bin:$PATH + +for i in a b c d e f;do + # g h i j k l m n o p q r s t u v w x y z;do + megatest -runtests % -target a/b :runname $i & +done + +echo "" > num-running.log +while true; do + foo=`megatest -list-runs % | grep RUNNING | wc -l` + echo "Num running at `date` $foo" + echo "$foo at `date`" >> num-running.log + # to make the test go at a reasonable clip only gather this info ever minute + sleep 1m +done Index: tests/fdktestqa/testqa/tests/bigrun/step1.sh ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/step1.sh +++ tests/fdktestqa/testqa/tests/bigrun/step1.sh @@ -1,3 +1,8 @@ #!/bin/sh -sleep 10 +if [ $NUMBER -lt 200 ];then + sleep $NUMBER +else + sleep 200 +fi + exit 0 Index: tests/fdktestqa/testqa/tests/bigrun/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun/testconfig +++ tests/fdktestqa/testqa/tests/bigrun/testconfig @@ -7,11 +7,11 @@ # waiton setup priority 0 # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a (or (any->number (get-environment-variable "NUMTESTS")) 1100))(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt Index: tests/fdktestqa/testqa/tests/bigrun2/step1.sh ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/step1.sh +++ tests/fdktestqa/testqa/tests/bigrun2/step1.sh @@ -1,7 +1,9 @@ #!/bin/sh -prev_test=`$MT_MEGATEST -test-paths -target $MT_TARGET :runname $MT_RUNNAME -testpatt bigrun/$NUMBER` -if [ -e $prev_test/testconfig ]; then - exit 0 -else - exit 1 -fi +# prev_test=`$MT_MEGATEST -test-paths -target $MT_TARGET :runname $MT_RUNNAME -testpatt bigrun/$NUMBER` +# if [ -e $prev_test/testconfig ]; then +# exit 0 +# else +# exit 1 +# fi + +exit 0 Index: tests/fdktestqa/testqa/tests/bigrun2/testconfig ================================================================== --- tests/fdktestqa/testqa/tests/bigrun2/testconfig +++ tests/fdktestqa/testqa/tests/bigrun2/testconfig @@ -2,18 +2,18 @@ [ezsteps] step1 step1.sh # Test requirements are specified here [requirements] -waiton bigrun +# waiton bigrun priority 0 mode itemmatch # Iteration for your tests are controlled by the items section [items] -NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 120)(loop (+ a 1)(cons a res)) res)) >)) " ")} +NUMBER #{scheme (string-intersperse (map number->string (sort (let loop ((a 0)(res '()))(if (< a 1500)(loop (+ a 1)(cons a res)) res)) >)) " ")} # test_meta is a section for storing additional data on your test [test_meta] author matt owner matt ADDED tests/fullrun/afs.config Index: tests/fullrun/afs.config ================================================================== --- /dev/null +++ tests/fullrun/afs.config @@ -0,0 +1,1 @@ +TESTSTORUN priority_6 sqlitespeed/ag Index: tests/fullrun/config/mt_include_1.config ================================================================== --- tests/fullrun/config/mt_include_1.config +++ tests/fullrun/config/mt_include_1.config @@ -1,8 +1,8 @@ [setup] # exectutable /path/to/megatest -max_concurrent_jobs 200 +max_concurrent_jobs 150 linktree #{getenv MT_RUN_AREA_HOME}/tmp/mt_links [jobtools] useshell yes Index: tests/fullrun/megatest.config ================================================================== --- tests/fullrun/megatest.config +++ tests/fullrun/megatest.config @@ -9,16 +9,24 @@ area1 /tmp/oldarea/megatest [include config/mt_include_1.config] [setup] +# Set launchwait to yes to use the old launch run code that waits for the launch process to return before +# proceeding. +# launchwait yes + +# If defined the runs:run-tests-queue-new queue code is used with the register test depth +# given. Otherwise the old code is used. The old code will be removed in the future and +# a default of 10 used. +runqueue 1 # It is possible (but not recommended) to override the rsync command used # to populate the test directories. For test development the following # example can be useful # -testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. +# testcopycmd cp --remove-destination -rsv TEST_SRC_PATH/. TEST_TARG_PATH/. >> TEST_TARG_PATH/mt_launch.log 2>> TEST_TARG_PATH/mt_launch.log # or for hard links # testcopycmd cp --remove-destination -rlv TEST_SRC_PATH/. TEST_TARG_PATH/. @@ -49,10 +57,14 @@ WACKYVAR6 #{scheme (args:get-arg "-target")} PREDICTABLE the_ans MRAH MT_RUN_AREA_HOME=#{getenv MT_RUN_AREA_HOME} # The empty var should have a definition with null string EMPTY_VAR + +WRAPPEDVAR This var should have the work blah thrice: \ +blah \ +blah # XTERM [system xterm] # RUNDEAD [system exit 56] [server] @@ -61,11 +73,11 @@ # it succeeds port 8080 # This server will keep running this number of hours after last access. # Three minutes is 0.05 hours -timeout 0.05 +timeout 0.025 ## disks are: ## name host:/path/to/area ## -or- ## name /path/to/area ADDED tests/fullrun/nfs.config Index: tests/fullrun/nfs.config ================================================================== --- /dev/null +++ tests/fullrun/nfs.config @@ -0,0 +1,1 @@ +TESTSTORUN priority_4 test_mt_vars Index: tests/fullrun/runconfigs.config ================================================================== --- tests/fullrun/runconfigs.config +++ tests/fullrun/runconfigs.config @@ -1,13 +1,26 @@ +[default] +SOMEVAR This should show up in SOMEVAR3 + +# target based getting of config file, look at afs.config and nfs.config +[include #{getenv fsname}.config] + [include #{getenv MT_RUN_AREA_HOME}/common_runconfigs.config] # #{system echo 'VACKYVAR #{shell pwd}' > $MT_RUN_AREA_HOME/config/$USER.config} [include ./config/#{getenv USER}.config] + WACKYVAR0 #{get ubuntu/nfs/none CURRENT} WACKYVAR1 #{scheme (args:get-arg "-target")} [default/ubuntu/nfs] WACKYVAR2 #{runconfigs-get CURRENT} [ubuntu/nfs/none] WACKYVAR2 #{runconfigs-get CURRENT} +SOMEVAR2 This should show up in SOMEVAR4 if the target is ubuntu/nfs/none + +[default] +SOMEVAR3 #{rget SOMEVAR} +SOMEVAR4 #{rget SOMEVAR2} +SOMEVAR5 #{runconfigs-get SOMEVAR2} ADDED tests/fullrun/tests/special/testconfig Index: tests/fullrun/tests/special/testconfig ================================================================== --- /dev/null +++ tests/fullrun/tests/special/testconfig @@ -0,0 +1,8 @@ +[ezsteps] +# calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET + +[requirements] +waiton #{rget TESTSTORUN} + +# This is a "toplevel" test, it does not require waitons to be non-FAIL to run +mode toplevel Index: tests/simplerun/tests/test1/step1.logpro ================================================================== --- tests/simplerun/tests/test1/step1.logpro +++ tests/simplerun/tests/test1/step1.logpro @@ -1,7 +1,7 @@ ;; You should have at least one expect:required. This ensures that your process ran -(expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) +;; (expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) ;; You may need ignores to suppress false error or warning hits from the later expects ;; NOTE: Order is important here! (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) Index: tests/simplerun/tests/test1/step1.sh ================================================================== --- tests/simplerun/tests/test1/step1.sh +++ tests/simplerun/tests/test1/step1.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash # Run your step here echo Got here! + Index: tests/simplerun/tests/test1/step2.logpro ================================================================== --- tests/simplerun/tests/test1/step2.logpro +++ tests/simplerun/tests/test1/step2.logpro @@ -1,7 +1,7 @@ ;; You should have at least one expect:required. This ensures that your process ran -(expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) +;; (expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) ;; You may need ignores to suppress false error or warning hits from the later expects ;; NOTE: Order is important here! (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) Index: tests/simplerun/tests/test1/step2.sh ================================================================== --- tests/simplerun/tests/test1/step2.sh +++ tests/simplerun/tests/test1/step2.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash # Run your step here echo Got here eh! + Index: tests/simplerun/tests/test2/step1.logpro ================================================================== --- tests/simplerun/tests/test2/step1.logpro +++ tests/simplerun/tests/test2/step1.logpro @@ -1,7 +1,7 @@ ;; You should have at least one expect:required. This ensures that your process ran -(expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) +;; (expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) ;; You may need ignores to suppress false error or warning hits from the later expects ;; NOTE: Order is important here! (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) Index: tests/simplerun/tests/test2/step2.logpro ================================================================== --- tests/simplerun/tests/test2/step2.logpro +++ tests/simplerun/tests/test2/step2.logpro @@ -1,7 +1,7 @@ ;; You should have at least one expect:required. This ensures that your process ran -(expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) +;; (expect:required in "LogFileBody" > 0 "Put description here" #/put pattern here/) ;; You may need ignores to suppress false error or warning hits from the later expects ;; NOTE: Order is important here! (expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) (expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) ADDED tests/test7.logpro Index: tests/test7.logpro ================================================================== --- /dev/null +++ tests/test7.logpro @@ -0,0 +1,8 @@ +;; You should have at least one expect:required. This ensures that your process ran +(expect:required in "LogFileBody" > 0 "All tests launched" #/INFO:.*All tests launched/) + +;; You may need ignores to suppress false error or warning hits from the later expects +;; NOTE: Order is important here! +(expect:ignore in "LogFileBody" < 99 "Ignore the word error in comments" #/^\/\/.*error/) +(expect:warning in "LogFileBody" = 0 "Any warning" #/warn/) +(expect:error in "LogFileBody" = 0 "Any error" (list #/ERROR/ #/error/)) ;; but disallow any other errors Index: tests/tests.scm ================================================================== --- tests/tests.scm +++ tests/tests.scm @@ -79,36 +79,35 @@ (test "setup for run" #t (begin (setup-for-run) (string? (getenv "MT_RUN_AREA_HOME")))) (test "server-register, get-best-server" #t (let ((res #f)) - (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live) + (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http) (set! res (open-run-close tasks:get-best-server tasks:open-db)) - (number? (cadddr res)))) + (number? (vector-ref res 3)))) (test "de-register server" #t (let ((res #f)) - (open-run-close tasks:server-deregister tasks:open-db "bob" pullport: 1234) - (list? (open-run-close tasks:get-best-server tasks:open-db)))) + (open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234) + (vector? (open-run-close tasks:get-best-server tasks:open-db)))) -(define hostinfo #f) +(define server-pid #f) +(test "launch server" #t (let ((pid (process-fork (lambda () + ;; (daemon:ize) + (server:launch 'http))))) + (set! server-pid pid) + (number? pid))) + +(thread-sleep! 3) ;; need to wait for server to start. Yes, a better way is needed. (test "get-best-server" #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db))) - (set! hostinfo dat) ;; host ip pullport pubport - (and (string? (car dat)) - (number? (caddr dat))))) - -(test #f #t (let ((zmq-socket (server:client-connect - (cadr hostinfo) - (caddr hostinfo) - ;; (cadddr hostinfo) - ))) - (set! *runremote* zmq-socket) - (string? (car *runremote*)))) - -(test #f #t (let ((res (server:client-login *runremote*))) + (set! *runremote* (list (vector-ref dat 1)(vector-ref dat 2))) ;; host ip pullport pubport + (and (string? (car *runremote*)) + (number? (cadr *runremote*))))) + +(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*))) +(test #f #t (let ((res (client:login *runremote*))) (car res))) -(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*))) ;;====================================================================== ;; C O N F I G F I L E S ;;====================================================================== @@ -169,26 +168,26 @@ ;; (cdb:set-verbosity *runremote* *verbosity*) (test "get all legal tests" (list "test1" "test2") (sort (get-all-legal-tests) string<=?)) -(test "get-keys" "SYSTEM" (vector-ref (car (db:get-keys *db*)) 0));; (key:get-fieldname (car (sort (db-get-keys *db*)(lambda (a b)(string>=? (vector-ref a 0)(vector-ref b 0))))))) +(test "get-keys" "SYSTEM" (car (db:get-keys *db*))) (define remargs (args:get-args '("bar" "foo" ":runname" "bob" ":SYSTEM" "ubuntu" ":RELEASE" "v1.2" ":datapath" "blah/foo" "nada") (list ":runname" ":state" ":status") (list "-h") args:arg-hash 0)) -(test "register-run" #t (number? (runs:register-run *db* - (db:get-keys *db*) - '(("SYSTEM" "key1")("RELEASE" "key2")) - "myrun" - "new" - "n/a" - "bob"))) +(test "register-run" #t (number? + (db:register-run *db* + '(("SYSTEM" "key1")("RELEASE" "key2")) + "myrun" + "new" + "n/a" + "bob"))) (test #f #t (cdb:tests-register-test *runremote* 1 "nada" "")) (test #f 1 (cdb:remote-run db:get-test-id #f 1 "nada" "")) (test #f "NOT_STARTED" (vector-ref (open-run-close db:get-test-info #f 1 "nada" "") 3)) (test #f "NOT_STARTED" (vector-ref (cdb:get-test-info *runremote* 1 "nada" "") 3)) @@ -198,12 +197,12 @@ ;;====================================================================== ;; D B ;;====================================================================== (test #f "FOO LIKE 'abc%def'" (db:patt->like "FOO" "abc%def")) -(test #f (vector '("SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time") '()) - (runs:get-runs-by-patt db keys "%")) +(test #f "key2" (vector-ref (car (vector-ref (runs:get-runs-by-patt *db* '("SYSTEM" "RELEASE") "%" "key1/key2") 1)) 1)) + (test #f "SYSTEM,RELEASE,id,runname,state,status,owner,event_time" (car (runs:get-std-run-fields keys '("id" "runname" "state" "status" "owner" "event_time")))) (test #f #t (runs:operate-on 'print "%" "%" "%")) ;;(test "update-test-info" #t (test-update-meta-info *db* 1 "nada" (setenv "BLAHFOO" "1234") @@ -236,10 +235,13 @@ (hash-table-set! args:arg-hash "-testpatt" "%") (hash-table-set! args:arg-hash "-target" "ubuntu/r1.2") (test "Setup for a run" #t (begin (setup-for-run) #t)) (define *tdb* #f) +(define keyvals #f) +(test "target->keyval" #t (let ((kv (keys:target->keyval keys (args:get-arg "-target")))) + (set! keyvals kv)(list? keyvals))) (define testdbpath (conc "/tmp/" (getenv "USER") "/megatest_testing")) (system (conc "rm -f " testdbpath "/testdat.db;mkdir -p " testdbpath)) (print "Using " testdbpath " for test db") @@ -252,37 +254,88 @@ (define tconfig #f) (test "get a testconfig" #t (let ((tconf (tests:get-testconfig "test1" 'return-procs))) (set! tconfig tconf) (hash-table? tconf))) (db:clean-all-caches) -;; (set! *verbosity* 20) + +(test "set-megatest-env-vars" + "ubuntu" + (begin + (set-megatest-env-vars 1 inkeys: keys) + (get-environment-variable "SYSTEM"))) +(test "setup-env-defaults" + "see this variable" + (begin + (setup-env-defaults "runconfigs.config" 1 *already-seen-runconfig-info* keys keyvals "pre-launch-env-vars") + (get-environment-variable "ALLTESTS"))) + +(test #f "ubuntu" (car (keys:target-set-args keys (args:get-arg "-target") args:arg-hash))) + +(define rinfo #f) +(test "get-run-info" #f (vector? (vector-ref (let ((rinf (cdb:remote-run db:get-run-info #f 1))) + (set! rinfo rinf) + rinf) 0))) +(test "get-key-vals" "key1" (car (cdb:remote-run db:get-key-vals #f 1))) +(test "tests:sort-by" '() (tests:sort-by-priority-and-waiton (make-hash-table))) + +(test "update-test_meta" "test1" (begin + (runs:update-test_meta "test1" tconfig) + (let ((dat (cdb:remote-run db:testmeta-get-record #f "test1"))) + (vector-ref dat 1)))) + +(define test-path "tests/test1") +(define disk-path #f) +(test "get-best-disk" #t (string? (file-exists? (let ((d (get-best-disk *configdat*))) + (set! disk-path d) + d)))) +(test "create-work-area" #t (symbolic-link? (car (create-work-area 1 rinfo keyvals 1 test-path disk-path "test1" '())))) +(test #f "" (item-list->path '())) + +(test "launch-test" #t (string? (file-exists? (launch-test 1 1 rinfo keyvals "run1" tconfig "test1" test-path '() (make-hash-table))))) + + (test "Run a test" #t (general-run-call "-runtests" "run a test" - (lambda (target runname keys keynames keyvallst) + (lambda (target runname keys keyvallst) (let ((test-patts "test%")) ;; (runs:run-tests target runname test-patts user (make-hash-table)) + ;; (run:test run-id run-info key-vals runname test-record flags parent-test) + ;; (set! *verbosity* 22) ;; (list 0 1 2)) (run:test 1 ;; run-id - (args:get-arg ":runname") - (keys:target->keyval keys target) - (vector + #f ;; run-info is yet only a dream + keyvallst ;; (keys:target->keyval keys target) + "run1" ;; runname + (vector ;; test_records.scm tests:testqueue "test1" ;; testname tconfig ;; testconfig '() ;; waitons 0 ;; priority #f ;; items #f ;; itemsdat - #f ;; spare + "" ;; itempath ) args:arg-hash ;; flags (e.g. -itemspatt) - #f))))) + #f) + ;; (set! *verbosity* 0) + )))) + + + + + +(test "server stop" #f (let ((hostname (car *runremote*)) + (port (cadr *runremote*))) + (tasks:kill-server #t hostname port server-pid 'http) + (open-run-close tasks:get-best-server tasks:open-db))) -(test "cache is coherent" #t (let ((cached-info (db:get-test-info-cached-by-id db 2)) - (non-cached (db:get-test-info-not-cached-by-id db 2))) - (print "\nCached: " cached-info) - (print "Noncached: " non-cached) - (equal? cached-info non-cached))) +(exit 1) +;; (test "cache is coherent" #t (let ((cached-info (db:get-test-info-cached-by-id db 2)) +;; (non-cached (db:get-test-info-not-cached-by-id db 2))) +;; (print "\nCached: " cached-info) +;; (print "Noncached: " non-cached) +;; (equal? cached-info non-cached))) (change-directory test-work-dir) (test "Add a step" #t (begin (db:teststep-set-status! db 2 "step1" "start" 0 "This is a comment" "mylogfile.html") @@ -390,11 +443,16 @@ (hash-table-set! args:arg-hash ":runname" "%") (test "Remove the rollup run" #t (begin (operate-on 'remove-runs))) (print "Waiting for server to be done, should be about 20 seconds") -(cdb:kill-server *runremote*) +(test "server stop" #f (let ((hostname (car *runremote*)) + (port (cadr *runremote*))) + (tasks:kill-server #t hostname port server-pid 'http) + (open-run-close tasks:get-best-server tasks:open-db))) + +;; (cdb:kill-server *runremote*) ;; (thread-join! th1 th2 th3) ;; ADD ME!!!! (db:get-prereqs-not-met *db* 1 '("runfirst") "" mode: 'normal) ;; ADD ME!!!! (rdb:get-tests-for-run *db* 1 "runfirst" #f '() '())