Index: api.scm ================================================================== --- api.scm +++ api.scm @@ -371,11 +371,13 @@ ((add-var) (apply db:add-var dbstruct params)) ((insert-run) (apply db:insert-run dbstruct params)) ;; STEPS - ((teststep-set-status!) (apply db:teststep-set-status! dbstruct params)) + ((teststep-set-status!) + ;; (apply db:teststep-set-status! dbstruct params)) + (db:add-cached-write dbstruct db:teststep-set-status! run-id params)) ((delete-steps-for-test!) (apply db:delete-steps-for-test! dbstruct params)) ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) Index: common.scm ================================================================== --- common.scm +++ common.scm @@ -43,11 +43,11 @@ (include "common_records.scm") (define (remove-files filespec) (let ((files (glob filespec))) - (for-each delete-file files))) + (for-each delete-file* files))) (define (stop-the-train) (thread-start! (make-thread (lambda () (let loop () (if (and *toppath* Index: dashboard.scm ================================================================== --- dashboard.scm +++ dashboard.scm @@ -3112,11 +3112,11 @@ (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir ", exn=" exn) (current-seconds)) ;; something went wrong - just print an error and return current-seconds (common:max (map (lambda (filen) (file-modification-time filen)) - (glob (conc dbdir "/*.db*")))))) + (cons (conc dbdir "/main.db") (glob (conc dbdir "/?.db"))))))) (define (dashboard:monitor-changed? commondat tabdat) (let* ((run-update-time (current-seconds)) (monitor-db-path (dboard:tabdat-monitor-db-path tabdat)) (monitor-modtime (if (and monitor-db-path (common:file-exists? monitor-db-path)) @@ -3828,11 +3828,12 @@ (let* ((mtdb-path (conc *toppath* "/.mtdb/main.db")) (target (args:get-arg "-target")) (commondat (dboard:commondat-make))) (if target (begin - (args:remove-arg-from-ht "-target") + (hash-table-delete! args:arg-hash "-target") ;; workaround for the following commented out function + ;; (args:remove-arg-from-ht "-target") This function is in mtargs/mtargs.scm, but it's in an egg that is not in the current build of chicken 4. (dboard:commondat-target-set! commondat target) ) ) (if (not (launch:setup)) Index: db.scm ================================================================== --- db.scm +++ db.scm @@ -2739,12 +2739,10 @@ "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);" test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile ""))))) - - (define (db:delete-steps-for-test! dbstruct run-id test-id) ;; TODO: figure out why status is the key field rather than state (note: CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state) ) (db:with-db dbstruct run-id @@ -4247,10 +4245,56 @@ (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (dbfile:add-dbdat dbstruct #f dbdat) (system "rm -rf tempdir"))) + +;;====================================================================== +;; cached writes stuff +;;====================================================================== + +(define (db:add-cached-write dbstruct proc run-id params) + (debug:print 0 *default-log-port* "Adding cached write for run-id "run-id" params " params) + (mutex-lock! *cached-writes-mutex*) + (let* ((hkey (cons dbstruct run-id)) + (cached-writes-queue (hash-table-ref/default *cached-writes-queues* hkey '()))) + (hash-table-set! *cached-writes-queues* hkey (cons (list proc params) cached-writes-queue))) + (if (not *cached-writes-flag*) + (begin + (set! *cached-writes-flag* #t) + (thread-start! (make-thread + (lambda () + (debug:print 0 *default-log-port* "process cached writes thread started.") + (thread-sleep! 1) + (db:process-cached-writes-queue)))))) + (mutex-unlock! *cached-writes-mutex*)) + +(define (db:process-cached-writes-queue) + (mutex-lock! *cached-writes-mutex*) + (hash-table-for-each + *cached-writes-queues* + (lambda (hkey writes-list) + (let* ((dbstruct (car hkey)) + (run-id (cdr hkey))) + (debug:print 0 *default-log-port* "Processing "(length writes-list)" cached writes for run "run-id) + (db:with-db + dbstruct + run-id + #t + (lambda (dbdat db) + (sqlite3:with-transaction + db + (lambda () + (for-each + (lambda (queued-write) + (match queued-write + ((proc params)(apply proc dbstruct params)) + (else (assert #f "BAD queued-write")))) + writes-list))) + (hash-table-delete! *cached-writes-queues* hkey)))))) + (set! *cached-writes-flag* #f) + (mutex-unlock! *cached-writes-mutex*)) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") ;;====================================================================== ;; moving watch dogs here due to dependencies Index: dbfile.scm ================================================================== --- dbfile.scm +++ dbfile.scm @@ -446,11 +446,12 @@ (if journal-mode (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";"))) (if (and init-proc (or force-init (not db-exists))) (init-proc db)) - db))) + db)) + expire-time: 30) (begin (if (file-exists? fname ) (let ((db (sqlite3:open-database fname))) ;; pragmas synchronous not needed because this db is used read-only ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";") @@ -1513,11 +1514,11 @@ #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) (begin - (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 3 seconds later") + (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later") #f ) ) ) ) @@ -1579,7 +1580,17 @@ ;; (db:hoh-set! stmt-cache db stmt newstmth) (hash-table-set! stmt-cache stmt newstmth) newstmth)))) (mutex-unlock! *get-cache-stmth-mutex*) result)) + +;;====================================================================== +;; cached writes - run list of procs inside transaction +;; NOTE: this only works because we have once database per process +;;====================================================================== + +(define *cached-writes-mutex* (make-mutex)) +(define *cached-writes-flag* #f) +(define *cached-writes-queues* (make-hash-table)) ;; dbstruct->list of writes + ) Index: dbmod.scm ================================================================== --- dbmod.scm +++ dbmod.scm @@ -119,12 +119,14 @@ (loop (- count 1))) (begin (debug:print-info 0 *default-log-port* "dbmod:with-db, database is busy, giving up.") (exit 1)))) (exn () - (dbfile:print-err exn "ERROR: Unknown error with database for run-id "run-id", message: " - ((condition-property-accessor 'exn 'message) exn)) + (dbfile:print-err exn "ERROR: Unknown error with db for run-id " + run-id", message: " + ((condition-property-accessor 'exn 'message) exn) + ", details: "(condition->list exn)) (exit 2)))))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) res))) (define (db:with-db dbstruct run-id w/r proc . params) @@ -520,11 +522,11 @@ ;; done (debug:print 2 *default-log-port* "Attaching "destdbfile" as auxdb") (handle-exceptions exn (begin - (debug:print 0 "ATTACH failed, exiting. exn="(condition->list exn)) + (debug:print 0 *default-log-port* "ATTACH failed, exiting. exn="(condition->list exn)) (exit 1)) (sqlite3:execute dbh (conc "ATTACH '"destdbfile"' AS auxdb;"))) (for-each (lambda (table) (let* ((dummy (debug:print 2 *default-log-port* "Doing table " table)) Index: launch.scm ================================================================== --- launch.scm +++ launch.scm @@ -341,10 +341,55 @@ (or new-cpu-load cpu-load) (or new-disk-free disk-free) (if do-sync (current-seconds) last-sync))))))) (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional + +;; read testconfig and create .logpro and script files +;; - use #f for tconfigreg to re-read the testconfigs from disk +;; +(define (launch:extract-scripts-logpro test-dir test-name item-path tconfigreg-in) + (let* ((tconfigreg (or tconfigreg-in + (tests:get-all))) + (tconfig-fname (conc test-dir "/.testconfig")) + (tconfig-tmpfile (conc tconfig-fname ".tmp")) + (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) + (scripts (configf:get-section tconfig "scripts")) + (logpros (configf:get-section tconfig "logpro"))) + ;; create .testconfig file + (configf:write-alist tconfig tconfig-tmpfile) + (file-move tconfig-tmpfile tconfig-fname #t) + (delete-file* ".final-status") + + ;; extract scripts from testconfig and write them to files in test run dir + (for-each + (lambda (scriptdat) + (match scriptdat + ((name content) + (debug:print-info 2 *default-log-port* "Creating script "(current-directory)"/"name) + (with-output-to-file name + (lambda () + (print content))) + (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))) + (else + (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) + scripts) + + ;; extract logpro from testconfig and write them to files in test run dir + (for-each + (lambda (logprodat) + (match logprodat + ((name content) + (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name) + (with-output-to-file name + (lambda () + (print content) + ;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)) + ))) + (else + (debug:print-info 0 "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\"")))) + logpros))) (define (launch:execute encoded-cmd) (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) (tconfigreg #f)) (setenv "MT_CMDINFO" encoded-cmd) @@ -616,12 +661,11 @@ (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path)))) ;;(bb-check-path msg: "launch:execute post block 4") ;; (change-directory top-path) ;; Can setup as client for server mode now ;; (client:setup) - - + ;; environment overrides are done *before* the remaining critical envars. (alist->env-vars env-ovrd) ;;(bb-check-path msg: "launch:execute post block 41") (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals) ;;(bb-check-path msg: "launch:execute post block 42") @@ -647,37 +691,39 @@ (set! fullrunscript "xterm") (if (and fullrunscript (common:file-exists? fullrunscript) (not (file-execute-access? fullrunscript))) (system (conc "chmod ug+x " fullrunscript)))) - - ;; We are about to actually kick off the test - ;; so this is a good place to remove the records for - ;; any previous runs - ;; (db:test-remove-steps db run-id testname itemdat) - ;; now is also a good time to write the .testconfig file - (let* ((tconfig-fname (conc work-area "/.testconfig")) - (tconfig-tmpfile (conc tconfig-fname ".tmp")) - (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) - (scripts (configf:get-section tconfig "scripts"))) - ;; create .testconfig file - (configf:write-alist tconfig tconfig-tmpfile) - (file-move tconfig-tmpfile tconfig-fname #t) - (delete-file* ".final-status") - - ;; extract scripts from testconfig and write them to files in test run dir - (for-each - (lambda (scriptdat) - (match scriptdat - ((name content) - (with-output-to-file name - (lambda () - (print content) - (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))))) - (else - (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) - scripts)) + (launch:extract-scripts-logpro work-area test-name item-path tconfigreg) + +;;;;; ;; We are about to actually kick off the test +;;;;; ;; so this is a good place to remove the records for +;;;;; ;; any previous runs +;;;;; ;; (db:test-remove-steps db run-id testname itemdat) +;;;;; ;; now is also a good time to write the .testconfig file +;;;;; (let* ((tconfig-fname (conc work-area "/.testconfig")) +;;;;; (tconfig-tmpfile (conc tconfig-fname ".tmp")) +;;;;; (tconfig (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs))) +;;;;; (scripts (configf:get-section tconfig "scripts")) +;;;;; (precmd (configf:lookup tconfig ) +;;;;; ;; create .testconfig file +;;;;; (configf:write-alist tconfig tconfig-tmpfile) +;;;;; (file-move tconfig-tmpfile tconfig-fname #t) +;;;;; (delete-file* ".final-status") +;;;;; +;;;;; ;; extract scripts from testconfig and write them to files in test run dir +;;;;; (for-each +;;;;; (lambda (scriptdat) +;;;;; (match scriptdat +;;;;; ((name content) +;;;;; (with-output-to-file name +;;;;; (lambda () +;;;;; (print content) +;;;;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))))) +;;;;; (else +;;;;; (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\"")))) +;;;;; scripts)) ;; (let* ((m (make-mutex)) (kill-job? #f) (exit-info (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status @@ -694,11 +740,15 @@ (th2 (make-thread runit "run job")) (tconfig (tests:get-testconfig test-name item-path tconfigreg #t)) (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code")) (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED")) (test-status "not set") - ) + (precmd (configf:lookup tconfig "setup" "precmd")) + (postcmd (configf:lookup tconfig "setup" "postcmd"))) + ;; first, if set, run the precmd + (if precmd ;; (file-exists? precmd)(file-execute-access? precmd)) + (system precmd)) ;; up to test author to put nbfake if desired. (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...") @@ -762,10 +812,13 @@ (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id))) ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1. + (if postcmd + (system postcmd)) + (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list)) (begin (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) (set! *globalexitstatus* 1) ) @@ -887,11 +940,11 @@ (let ((tmpfile (conc fulldir "/.megatest.cfg." (current-seconds))) (targfile (conc fulldir "/.megatest.cfg-" megatest-version "-" megatest-fossil-hash)) (rconfig (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash))) (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached (begin - (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile) + (debug:print-info 2 *default-log-port* "Caching megatest.config in " tmpfile) (if (not (common:in-running-test?)) (configf:write-alist *configdat* tmpfile)) (system (conc "ln -sf " tmpfile " " targfile)))) ))) (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs."))))) Index: megatest-version.scm ================================================================== --- megatest-version.scm +++ megatest-version.scm @@ -18,6 +18,6 @@ ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) -(define megatest-version 1.8017) +(define megatest-version 1.8019) Index: megatest.scm ================================================================== --- megatest.scm +++ megatest.scm @@ -111,11 +111,17 @@ ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) - (load debugcontrolf))) + (begin + ;; for some reason, debug:print does not work here. Had to use print. + (print (conc "WARNING: loading " debugcontrolf)) + (load debugcontrolf) + ) + ) +) ;; usage logging, careful with this, it is not designed to deal with all real world challenges! ;; (if (and *usage-log-file* (file-write-access? *usage-log-file*)) @@ -257,11 +263,12 @@ -debug N|N,M,O... : enable debug 0-N or N and M and O ... -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG -config fname : override the megatest.config file with fname -append-config fname : append fname to the megatest.config file -import-sexpr fname : import a sexpr file (use -list-runs % -dumpmode sexpr to create) - + -regen-testfiles : regenerate scripts and logpro files from testconfig, run in test context + Utilities -env2file fname : write the environment to fname.csh and fname.sh -envcap a : save current variables labeled as context 'a' in file envdat.db -envdelta a-b : output enviroment delta from context a to context b to -o fname set the output mode with -dumpmode csh, bash or ini @@ -466,10 +473,11 @@ "-local" ;; run some commands using local db access "-generate-html" "-generate-html-structure" "-list-run-time" "-list-test-time" + "-regen-testfiles" ;; misc queries "-list-disks" "-list-targets" "-list-db-targets" @@ -999,14 +1007,11 @@ (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state") (for-each (lambda (dbfile) (let* ( (dbfname (conc (pathname-file dbfile) ".db")) - (sfiles (tt:find-server *toppath* dbfname)) ) - (for-each - (lambda (sfile) (let ( (sinfos (tt:get-server-info-sorted ttdat dbfname)) ) (for-each (lambda (sinfo) @@ -1028,13 +1033,10 @@ ) ) sinfos ) ) - ) - sfiles - ) ) ) dbfiles ) (set! *didsomething* #t) @@ -1079,11 +1081,11 @@ (killed (system (conc "ssh " host " kill " pid " > /dev/null"))) (dummy2 (sleep 1)) (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive")) ) (format #t fmtstr db (conc host ":" port) pid age last-mod state) - (system (conc "rm " sfile)) + (delete-file* sfile) ) ) sinfos ) ) @@ -2119,10 +2121,25 @@ (paths (tests:test-get-paths-matching keys target (args:get-arg "-test-files")))) (for-each (lambda (path) (print path)) paths)))))) +;;====================================================================== +;; Utils for test areas +;;====================================================================== + +(if (args:get-arg "-regen-testfiles") + (if (getenv "MT_TEST_RUN_DIR") + (begin + (launch:setup) + (change-directory (getenv "MT_TEST_RUN_DIR")) + (let* ((testname (getenv "MT_TEST_NAME")) + (itempath (getenv "MT_ITEMPATH"))) + (launch:extract-scripts-logpro (getenv "MT_TEST_RUN_DIR") testname itempath #f)) + (set! *didsomething* #t)) + (debug:print 0 *default-log-port* "ERROR: Must run -regen-testfiles in a test environment (i.e. test xterm from dashboard)"))) + ;;====================================================================== ;; Archive tests ;;====================================================================== ;; Archive tests matching target, runname, and testpatt (if (equal? (args:get-arg "-archive") "replicate-db") Index: mtargs/mtargs.scm ================================================================== --- mtargs/mtargs.scm +++ mtargs/mtargs.scm @@ -20,10 +20,11 @@ ( arg-hash get-arg get-arg-number get-arg-from + remove-arg-from-ht get-args usage print-args any-defined? ) @@ -64,10 +65,14 @@ (define (get-arg-from ht arg . default) (if (null? default) (hash-table-ref/default ht arg #f) (hash-table-ref/default ht arg (car default)))) + +(define (remove-arg-from-ht arg) + (hash-table-delete! arg-hash arg) +) (define (get-args args params switches arg-hash num-needed) (let* ((numtargs (length args)) (adj-num-needed (if num-needed (+ num-needed 2) #f))) (if (< numtargs (if adj-num-needed adj-num-needed 2)) Index: portlogger.scm ================================================================== --- portlogger.scm +++ portlogger.scm @@ -31,11 +31,12 @@ (import (prefix sqlite3 sqlite3:)) (import debugprint dbmod) ;; lsof -i (define (portlogger:open-db fname) - (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away + (let* (;; (avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away + (avail #t) (exists (file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin (system (conc "rm -f " fname)) Index: rmt.scm ================================================================== --- rmt.scm +++ rmt.scm @@ -72,34 +72,11 @@ ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.") - - (if (not (eq? (rmt:transport-mode) 'nfs)) - (begin - (if (> attemptnum 2) - (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) - - (cond - ((> attemptnum 2) (thread-sleep! 0.05)) - ((> attemptnum 10) (thread-sleep! 0.5)) - ((> attemptnum 20) (thread-sleep! 1))) - - ;; I'm turning this off, it may make sense to move it - ;; into http-transport-handler - (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) - (begin - (debug:print 0 *default-log-port* "ERROR: can't connect to server, trying to start a server.") - (case (rmt:transport-mode) - ((http) - (server:run *toppath*) - (thread-sleep! 3)) - (else - (thread-sleep! 1) ;; for tcp the server is started by routines in tcp-transportmod. For nfs there is no server - )))))) - + ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value Index: rmtmod.scm ================================================================== --- rmtmod.scm +++ rmtmod.scm @@ -87,16 +87,24 @@ (let* ((runname (car run-dat)) (all-dat (cdr run-dat)) (tests-data (alist-ref "data" all-dat equal?)) (run-meta (alist-ref "meta" all-dat equal?)) (run-id (rmt:insert-run target runname run-meta))) - (for-each - (lambda (test-dat) - (let* ((test-id (car test-dat)) + (if (list? tests-data) + (begin + (debug:print 0 *default-log-port* "import-run: inserting " (length tests-data) " tests") + (for-each + (lambda (test-dat) + (let* ((test-id (car test-dat)) (test-rec (cdr test-dat))) - (rmt:insert-test run-id test-rec))) - tests-data))) + (rmt:insert-test run-id test-rec))) + tests-data) + ) + (debug:print 0 *default-log-port* "import-run: run has no tests") + ) + ) +) ;; insert run if not there, return id either way (define (rmt:insert-run target runname run-meta) ;; look for id, return if found (debug:print 0 *default-log-port* "Insert run: "target"/"runname) @@ -107,13 +115,22 @@ (rmtmod:send-receive 'insert-run #f (list target runname run-meta)) (simple-run-id (car runs))))) (define (rmt:insert-test run-id test-rec) (let* ((testname (alist-ref "testname" test-rec equal?)) - (item-path (alist-ref "item_path" test-rec equal?))) - (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path) - (rmtmod:send-receive 'insert-test run-id test-rec))) + (item-path (alist-ref "item_path" test-rec equal?)) + (test-id (rmt:get-test-id run-id testname item-path)) + ) + (if test-id + (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id) + (begin + (debug:print 0 *default-log-port* " Insert test in run "run-id": "testname"/"item-path) + (rmtmod:send-receive 'insert-test run-id test-rec) + ) + ) + ) +) ;;====================================================================== ;; T E S T S ;;====================================================================== Index: runs.scm ================================================================== --- runs.scm +++ runs.scm @@ -2916,11 +2916,11 @@ (fld (car key)) (val (configf:lookup test-conf "test_meta" fld))) ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val) (if (and val (not (equal? (vector-ref currrecord idx) val))) (begin - (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val) + (debug:print 2 *default-log-port* "Updating " test-name " " fld " to " val) (rmt:testmeta-update-field test-name fld val))))) '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10))))) ;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..." ;; Index: tcp-transportmod.scm ================================================================== --- tcp-transportmod.scm +++ tcp-transportmod.scm @@ -232,10 +232,11 @@ ;;(tt:handler # get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. + ;; connect-to-server will start a server if needed. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) @@ -260,11 +261,11 @@ (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result - (if (not res) ;; tt:handler is telling us that communication failed + (if (not res) ;; tt:send-receive telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) ;; (dbfname (tt-conn-port conn)) ;; 192.168.0.127:4242-726924:4.db (pid (tt-conn-pid conn)) ;;(servinf (tt-conn-servinf-file conn))) @@ -293,11 +294,11 @@ ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again - (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", but do not see servinf file "servinf) + (debug:print 2 *default-log-port* "INFO: connection to server "host":"port" broken for "dbfname", no servinf file. Server exited? ") (thread-sleep! 0.5) (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)))) (begin ;; this case is where res is malformed. Probably should abort (assert #f "FATAL: tt:handler received bad data "res) ;; (debug:print 0 *default-log-port* "INFO: got corrupt data from server "host":"port", "res", for "dbfname", will try again.") @@ -476,11 +477,12 @@ ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead - (if (> (length servers) 4) + (debug:print 0 *default-log-port* "Found " (length servers) " already running for " dbfname) + (if (> (length servers) 0) (begin (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") (exit)) (let* ((dbstruct (dbmod:open-dbmoddb areapath run-id dbfname (dbfile:db-init-proc) keys))) (tt-handler-set! ttdat (handler dbstruct)) @@ -537,12 +539,11 @@ (dbfile:with-no-sync-db nosyncdbpath (lambda (db) (let* ((dbtmpname (dbr:dbstruct-dbtmpname dbstruct))) (debug:print-info 0 *default-log-port* "Running clean up, including removing db file "dbtmpname) (db:no-sync-del! db dbfname) - #;(if dbtmpname - (delete-file dbtmpname)))))))) + )))))) (set! *server-info* ttdat) (let loop ((count 0)) (if (> count 240) (begin (debug:print 0 *default-log-port* "FATAL: Could not start a tcp server, giving up.") @@ -585,10 +586,11 @@ (if (and res (common:low-noise-print 120 "top server message")) (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for " dbfname" on "(tt-host ttdat)":"(tt-port ttdat))) res)) (else + ;; wrong servinfo file (debug:print-info 0 *default-log-port* "I'm not the lead server: "servers) (let* ((leadsrv (car servers))) (match leadsrv ((host port startseconds server-id pid dbfname servinfofile) (let* ((result (tt:timed-ping host port server-id)) @@ -713,21 +715,38 @@ ;; if more than one, wait one second and look again ;; future: ping oldest, if alive remove other : files ;; (define (tt:find-server areapath dbfname) (let* ((servdir (tt:get-servinfo-dir areapath)) - (sfiles (glob (conc servdir"/*:"dbfname)))) - sfiles)) + (sfiles (glob (conc servdir"/*:"dbfname))) + (good-files '())) + (for-each + (lambda (sfile) + (let* ((sinfo (tt:server-get-info sfile)) + (host (list-ref sinfo 0)) + (port (list-ref sinfo 1)) + (server-id (list-ref sinfo 3)) + (pid (list-ref sinfo 4)) + (status (system (conc "ssh " host " ps " pid " > /dev/null"))) + ) + (if (= status 0) + (set! good-files (cons sfile good-files)) + (delete-file* sfile) + ) + ) + ) + sfiles + ) + (debug:print-info 2 *default-log-port* "tt:find-server: good-files: " good-files " sfiles: " sfiles) + good-files)) ;; given a path to a server info file return: host port startseconds server-id pid dbfname logf ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 ;; (define (tt:server-get-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id - (dbprep-rx (regexp "^SERVER: dbprep")) - (dbprep-found 0) (bad-dat (list #f #f #f #f #f #f logf))) (let ((fdat (handle-exceptions exn (begin ;; WARNING: this is potentially dangerous to blanket ignore the errors @@ -737,13 +756,11 @@ (if (null? fdat) ;; bad data, return bad-dat bad-dat (let loop ((inl (car fdat)) (tail (cdr fdat)) (lnum 0)) - (let ((mlst (string-match server-rx inl)) - (dbprep (string-match dbprep-rx inl))) - (if dbprep (set! dbprep-found 1)) + (let ((mlst (string-match server-rx inl))) (if (not mlst) (if (> lnum 500) ;; give up if more than 500 lines of server log read bad-dat (if (null? tail) bad-dat @@ -884,11 +901,15 @@ (if (< port 65535) (begin (portlogger:open-run-close portlogger:set-failed port) (thread-sleep! 0.25) (setup-listener-portlogger uconn)) - #f) + (begin + (assert #t "setup-listener-portlogger: could not get a port") + #f + ) + ) (connect-listener uconn port)))) (define (connect-listener uconn port) ;; (tcp-listener-socket LISTENER)(socket-name so) ;; sockaddr-address, sockaddr-port, sockaddr->string Index: tests.scm ================================================================== --- tests.scm +++ tests.scm @@ -1638,11 +1638,11 @@ (if (and testexists cache-file (file-write-access? cache-path) allow-write-cache) (let ((tpath (conc cache-path "/.testconfig"))) - (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath) + (debug:print-info 2 *default-log-port* "Caching testconfig for " test-name " in " tpath) (if (and tcfg (not (common:in-running-test?))) (configf:write-alist tcfg tpath)))) tcfg)))))) ;; sort tests by priority and waiton