Changes In Branch v1.65 Through [169d94de4e] Excluding Merge-Ins
This is equivalent to a diff from 57b5fb07d6 to 169d94de4e
2021-04-06
| ||
13:30 | made the sourcing of cfg.sh not dependent on LD_LIBRARY_PATH being set check-in: 38a8d8c4ea user: mmgraham tags: v1.65 | |
10:31 | Fixed typo Closed-Leaf check-in: 8283307ec9 user: matt tags: v1.65-real | |
2021-03-25
| ||
17:35 | changed server start lock file expire-time from 15 to 25 seconds check-in: 169d94de4e user: mmgraham tags: v1.65, v1.6584 | |
11:26 | Changed sleep in kind-run from 2 to 18 seconds to allow server to come up to the point where get-list will find it. Cleaned up messages and comments. check-in: 2f8513eec5 user: mmgraham tags: v1.65 | |
2021-03-14
| ||
05:41 | Redo inter-test-delay. Leaf check-in: 262ea69800 user: matt tags: v1.65-real-redo-inter-test-delay | |
2021-03-11
| ||
17:52 | updated delay to kind-run check-in: dd2cea12eb user: pjhatwal tags: v1.65, THIS-IS-REAL-1.65 | |
2021-03-09
| ||
21:03 | Very odd, missing egg in server.scm, util. check-in: 57b5fb07d6 user: matt tags: v1.65-real | |
18:48 | changed version to 1.6584 check-in: b6403cb822 user: mmgraham tags: v1.65-real | |
Modified configf.scm from [15f0835800] to [d727d54436].
︙ | ︙ | |||
50 51 52 53 54 55 56 57 58 59 60 61 62 63 | (define (configf:assoc-safe-add alist key val #!key (metadata #f)) (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) (append newalist (list (if metadata (list key val metadata) (list key val)))))) (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) (hash-table-set! cfgdat section-name (configf:assoc-safe-add (hash-table-ref/default cfgdat section-name '()) var value metadata: metadata))) (define (configf:eval-string-in-environment str) | > > > > | 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 | (define (configf:assoc-safe-add alist key val #!key (metadata #f)) (let ((newalist (filter (lambda (x)(not (equal? key (car x)))) alist))) (append newalist (list (if metadata (list key val metadata) (list key val)))))) ;; this is used in megatestqa/ext.scm. ;; remove it from here and there by 12/31/21 (define config:assoc-safe-add configf:assoc-safe-add) (define (configf:section-var-set! cfgdat section-name var value #!key (metadata #f)) (hash-table-set! cfgdat section-name (configf:assoc-safe-add (hash-table-ref/default cfgdat section-name '()) var value metadata: metadata))) (define (configf:eval-string-in-environment str) |
︙ | ︙ | |||
142 143 144 145 146 147 148 | (lambda () (set! result ((eval (read)) ht)))) (set! result (conc "#{(" cmdtype ") " cmd "}")))) (case cmdsym ((system shell scheme) (let ((delta (- (current-seconds) start-time))) (if (> delta 2) | | | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | (lambda () (set! result ((eval (read)) ht)))) (set! result (conc "#{(" cmdtype ") " cmd "}")))) (case cmdsym ((system shell scheme) (let ((delta (- (current-seconds) start-time))) (if (> delta 2) (debug:print-info 2 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result) (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command: " cmd " took " delta " seconds to run with output:\n " result))))) (loop (conc prestr result poststr))) res)) res))) ;; Run a shell command and return the output as a string (define (shell cmd) |
︙ | ︙ |
Modified runs.scm from [2583922f1c] to [07f8912d41].
︙ | ︙ | |||
499 500 501 502 503 504 505 506 507 508 509 510 511 512 | ;; runs:run-tests is called from megatest.scm and itself ;;====================================================================== ;; ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) ;; (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) (dbfile (conc *toppath* "/megatest.db")) | > | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | ;; runs:run-tests is called from megatest.scm and itself ;;====================================================================== ;; ;; test-names: Comma separated patterns same as test-patts but used in selection ;; of tests to run. The item portions are not respected. ;; FIXME: error out if /patt specified ;; ;; run-count is passed from megatest.scm as configf:lookup *configdat* "setup" "reruns", or defaults to 1. (define (runs:run-tests target runname test-patts user flags #!key (run-count 1)) ;; test-names (let* ((keys (keys:config-get-fields *configdat*)) (keyvals (keys:target->keyval keys target)) (run-id (rmt:register-run keyvals runname "new" "n/a" user (args:get-arg "-contour"))) ;; test-name))) ;; (deferred '()) ;; delay running these since they have a waiton clause (runconfigf (conc *toppath* "/runconfigs.config")) (dbfile (conc *toppath* "/megatest.db")) |
︙ | ︙ | |||
622 623 624 625 626 627 628 | ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) | | | 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 | ;; (set! required-tests (lset-intersection equal? test-names all-test-names)) ;; look up all tests matching the comma separated list of globs in ;; test-patts (using % as wildcard) ;; (set! test-names (delete-duplicates (tests:get-valid-tests *toppath* test-patts))) (debug:print-info 0 *default-log-port* "tests search path: " (string-intersperse (tests:get-tests-search-path *configdat*) " ")) (debug:print-info 2 *default-log-port* "all tests: " (string-intersperse (sort all-test-names string<) " ")) (debug:print-info 0 *default-log-port* "test names: " (string-intersperse (sort test-names string<) " ")) (debug:print-info 0 *default-log-port* "required tests: " (string-intersperse (sort required-tests string<) " ")) ;; on the first pass or call to run-tests set FAILS to NOT_STARTED if ;; -keepgoing is specified (if (eq? *passnum* 0) (begin |
︙ | ︙ | |||
2391 2392 2393 2394 2395 2396 2397 | ;; seek and kill in flight -runtests with % as testpatt here ;; (if (equal? testpatt "%") (tasks:kill-runner target run-name testpatt) ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) | | | 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 | ;; seek and kill in flight -runtests with % as testpatt here ;; (if (equal? testpatt "%") (tasks:kill-runner target run-name testpatt) ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt)) (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((set-state-status) ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10)) (debug:print 1 *default-log-port* "Modifying state and status for tests for run: " runkey " " (db:get-value-by-header run header "runname"))) ((print-run) (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header) action) ((run-wait) (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete")) ((archive) (debug:print 1 *default-log-port* "Archiving/restoring (" (args:get-arg "-archive") ") data for run: " runkey " " (db:get-value-by-header run header "runname")) |
︙ | ︙ |
Modified server.scm from [6d2db21290] to [bb2f7cfc15].
︙ | ︙ | |||
109 110 111 112 113 114 115 | ((rpc) (db:obj->string (vector success/fail query-sig result))) ((http) (db:obj->string (vector success/fail query-sig result))) ((fs) result) (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) | | | | 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | ((rpc) (db:obj->string (vector success/fail query-sig result))) ((http) (db:obj->string (vector success/fail query-sig result))) ((fs) result) (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area (let* ((curr-host (get-host-name)) ;; (attempt-in-progress (server:start-attempted? areapath)) ;; (dot-server-url (server:check-if-running areapath)) |
︙ | ︙ | |||
153 154 155 156 157 158 159 | (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time | | < | | > > | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 | (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (thread-sleep! (/ (random 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time (debug:print 0 *default-log-port* "INFO: starting server at " (common:human-time)) (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) ;; given a path to a server log return: host port startseconds server-id ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0)) (handle-exceptions exn |
︙ | ︙ | |||
199 200 201 202 203 204 205 | (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) (begin (if dbprep-found (begin | | | | | | 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 | (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) (begin (if dbprep-found (begin (debug:print-info 0 *default-log-port* "Server is in dbprep at " (common:human-time)) (thread-sleep! 0.5) ;; was 25 sec but that blocked things from starting? ) (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds))) ) (list #f #f #f #f))))))))) ;; get a list of servers from the log files, with all relevant data ;; ( mod-time host port start-time pid ) ;; (define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then |
︙ | ︙ | |||
232 233 234 235 236 237 238 | ;; Get the list of server logs that do not contain "exiting". Ignore logs for servers that have exited. (let* ((server-logs-cmd (conc "grep -iL exiting " areapath "/logs/server-*-*.log")) (server-logs (string-split (string-chomp (call-with-input-pipe server-logs-cmd read-all)))) (num-serv-logs (length server-logs))) (if (or (null? server-logs) (= num-serv-logs 0)) (let () | | | 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 | ;; Get the list of server logs that do not contain "exiting". Ignore logs for servers that have exited. (let* ((server-logs-cmd (conc "grep -iL exiting " areapath "/logs/server-*-*.log")) (server-logs (string-split (string-chomp (call-with-input-pipe server-logs-cmd read-all)))) (num-serv-logs (length server-logs))) (if (or (null? server-logs) (= num-serv-logs 0)) (let () (debug:print 1 *default-log-port* "There are no servers running at " (common:human-time)) '() ) (let loop ((hed (string-chomp (car server-logs))) (tal (cdr server-logs)) (res '())) (let* ((mod-time (handle-exceptions exn |
︙ | ︙ | |||
363 364 365 366 367 368 369 | (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) | | > > > | | > > > > | | < < | < < | > > | > > | | > > | | | | < < < < < < < < < | | | | | | | | > > > | 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 | (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; if server-start-last exists, and wasn't old enough, wait <idle time>, then call this function recursively until it is old enough. ;; if it is old enough, overwrite it and wait 0.25 seconds. ;; if it then has the wrong server key, wait <idle time> and call this function recursively. ;; (define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last")) ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds) (idletime (configf:lookup-number *configdat* "server" "idletime" default: 4)) (server-key (conc (get-host-name) "-" (current-process-id)))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (delta (- (current-seconds) fmodtime)) (old-enough (> delta idletime)) ) ;; write start-flag file, wait 0.25s, then if previously the start-flag file was older than <idletime> seconds, and the new file still has the same server key as you just wrote, return #t. ;; (if (and old-enough (begin (debug:print-info 0 *default-log-port* "Writing " start-flag) (with-output-to-file start-flag (lambda () (print server-key))) (thread-sleep! 0.25) (let ((res (with-input-from-file start-flag (lambda () (read-line))))) (equal? server-key res))) ) #t ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time>, then call this function recursively. (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "server key does not match" "too soon to start another server")) (thread-sleep! idletime) (server:wait-for-server-start-last-flag areapath))))))) ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least <server idletime> seconds old (server:wait-for-server-start-last-flag areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ( (lock-file (conc areapath "/logs/server-start.lock"))) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (common:simple-file-lock-and-wait lock-file expire-time: 25) (debug:print-info 0 *default-log-port* "server:kind-run: touching " start-flag) (system (conc "touch " start-flag)) ;; lazy but safe (server:run areapath) (thread-sleep! 18) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED". (common:simple-file-release-lock lock-file))) (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.") ) ) ;; this one seems to be the general entry point ;; (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout))) (let loop ((server-info (server:check-if-running areapath)) (try-num 0)) |
︙ | ︙ |