Overview
Comment: | Allocating test run areas at test start time now mostly working |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64-defered-rundir |
Files: | files | file ages | folders |
SHA1: |
1ebe0d3438d42e14bc21885bd53e2771 |
User & Date: | mrwellan on 2017-07-18 15:06:12 |
Other Links: | branch diff | manifest | tags |
Context
2017-07-25
| ||
23:20 | Merged in v1.64 changes and fixed couple places affected by testpath no longer being available in cmdinfo check-in: 8a6d50fd15 user: matt tags: v1.64-defered-rundir | |
2017-07-18
| ||
15:06 | Allocating test run areas at test start time now mostly working check-in: 1ebe0d3438 user: mrwellan tags: v1.64-defered-rundir | |
09:14 | Moved defered-rundir branch to v1.64 check-in: 78c25e29a0 user: mrwellan tags: v1.64-defered-rundir | |
Changes
Modified configf.scm from [b1611ec83f] to [26fb7cf3f7].
︙ | ︙ | |||
699 700 701 702 703 704 705 | (if (common:file-exists? fname) ;; now verify it is readable (if (configf:read-alist fname) #t ;; data is good. (begin (handle-exceptions exn #f | | | 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 | (if (common:file-exists? fname) ;; now verify it is readable (if (configf:read-alist fname) #t ;; data is good. (begin (handle-exceptions exn #f (debug:print 0 *default-log-port* "WARNING: content read from cache " fname " is not readable. Deleting generated file.") (delete-file fname)) #f)) #f)))) (common:faux-unlock fname) res)) ;; convert hierarchial list to ini format |
︙ | ︙ |
Modified launch.scm from [e08aec9b12] to [a70a186b30].
︙ | ︙ | |||
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 | (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free) (if do-sync (current-seconds) last-sync))))))) (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional (define (launch:execute encoded-cmd) (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) (tconfigreg #f)) (setenv "MT_CMDINFO" encoded-cmd) ;;(bb-check-path msg: "launch:execute incoming") (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area (top-path (assoc/default 'toppath cmdinfo)) | > > > > > > > > > > > > > > > > > > > > > | | 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 | (if (hash-table-ref/default misc-flags 'keep-going #f) ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta (loop (calc-minutes) (or new-cpu-load cpu-load) (or new-disk-free disk-free) (if do-sync (current-seconds) last-sync))))))) (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional (define (launch:test-execute-exit-handler run-id test-id) (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) (set! *time-to-exit* #t) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((th1 (make-thread (lambda () (rmt:test-set-state-status run-id test-id "INCOMPLETE" "KILLED" #f) (print "Killed by signal " signum ". Exiting") (thread-sleep! 1) (exit 1)))) (th2 (make-thread (lambda () (thread-sleep! 2) (debug:print 0 *default-log-port* "Done") (exit 4))))) (thread-start! th2) (thread-start! th1) (thread-join! th2))))) (set-signal-handler! signal/int sighand) (set-signal-handler! signal/term sighand))) (define (launch:execute encoded-cmd) (let* ((cmdinfo (common:read-encoded-string encoded-cmd)) (tconfigreg #f)) (setenv "MT_CMDINFO" encoded-cmd) ;;(bb-check-path msg: "launch:execute incoming") (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed) ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1)) (let* ((testpath (assoc/default 'testpath cmdinfo)) ;; testpath is the test spec area (top-path (assoc/default 'toppath cmdinfo)) (work-area #f) ;; (assoc/default 'work-area cmdinfo)) ;; work-area is the test run area (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (ezsteps (assoc/default 'ezsteps cmdinfo)) ;; (runremote (assoc/default 'runremote cmdinfo)) ;; (transport (assoc/default 'transport cmdinfo)) ;; not used ;; (serverinf (assoc/default 'serverinf cmdinfo)) ;; (port (assoc/default 'port cmdinfo)) |
︙ | ︙ | |||
444 445 446 447 448 449 450 | (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (runtlim (assoc/default 'runtlim cmdinfo)) (contour (assoc/default 'contour cmdinfo)) (item-path (item-list->path itemdat)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (keys #f) | | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < < < < < < | < < < < < < < < < < < < < | | < < < < | | < | | | 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 | (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (runtlim (assoc/default 'runtlim cmdinfo)) (contour (assoc/default 'contour cmdinfo)) (item-path (item-list->path itemdat)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (keys #f) ;; (keyvals (assoc/default 'keyvals cmdinfo)) (fullrunscript (if (not runscript) #f (if (substring-index "/" runscript) runscript ;; use unadultered if contains slashes (let ((fulln (conc testpath "/" runscript))) (if (and (common:file-exists? fulln) (file-execute-access? fulln)) fulln runscript))))) ;; assume it is on the path ) ;; (rollup-status 0) (if contour (setenv "MT_CONTOUR" contour)) ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... ;; (setenv "MT_TESTSUITENAME" areaname) (setenv "MT_RUN_AREA_HOME" top-path) (set! *toppath* top-path) ;; NFS might not have propagated the directory meta data to the run host - give it time if needed (let loop ((count 0)) (if (or (common:file-exists? top-path) (> count 10)) (change-directory top-path) (begin (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found") (thread-sleep! 10) (loop (+ count 1))))) (launch:setup) ;; should be properly in the top-path now (set! tconfigreg (tests:get-all)) (launch:test-execute-exit-handler run-id test-id) ;; Do not run the test if it is REMOVING, RUNNING, KILLREQ or REMOTEHOSTSTART, ;; Mark the test as REMOTEHOSTSTART *IMMEDIATELY* ;; (let* ((test-info (rmt:get-test-info-by-id run-id test-id)) (test-host (if test-info (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (let ((dat (create-work-area run-id runname target test-id testpath #f test-name itemdat))) (set! work-area (car dat))) (debug:print-info 2 *default-log-port* "Using work area " work-area) (setenv "MT_TEST_RUN_DIR" work-area) (cond ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "KILLREQ" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) |
︙ | ︙ | |||
1237 1238 1239 1240 1241 1242 1243 | (if (not (eq? status 0)) (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) (if (and test-src-path (> remtries 0)) (begin (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) ;; | | | < < < < < < < < < < < < < | | 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 | (if (not (eq? status 0)) (debug:print 2 *default-log-port* "ERROR: problem with running \"" cmd "\""))) (list lnkpathf lnkpath )) (if (and test-src-path (> remtries 0)) (begin (debug:print-error 0 *default-log-port* "Failed to create work area at " test-path " with link at " lnktarget ", remaining attempts " remtries) ;; (create-work-area run-id run-info target test-id test-src-path disk-path-in testname itemdat remtries: (- remtries 1))) (list #f #f))))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-source-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* ((item-path (item-list->path itemdat)) (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) (if (> launch-delay delta) (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) |
︙ | ︙ | |||
1317 1318 1319 1320 1321 1322 1323 | (conc (if dir (conc dir "/") "") (case (string->symbol exe) ((dboard) "../megatest") ((mtest) "../megatest") ((dashboard) "megatest") (else exe))))) (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) | | | 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 | (conc (if dir (conc dir "/") "") (case (string->symbol exe) ((dboard) "../megatest") ((mtest) "../megatest") ((dashboard) "megatest") (else exe))))) (launcher (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools" "launcher")) (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-source-path is the full path including the item-path (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (testinfo (rmt:get-test-info-by-id run-id test-id)) (mt_target (string-intersperse (map cadr keyvals) "/")) |
︙ | ︙ | |||
1345 1346 1347 1348 1349 1350 1351 | ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) | | | | | | | | > > | | > | | | 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 | ;; prevent overlapping actions - set to LAUNCHED as early as possible ;; ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f) ;; (pp (hash-table->alist tconfig)) ;;(if (configf:var-is? *configdat* "setup" "early-setup" "yes") ;; (let ((dat (create-work-area run-id run-info keyvals test-id test-source-path #f test-name itemdat tconfig: tconfig))) ;; (set! work-area (car dat)) ;; (set! toptest-work-area (cadr dat)) ;; (debug:print-info 2 *default-log-port* "Using work area " work-area))) (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-source-path) ;; (list 'transport (conc *transport-type*)) ;; (list 'serverinf *server-info*) (list 'homehost (let* ((hhdat (common:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-testsuite-name)) (list 'toppath *toppath*) ;; (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) (list 'run-id run-id ) (list 'test-id test-id ) ;; (list 'item-path item-path ) (list 'itemdat itemdat ) (list 'megatest remote-megatest) (list 'ezsteps ezsteps) (list 'target mt_target) (list 'contour contour) ;; (list 'keyvals keyvals) (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway ;; (if (and work-area (common:file-exists? work-area)) ;; (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir (cond ;; ((and launcher hosts) ;; must be using ssh hostname ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms)))) (launcher (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms)))) (else (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" ""))))) (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) ;; (debug:print 1 *default-log-port* "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 *default-log-port* "fullcmd: " fullcmd) (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. (let* ((commonprevvals (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))) (miscprevvals (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute" (append (list ;; (list "MT_TEST_RUN_DIR" (if work-area work-area "no-test-run-area-set-yet")) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) ;; GET RID OF THIS ONE (list "MT_RUNNAME" runname) (list "MT_TARGET" mt_target) (list "MT_ITEMPATH" item-path) ) itemdat))) |
︙ | ︙ |