Changes In Branch v1.65-adjutant-patched-forward Excluding Merge-Ins
This is equivalent to a diff from 92c105d9bc to 97bcfbf545
2024-08-15
| ||
20:21 | Patched forward adjutant code Leaf check-in: 97bcfbf545 user: matt tags: v1.65-adjutant-patched-forward | |
18:16 | Patched forward adjutant code check-in: 15a47376a6 user: matt tags: v1.65-adjutant-patched-forward-defunct | |
2022-05-16
| ||
20:27 | Merged file adds, cleanup from v1.70 to facilitate moving functions to module files Leaf check-in: e9b009e0a2 user: matt tags: v1.65-cleanup-tweak | |
19:55 | Experimentally adding dbfile.scm Closed-Leaf check-in: 4b7f6bdfbc user: matt tags: v1.65-delme | |
2022-05-15
| ||
05:01 | Rebased v1.7001-multi-db forward to tip of v1.65, old branch is hidden check-in: 820ac9a873 user: matt tags: v1.70 | |
2022-05-13
| ||
13:17 | Remove global waiton from itself. Leaf check-in: 92c105d9bc user: mrwellan tags: v1.65 | |
12:16 | Fixed few things with the hasty implementation of global waitons. check-in: 0c7e3bc287 user: mrwellan tags: v1.65 | |
Modified Makefile from [dd76a98688] to [cb404ea48e].
︙ | ︙ | |||
26 27 28 29 30 31 32 | process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm filedb.scm tdb.scm client.scm mt.scm \ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files | | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm filedb.scm tdb.scm client.scm mt.scm \ ezsteps.scm lock-queue.scm sdb.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files MSRCFILES = dbmod.scm adjutant.scm mutils.scm mttop.scm # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ # rmtmod.scm apimod.scm GUISRCF = dashboard-context-menu.scm dashboard-tests.scm \ dashboard-guimonitor.scm gutils.scm dcommon.scm tree.scm \ |
︙ | ︙ |
Modified adjutant.scm from [7560fecb1c] to [d6c67b1549].
︙ | ︙ | |||
20 21 22 23 24 25 26 | (declare (unit adjutant)) (module adjutant * (import scheme chicken data-structures extras files) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 | | | | > > > > > > > > > > > | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | (declare (unit adjutant)) (module adjutant * (import scheme chicken data-structures extras files) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 md5 message-digest matchable regex srfi-1) (define (adjutant-run host-type rmt:no-sync-take-job) (print "Running the adjutant!") (let loop ((wait-count 0)) (if (< wait-count 10) ;; 6 x 10 seconds = one minute (let* ((dat (rmt:no-sync-take-job host-type))) (match dat ((id ht vars exekey cmdline state event-time last-update) (system cmdline) (loop 0)) (else (thread-sleep! 10) (loop (+ wait-count 1))))) (print "I'm bored. Exiting.")))) ) |
Modified api.scm from [c2c4883b3a] to [1f6726c86f].
︙ | ︙ | |||
254 255 256 257 258 259 260 261 262 263 264 265 266 | ((tasks-get-last) (apply tasks:get-last dbstruct params)) ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) | > > > | | 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 | ((tasks-get-last) (apply tasks:get-last dbstruct params)) ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) ((no-sync-add-job) (apply db:no-sync-add-job *no-sync-db* params)) ((no-sync-take-job) (apply db:no-sync-take-job *no-sync-db* params)) ((no-sync-job-records-clean) (apply db:no-sync-job-records-clean *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ;; ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) ;;====================================================================== ;; READ ONLY QUERIES ;;====================================================================== ;; KEYS ((get-key-val-pairs) (apply db:get-key-val-pairs dbstruct params)) ((get-keys) (db:get-keys dbstruct)) |
︙ | ︙ |
Modified common.scm from [37b673dcbd] to [0271503ac0].
︙ | ︙ | |||
3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 | ;; ;; [hosts] ;; arm cubie01 cubie02 ;; x86_64 zeus xena myth01 ;; allhosts #{g hosts arm} #{g hosts x86_64} ;; ;; [host-types] ;; general #MTLOWESTLOAD #{g hosts allhosts} ;; arm #MTLOWESTLOAD #{g hosts arm} ;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo ;; ;; [host-rules] ;; # maxnload => max normalized load ;; # maxnjobs => max jobs per cpu ;; # maxjobrate => max jobs per second ;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 ;; ;; [launchers] ;; envsetup general | > > > > | > | > > > | | | > > > > | | 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 | ;; ;; [hosts] ;; arm cubie01 cubie02 ;; x86_64 zeus xena myth01 ;; allhosts #{g hosts arm} #{g hosts x86_64} ;; ;; [host-types] ;; C/M/A lets megatest know this launcher provides C cores, M bytes memory for architecture A ;; 2/2G/arm smart -cores 2 -memory 2G -arch arm ;; general #MTLOWESTLOAD #{g hosts allhosts} ;; arm #MTLOWESTLOAD #{g hosts arm} ;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo ;; ;; NOTE: host-rules is ONLY used for MTLOWESTLOAD ;; ;; [host-rules] ;; # maxnload => max normalized load ;; # maxnjobs => max jobs per cpu ;; # maxjobrate => max jobs per second ;; general maxnload=1.1; maxnjobs=1.2; maxjobrate=0.1 ;; ;; [launchers] ;; envsetup general ;; xor/%/n 2/2G/arm ;; % nbgeneral ;; ;; [jobtools] ;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match. ;; flexi-launcher yes ;; launcher nbfake ;; mode adjutant|normal (default is normal) ;; ;; ;; mode is 'normal (i.e. directly use launcher) or 'adjutant (i.e. use adjutant) ;; (define (common:get-launcher configdat testname itempath mode) (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher"))) (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no"))) (let* ((launchers (hash-table-ref/default configdat "launchers" '()))) (if (null? launchers) fallback-launcher (let loop ((hed (car launchers)) (tal (cdr launchers))) (let ((patt (car hed)) (host-type (cadr hed))) (if (tests:match patt testname itempath) ;; have a launcher match for this test (begin (debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type) (let ((launcher (configf:lookup configdat "host-types" host-type))) ;; find the actual launcher from the host-types table ;; if we are in adjutant mode then we want to return both host-type and launcher (if launcher (let* ((launcher-parts (string-split launcher)) (launcher-exe (car launcher-parts))) (if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline (let host-loop ((targ-host (common:get-least-loaded-host (cdr launcher-parts) host-type configdat)) (count 100)) (if targ-host (conc "remrun " targ-host) (if (> count 0) (begin (debug:print 0 *default-log-port* "INFO: Waiting for a host for host-type " host-type) (thread-sleep! (- 101 count)) (host-loop (common:get-least-loaded-host (cdr launcher-parts) host-type configdat) (- count 1))) (begin (debug:print 0 *default-log-port* "FATAL: Failed to find a host from #MTLOWESTLOAD for host-type " host-type) (exit))))) (case mode ((adjutant) (list host-type launcher)) (else launcher)))) (begin (debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type) (if (null? tal) fallback-launcher (loop (car tal)(cdr tal))))))) ;; no match, try again (if (null? tal) |
︙ | ︙ |
Modified db.scm from [65246b91b8] to [c98421ab5b].
︙ | ︙ | |||
2145 2146 2147 2148 2149 2150 2151 | (let* ((dbpath (db:dbfile-path)) (dbname (conc dbpath "/no-sync.db")) (db-exists (common:file-exists? dbname)) (db (sqlite3:open-database dbname))) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) (if (not db-exists) (begin | > | > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 | (let* ((dbpath (db:dbfile-path)) (dbname (conc dbpath "/no-sync.db")) (db-exists (common:file-exists? dbname)) (db (sqlite3:open-database dbname))) (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 136000)) (if (not db-exists) (begin (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));") (sqlite3:execute db "PRAGMA journal_mode=WAL;"))) ;; MOVE THIS TABLE CREATION TO THE (begin above in about six months (it is Sep 2020 right now). (sqlite3:execute db "CREATE TABLE IF NOT EXISTS jobs_queue (id INTEGER PRIMARY KEY, host_type TEXT, cores INTEGER, memory TEXT, vars TEXT, exekey TEXT, cmdline TEXT, state TEXT, event_time INTEGER, last_update INTEGER);") ;; not sure I'll use this next one. I prefer if tests simply append to a file: ;; last-update-seconds cpuload tmpspace rundirspace (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_extra_data (id INTEGER PRIMARY KEY, run_id INTEGER, test_id INTEGER, last_seen_running INTEGER);") (sqlite3:execute db "PRAGMA synchronous = 0;") db)) (define (db:no-sync-add-job db-in host-type vars-list exekey cmdline) (sqlite3:execute (db:no-sync-db db-in) "INSERT INTO jobs_queue (host_type,vars,exekey,cmdline,state,event_time,last_update) VALUES (?,?,?,?,?,?,?);" host-type (with-output-to-string (lambda () (write vars-list))) exekey cmdline "waiting" (current-seconds)(current-seconds))) ;; find next job (waiting longest) that matches host-type - future, we'll find jobs that fit if no exact match (define (db:no-sync-take-job db-in host-type) (let* ((db (db:no-sync-db db-in)) (stmt1 "SELECT id,host_type,vars,exekey,cmdline,state,event_time,last_update FROM jobs_queue WHERE host_type=? AND state != 'taken' ORDER BY event_time ASC;") (stmt1h (sqlite3:prepare db stmt1)) (stmt2 "UPDATE jobs_queue SET state='taken',last_update=? WHERE id=?;") (stmt2h (sqlite3:prepare db stmt2)) (res (sqlite3:with-transaction db (lambda () (let* ((matching-jobs (sqlite3:fold-row (lambda (res . row) ;; id host-type vars exekey state event-time last-update) (cons row res)) '() stmt1h host-type))) (if (null? matching-jobs) #f (let ((choosen-one (let loop ((tal matching-jobs) (res #f)) ;; put bestest one in here (if (null? tal) res (let ((curr (car tal)) (rem (cdr tal))) curr) ;; here we will compare with res, if better candidate the loop with curr else loop with res )))) (if choosen-one ;; we need to mark it as taken (sqlite3:execute stmt2h (current-seconds) (car choosen-one))) choosen-one))))))) (sqlite3:finalize! stmt1h) ;; it'd be nice to cache these and finalize on exit. (sqlite3:finalize! stmt2h) res)) ;; clean out old jobs in queue, i.e. taken and event_time > 24 hrs ago ;; (define (db:no-sync-job-records-clean db) (sqlite3:execute (db:no-sync-db db) "DELETE FROM jobs_queue WHERE state='taken' AND event_time < ?;" (- (current-seconds)(* 24 3600)))) ;; if we are not a server create a db handle. this is not finalized ;; so watch for problems. I'm still not clear if it is needed to manually ;; finalize sqlite3 dbs with the sqlite3 egg. ;; (define (db:no-sync-db db-in) (mutex-lock! *db-access-mutex*) |
︙ | ︙ | |||
2170 2171 2172 2173 2174 2175 2176 | (define (db:no-sync-set db var val) (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) | | > | | 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 | (define (db:no-sync-set db var val) (sqlite3:execute (db:no-sync-db db) "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES (?,?);" var val)) (define (db:no-sync-del! db var) (sqlite3:execute (db:no-sync-db db) "DELETE FROM no_sync_metadat WHERE var=?;" var)) (define (db:no-sync-get/default db-in var default) (let ((db (db:no-sync-db db-in)) (res default)) (sqlite3:for-each-row (lambda (val) (set! res val)) (db:no-sync-db db) "SELECT val FROM no_sync_metadat WHERE var=?;" var) (if res |
︙ | ︙ |
Modified launch.scm from [1931a96c9c] to [0f92a3953e].
︙ | ︙ | |||
1417 1418 1419 1420 1421 1422 1423 | (else #f)))) (when do-scan? (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") (rmt:set-var key (current-seconds)) (rmt:find-and-mark-incomplete run-id #f)))) | | > > > > > > > > | < < < < < < < < < < < < | > > > < | | | | | | | | | > > > > > > > | 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 | (else #f)))) (when do-scan? (debug:print 1 *default-log-port* "INFO: search and mark zombie tests") (rmt:set-var key (current-seconds)) (rmt:find-and-mark-incomplete run-id #f)))) (defstruct launch:ajt (vars '()) (exekey #f) (host-type #f) (test-sig #f) (cmdline #f)) ;; append vars (define (launch:ajt-add-vars dat vars) (launch:ajt-vars-set! dat (append (launch:ajt-vars dat) vars))) ;; 1. look though disks list for disk with most space ;; 2. create run dir on disk, path name is meaningful ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* (;; locking code removed from here commented out and pasted at end of file (item-path (item-list->path itemdat)) (contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))) ;; launcher-mode will be 'adjutant or 'normal (launcher-mode (string->symbol (or (configf:lookup *configdat* "jobtools" "mode") "normal"))) (ajtdat (make-launch:ajt))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 0))) (if (> launch-delay delta) (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) (thread-sleep! (- launch-delay delta)) (loop (- (current-seconds) *last-launch*) launch-delay)))) (change-directory *toppath*) (let ((var-list (append (list (list "MT_RUN_AREA_HOME" *toppath*) (list "MT_TEST_NAME" test-name) (list "MT_RUNNAME" runname) (list "MT_ITEMPATH" item-path) (list "MT_CONTOUR" contour) ) itemdat))) ;; consolidate this code with the code in megatest.scm for ;; "-execute", *maybe* - the longer they are set the longer ;; each launch takes (must be non-overlapping with the vars) (alist->env-vars var-list) ;; the var-list into the ajtdat adjutant record whether it is needed or not. (launch:ajt-add-vars ajtdat var-list)) (let* ((tregistry (tests:get-all)) ;; third param (below) is system-allowed ;; for tconfig, why do we allow fallback to test-conf? (tconfig (or (tests:get-testconfig test-name item-path tregistry #t force-create: #t) (begin (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.") test-conf))) ;; force re-read now that all vars are set (useshell (let ((ush (configf:lookup *configdat* "jobtools" "useshell"))) |
︙ | ︙ | |||
1484 1485 1486 1487 1488 1489 1490 | (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) ;; (memory (configf:lookup tconfig "requirements" "memory")) ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed (remote-megatest (configf:lookup *configdat* "setup" "executable")) (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") (configf:lookup *configdat* "setup" "runtimelim"))) | < < < < < < < > | | < < < < < > > | > > > > > > > > > | | 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 | (subrun (> (length (hash-table-ref/default tconfig "subrun" '())) 0)) ;; send a flag to process a subrun ;; (diskspace (configf:lookup tconfig "requirements" "diskspace")) ;; (memory (configf:lookup tconfig "requirements" "memory")) ;; (hosts (configf:lookup *configdat* "jobtools" "workhosts")) ;; I'm pretty sure this was never completed (remote-megatest (configf:lookup *configdat* "setup" "executable")) (run-time-limit (or (configf:lookup tconfig "requirements" "runtimelim") (configf:lookup *configdat* "setup" "runtimelim"))) (local-megatest (common:find-local-megatest)) (launcher (let ((l (common:get-launcher *configdat* test-name item-path launcher-mode))) (if (string? l) (string-split l) l))) ;; some nonhomogenuity here. '(cmd param1 param2 ...) OR '(host-type launcher) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path (test-sig (conc (common:get-testsuite-name) ":" test-name ":" item-path)) (work-area #f) (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (testinfo (rmt:get-test-info-by-id run-id test-id)) (mt_target (string-intersperse (map cadr keyvals) "/")) (debug-param (append (if (args:get-arg "-debug") (list "-debug" (args:get-arg "-debug")) '()) (if (args:get-arg "-logging")(list "-logging") '()) (if (configf:lookup *configdat* "misc" "profilesw") (list (configf:lookup *configdat* "misc" "profilesw")) '())))) ;; save the test-sig in the ajtdat record (launch:ajt-test-sig-set! ajtdat test-sig) ;; go ahead and figure out if we have a host-type from the ;; launcher call above and save it in the ajtdat record (if (and (eq? launcher-mode 'adjutant) (list? launcher) (> (length launcher) 1)) (launch:ajt-host-type-set! ajtdat (car launcher))) ;; (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) ;; (if launcher (set! launcher (string-split launcher))) ;; yuk! ;; set up the run work area for this test (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir (begin (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path) (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record |
︙ | ︙ | |||
1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 | (list 'target mt_target) (list 'contour contour) (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir | > > | > | | < > < | < | | | | | | | | < | > > | | | > > > | | | | | | | | | | | > > > > > > > > > > > > > > > > > > > > > > > > | < | 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 | (list 'target mt_target) (list 'contour contour) (list 'runtlim (if run-time-limit (common:hms-string->seconds run-time-limit) #f)) (list 'env-ovrd (hash-table-ref/default *configdat* "env-override" '())) (list 'set-vars (if params (hash-table-ref/default params "-setvars" #f))) (list 'runname runname) (list 'mt-bindir-path mt-bindir-path)))))))) ;; save the cmdparms in the ajtdat (launch:ajt-exekey-set! ajtdat cmdparms) (setenv "MT_CMDINFO" cmdparms) ;; setting this for use in nblauncher ;; clean out step records from previous run if they exist ;; (rmt:delete-test-step-records run-id test-id) ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway (if (common:file-exists? work-area) (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir ;; save the command line for adjutant mode (might never be needed but best to assemble it here) (launch:ajt-cmdline-set! ajtdat (string-intersperse (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) (cond (launcher (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param))) (else (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 *default-log-port* "Launching " work-area) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 *default-log-port* "fullcmd: " fullcmd) (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible. (let* ((env-override-vars (hash-table-ref/default *configdat* "env-override" '())) (commonprevvals (alist->env-vars env-override-vars)) (misc-vars (append (list (list "MT_TEST_RUN_DIR" work-area) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_RUNNAME" runname) (list "MT_TARGET" mt_target) (list "MT_ITEMPATH" item-path)) itemdat)) (miscprevvals (alist->env-vars misc-vars));; consolidate this code with the code in megatest.scm for "-execute" (test-vars (hash-table-ref/default tconfig "pre-launch-env-overrides" '())) (testprevvals (alist->env-vars test-vars)) ;; Launchwait defaults to true, must override it to turn off wait (launchwait (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t)) ;; BB: TODO: refactor this to examine return code of launcher, if nonzero, set state to launch failed. (launch-results-prev (if (eq? launcher-mode 'adjutant) '(#t 0) ;; just some fake data to fool downstream but non-applicable code (apply (if launchwait process:cmd-run-with-stderr-and-exitcode->list process-run) (if useshell (let ((cmdstr (string-intersperse fullcmd " "))) (if launchwait cmdstr (conc cmdstr " >> mt_launch.log 2>&1 &"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) (success (if launchwait (equal? 0 (cadr launch-results-prev)) #t)) (launch-results (if launchwait (car launch-results-prev) launch-results-prev))) (launch:ajt-add-vars ajtdat env-override-vars) (launch:ajt-add-vars ajtdat misc-vars) (launch:ajt-add-vars ajtdat test-vars) ;; if in adjutant mode we register the job in the jobs_queue ;; then fire off an adjutant runner ;; (if (eq? launcher-mode 'adjutant) (let* ((adjutant-runner-cmd (append (cdr launcher) (list remote-megatest "-adjutant" (launch:ajt-host-type ajtdat) "-start-dir" *toppath*))) (adj-cmd (conc (string-intersperse (map conc adjutant-runner-cmd) " ") "&"))) (rmt:no-sync-add-job (launch:ajt-host-type ajtdat) (launch:ajt-vars ajtdat) (launch:ajt-exekey ajtdat) (launch:ajt-cmdline ajtdat)) (print "adj-cmd: " adj-cmd) (system adj-cmd) )) (if (not success) (tests:test-set-status! run-id test-id "COMPLETED" "DEAD" "launcher failed; exited non-zero; check mt_launch.log" #f)) ;; (if launch-results launch-results "FAILED")) ;; (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) (if (list? launch-results) |
︙ | ︙ | |||
1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 | ;; but this hack will work! Thanks go to Alan Post of the Chicken email list ;; NB// Is this still needed? Should be safe to go back to "exit" now? (process-signal (current-process-id) signal/kill) )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) launch-results)) (change-directory *toppath*) (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) ;; recover a test where the top controlling mtest may have died ;; (define (launch:recover-test run-id test-id) | > > > > | 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 | ;; but this hack will work! Thanks go to Alan Post of the Chicken email list ;; NB// Is this still needed? Should be safe to go back to "exit" now? (process-signal (current-process-id) signal/kill) )) (alist->env-vars miscprevvals) (alist->env-vars testprevvals) (alist->env-vars commonprevvals) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. ;; the unlock previously was further up. This seemed wrong as we should not proceed until the ;; vars have been reset. (mutex-unlock! *launch-setup-mutex*) launch-results)) (change-directory *toppath*) (thread-sleep! (configf:lookup-number *configdat* "setup" "inter-test-delay" default: 0.0)))) ;; recover a test where the top controlling mtest may have died ;; (define (launch:recover-test run-id test-id) |
︙ | ︙ | |||
1685 1686 1687 1688 1689 1690 1691 | (read-symbolic-link (conc "/proc/" pid "/cwd")) #f))) ;; now wait on that process if all is correct ;; periodically update the db with runtime ;; when the process exits look at the db, if still RUNNING after 10 seconds set ;; state/status appropriately (process-wait pid))) | > > > > > > > > > > > > > > > > | 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 | (read-symbolic-link (conc "/proc/" pid "/cwd")) #f))) ;; now wait on that process if all is correct ;; periodically update the db with runtime ;; when the process exits look at the db, if still RUNNING after 10 seconds set ;; state/status appropriately (process-wait pid))) ;; (lock-key (conc "test-" test-id)) ;; (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) ;; (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds ;; (if (car lock) ;; #t ;; (if (> (current-seconds) expire-time) ;; (begin ;; (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) ;; (rmt:no-sync-del! lock-key) ;; destroy the lock ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; ;; (begin ;; (thread-sleep! 1) ;; (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) |
Modified megatest.scm from [35ed864745] to [0cca94bf63].
︙ | ︙ | |||
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) ;; (declare (uses ftail)) ;; (import ftail) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) | > > > > > > > | | | 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses adjutant)) (import adjutant) (declare (uses mttop)) (import mttop) ;; (declare (uses ftail)) ;; (import ftail) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") (include "run_records.scm") (include "megatest-fossil-hash.scm") (use (prefix sqlite3 sqlite3:) srfi-1 posix regex regex-case srfi-69 (prefix base64 base64:) readline apropos json http-client directory-utils typed-records matchable http-client srfi-18 extras format call-with-environment-variables) ;; Added for csv stuff - will be removed ;; (use sparse-vectors) (require-library mutils) |
︙ | ︙ | |||
100 101 102 103 104 105 106 107 108 109 110 111 112 113 | version " megatest-version " license GPL, Copyright Matt Welland 2006-2017 Usage: megatest [options] -h : this help -manual : show the Megatest user manual -version : print megatest version (currently " megatest-version ") Launching and managing runs -run : run all tests or as specified by -testpatt -remove-runs : remove the data for a run, requires -runname and -testpatt Optionally use :state and :status, use -keep-records to remove only the run data. Use -kill-wait to override the 10 second per test wait after kill delay (e.g. -kill-wait 0). | > | 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | version " megatest-version " license GPL, Copyright Matt Welland 2006-2017 Usage: megatest [options] -h : this help -manual : show the Megatest user manual -version : print megatest version (currently " megatest-version ") help : help for the new Megatest interface Launching and managing runs -run : run all tests or as specified by -testpatt -remove-runs : remove the data for a run, requires -runname and -testpatt Optionally use :state and :status, use -keep-records to remove only the run data. Use -kill-wait to override the 10 second per test wait after kill delay (e.g. -kill-wait 0). |
︙ | ︙ | |||
197 198 199 200 201 202 203 | -sync-to-megatest.db : pull data from cache files in /tmp/$USER to megatest.db -sync-to dest : sync to new postgresql central style database -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname | | | 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | -sync-to-megatest.db : pull data from cache files in /tmp/$USER to megatest.db -sync-to dest : sync to new postgresql central style database -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -adjutant host-type : start the server/adjutant with given host-type use 0,0 to auto use full machine -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile -list-servers : list the servers -kill-servers : kill all servers -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm |
︙ | ︙ | |||
266 267 268 269 270 271 272 273 274 275 276 277 278 279 | Called as " (string-intersperse (argv) " ") " Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface ;; -config fname : override the runconfigs file with fname ;; process args (define remargs (args:get-args (argv) (list "-runtests" ;; run a specific test "-config" ;; override the config file name "-append-config" "-execute" ;; run the command encoded in the base64 parameter | > > > > | 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 | Called as " (string-intersperse (argv) " ") " Version " megatest-version ", built from " megatest-fossil-hash )) ;; -gui : start a gui interface ;; -config fname : override the runconfigs file with fname (mttop-run (command-line-arguments) '("help")) ;; process args (define remargs (args:get-args (argv) (list "-runtests" ;; run a specific test "-config" ;; override the config file name "-append-config" "-execute" ;; run the command encoded in the base64 parameter |
︙ | ︙ | |||
916 917 918 919 920 921 922 923 924 925 926 | ;; (if (args:get-arg "-server") (let ((tl (launch:setup)) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (server:launch 0 transport-type) (set! *didsomething* #t))) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; (if (args:get-arg "-adjutant") | > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > | | 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 | ;; (if (args:get-arg "-server") (let ((tl (launch:setup)) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (server:launch 0 transport-type) (set! *didsomething* #t))) (define (naylist->alist inlst) (map (lambda (dat) (cons (car dat) (or (if (list? (cdr dat)) (if (null? (cdr dat)) "" (cadr dat)) (cdr dat)) ""))) ;; we need a string for call-with-environment-variables inlst)) ;; The adjutant is a bit different, it does NOT run (launch:setup) as it is not necessarily tied to ;; a specific Megatest area. Detail are being hashed out and this may change. ;; (if (args:get-arg "-adjutant") (let* ((host-type (args:get-arg "-adjutant"))) (launch:setup) ;; dang it, wish this wasn't needed (print "Running the adjutant!") (let loop ((wait-count 0)) (if (< wait-count 10) ;; 6 x 10 seconds = one minute (let* ((dat (rmt:no-sync-take-job host-type))) (match dat ((id ht vars exekey cmdline state event-time last-update) (let ((vars-alist (with-input-from-string vars read) )) (print "Vars:") (pp vars-alist) (call-with-environment-variables (naylist->alist vars-alist) (lambda () (system cmdline)))) (loop 0)) (else (thread-sleep! 10) (loop (+ wait-count 1))))) (print "I'm bored. Exiting."))) ;; (adjutant-run (args:get-arg "-ajutant") rmt:no-sync-take-job) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit (let* ((servers (server:get-list *toppath*)) |
︙ | ︙ |
Added mttop.scm version [0ba1c89f48].
> > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 | ;; Copyright 2006-2011, Matthew Welland. ;; ;; This program is made available under the GNU GPL version 2.0 or ;; greater. See the accompanying file COPYING for details. ;; ;; This program is distributed WITHOUT ANY WARRANTY; without even the ;; implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR ;; PURPOSE. ;; This is from the perl world, a hash of hashes is a super easy way to keep a handle on ;; lots of disparate data ;; (declare (unit mttop)) (module mttop * (import chicken scheme ;; data-structures posix srfi-1 ;; srfi-13 srfi-69 ports extras regex posix data-structures matchable ) (define (str-is-cmd cmd all-cmds) (let* ((rx (regexp (conc "^" cmd ".*"))) (mx (filter string? (map (lambda (x) (let ((res (string-match rx x))) (if res (car res) #f))) all-cmds)))) (if (eq? (length mx) 1) ;; have a command (car mx) #f))) (define (mttop-run args all-cmds) ;; any path through this call must end in exit if it is NOT an old Megatest call (if (null? args) #f ;; continue on and do the old Megatest stuff (let ((cmd (str-is-cmd (car args) all-cmds))) (if cmd (begin (case (string->symbol cmd) ((help)(print "New help")) (else (print "Command " cmd " is not implemented yet."))) (exit)) ;; always exit here #f)))) ;; or continue on to Megatest old stuff here ) |
Modified rmt.scm from [ed2cbd88f2] to [e580559d86].
︙ | ︙ | |||
943 944 945 946 947 948 949 950 951 952 953 954 955 956 | (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) (define (rmt:no-sync-get-lock keyname) (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded) (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) | > > > > > > > > > | 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 | (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) (define (rmt:no-sync-get-lock keyname) (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) (define (rmt:no-sync-add-job host-type vars-list exekey cmdline) (rmt:send-receive 'no-sync-add-job #f `(,host-type ,vars-list ,exekey ,cmdline))) (define (rmt:no-sync-take-job host-type) (rmt:send-receive 'no-sync-take-job #f `(,host-type))) (define (rmt:no-sync-job-records-clean) (rmt:set-receive 'no-sync-job-records-clean #f '())) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded) (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) |
︙ | ︙ |