Overview
Comment: | Experimentally cutting back the open-db/finalize cycle NOTE: This made contention on the db worse and causes failures |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | experimental-streamlining |
Files: | files | file ages | folders |
SHA1: |
e01e81f554b0bb25baf115a8a27fb05d |
User & Date: | matt on 2012-03-31 18:26:31 |
Original Comment: | Experimentally cutting back the open-db/finalize cycle |
Other Links: | branch diff | manifest | tags |
Context
2012-03-31
| ||
18:26 | Experimentally cutting back the open-db/finalize cycle NOTE: This made contention on the db worse and causes failures Closed-Leaf check-in: e01e81f554 user: matt tags: experimental-streamlining | |
2012-03-30
| ||
00:07 | Fixed reading of runconfigs; chdir to location of .config before reading sub-files; added pattern matching on sections to target; added tests, removed some redundant db accesses check-in: c65398ee68 user: matt tags: trunk | |
Changes
Modified db.scm from [1ae5c33688] to [2e8024a3e8].
︙ | ︙ | |||
708 709 710 711 712 713 714 | (sqlite3:execute db "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'), pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name run-id test-name) | | | < < | 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 | (sqlite3:execute db "UPDATE tests SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'), pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED')) WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name run-id test-name) (thread-sleep! 0.01) ;; give other processes a chance here (if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" "RUNNING" run-id test-name) (sqlite3:execute db "UPDATE tests SET state=CASE WHEN (SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 'RUNNING' ELSE 'COMPLETED' END, status=CASE WHEN fail_count > 0 THEN 'FAIL' WHEN pass_count > 0 AND fail_count=0 THEN 'PASS' ELSE 'UNKNOWN' END WHERE run_id=? AND testname=? AND item_path='';" run-id test-name run-id test-name))))) ;;====================================================================== ;; Tests meta data ;;====================================================================== ;; read the record given a testname |
︙ | ︙ |
Modified launch.scm from [2790622feb] to [8c18cb9491].
︙ | ︙ | |||
115 116 117 118 119 120 121 | ;; so this is a good place to remove the records for ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) ;; from here on out we will open and close the db ;; on every access to reduce the probablitiy of ;; contention or stuck access on nfs. | | | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | ;; so this is a good place to remove the records for ;; any previous runs ;; (db:test-remove-steps db run-id testname itemdat) ;; from here on out we will open and close the db ;; on every access to reduce the probablitiy of ;; contention or stuck access on nfs. ;; (sqlite3:finalize! db) (let* ((m (make-mutex)) (kill-job? #f) (exit-info (vector #t #t #t)) (job-thread #f) (runit (lambda () ;; (let-values |
︙ | ︙ | |||
148 149 150 151 152 153 154 | (loop (+ i 1))) ))))) ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if ezsteps (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) | | > | | | 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | (loop (+ i 1))) ))))) ;; then, if runscript ran ok (or did not get called) ;; do all the ezsteps (if any) (if ezsteps (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here? (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())) ;; (db (open-db)) ) ;; (if (not (args:get-arg "-server")) ;; (server:client-setup db)) (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps")) ;; if ezsteps was defined then we are sure to have at least one step but check anyway (if (not (> (length ezstepslst) 0)) (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length") (let loop ((ezstep (car ezstepslst)) (tal (cdr ezstepslst)) (prevstep #f)) |
︙ | ︙ | |||
249 250 251 252 253 254 255 | (inexact->exact (round (- (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) | | | | | 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 | (inexact->exact (round (- (current-seconds) start-seconds))))) (kill-tries 0)) (let loop ((minutes (calc-minutes))) (let* (;; (db (open-db)) (cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) (tmpfree (get-df "/tmp"))) ;; (if (not (args:get-arg "-server")) ;; (server:client-setup db)) (if (not cpuload) (begin (debug:print 0 "WARNING: CPULOAD not found.") (set! cpuload "n/a"))) (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a"))) (set! kill-job? (test-get-kill-request db run-id test-name itemdat)) (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree) (if kill-job? (begin (mutex-lock! m) |
︙ | ︙ | |||
283 284 285 286 287 288 289 | (system (conc "kill -9 " p-id)))))) (car processes)) (system (conc "kill -9 " pid)))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (test-set-status! db test-id "KILLED" "FAIL" (args:get-arg "-m") #f) | | | | | | | 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 | (system (conc "kill -9 " p-id)))))) (car processes)) (system (conc "kill -9 " pid)))) (begin (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process") (test-set-status! db test-id "KILLED" "FAIL" (args:get-arg "-m") #f) ;; (sqlite3:finalize! db) (exit 1)))) (set! kill-tries (+ 1 kill-tries)) (mutex-unlock! m))) ;; (sqlite3:finalize! db) (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses (loop (calc-minutes))))))) (th1 (make-thread monitorjob)) (th2 (make-thread runit))) (set! job-thread th2) (thread-start! th1) (thread-start! th2) (thread-join! th2) (mutex-lock! m) ;; (set! db (open-db)) ;; (if (not (args:get-arg "-server")) ;; (server:client-setup db)) (let* ((item-path (item-list->path itemdat)) (testinfo (rdb:get-test-info db run-id test-name item-path))) (if (not (equal? (db:test-get-state testinfo) "COMPLETED")) (begin (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status) (test-set-status! db test-id (if kill-job? "KILLED" "COMPLETED") |
︙ | ︙ | |||
462 463 464 465 466 467 468 | (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) (testinfo (rdb:get-test-info db run-id test-name item-path)) | | > | 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 | (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all (diskpath #f) (cmdparms #f) (fullcmd #f) ;; (define a (with-output-to-string (lambda ()(write x)))) (mt-bindir-path #f) (item-path (item-list->path itemdat)) (testinfo (rdb:get-test-info db run-id test-name item-path)) (test-id (db:test-get-id testinfo)) (debug-param (if (args:get-arg "-debug")(list "-debug" (args:get-arg "-debug")) '()))) (if hosts (set! hosts (string-split hosts))) ;; set the megatest to be called on the remote host (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest")) (set! mt-bindir-path (pathname-directory remote-megatest)) (if launcher (set! launcher (string-split launcher))) ;; set up the run work area for this test (set! diskpath (get-best-disk *configdat*)) |
︙ | ︙ | |||
498 499 500 501 502 503 504 | (list 'runname runname) (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " ")))) ;; clean out step records from previous run if they exist (db:delete-test-step-records db run-id test-name itemdat) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (cond ((and launcher hosts) ;; must be using ssh hostname | | | | | 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 | (list 'runname runname) (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " ")))) ;; clean out step records from previous run if they exist (db:delete-test-step-records db run-id test-name itemdat) (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir (cond ((and launcher hosts) ;; must be using ssh hostname (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param))) (launcher (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param))) (else (if (not useshell)(debug:print 0 "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section")) (set! fullcmd (append (list remote-megatest test-sig "-execute" cmdparms) debug-param (list (if useshell "&" "")))))) (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm")))) (debug:print 1 "Launching megatest for test " test-name " in " work-area" ...") (test-set-status! db test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED")) ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done (debug:print 4 "fullcmd: " fullcmd) (let* ((commonprevvals (alist->env-vars (hash-table-ref/default *configdat* "env-override" '()))) |
︙ | ︙ |