Changes In Branch v1.65-tweak-bfsync Through [89d013c708] Excluding Merge-Ins
This is equivalent to a diff from 4b37eea7d0 to 89d013c708
2019-03-18
| ||
17:32 | Updates to mtutil to limit packet creation - configurable check-in: 16e5e51d77 user: jmoon18 tags: v1.65 | |
10:57 | catch up with v1.65 Closed-Leaf check-in: fe83540f6a user: pjhatwal tags: v1.65-end-of-run | |
2019-03-15
| ||
18:38 | ok, new syncer works, but -run-wait is not waiting. check-in: 1b9511af26 user: bjbarcla tags: v1.65-tweak-bfsync | |
2019-03-14
| ||
18:28 | wip check-in: 89d013c708 user: bjbarcla tags: v1.65-tweak-bfsync | |
11:59 | Create new branch named "mtutil" Closed-Leaf check-in: a062a8feaa user: ritikaag tags: mtutil | |
10:39 | Made changes to only create and process 4 packets per mtutil process run check-in: 4b37eea7d0 user: jmoon18 tags: v1.65 | |
2019-03-12
| ||
22:33 | Added automatic suppression of waiting for all tests to gracefully die when -remove-keep is called with actions kill-runs,remove-runs check-in: 002d037cb3 user: matt tags: v1.65 | |
Modified common.scm from [c41ac723cd] to [8e98e8852f].
︙ | ︙ | |||
935 936 937 938 939 940 941 | ) ) 0) (define (std-signal-handler signum) ;; (signal-mask! signum) | | | 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 | ) ) 0) (define (std-signal-handler signum) ;; (signal-mask! signum) (set! *time-to-exit* #t) ;;(debug:print-info 13 *default-log-port* "got signal "signum) (debug:print-error 0 *default-log-port* "Received signal " signum " aaa exiting promptly") ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway (exit)) (define (special-signal-handler signum) ;; (signal-mask! signum) |
︙ | ︙ |
Modified db.scm from [f74527238e] to [b2f54e4e77].
︙ | ︙ | |||
311 312 313 314 315 316 317 | ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; (define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used | > | | 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 | ;; This routine creates the db if not already present. It is only called if the db is not already opened ;; (define (db:open-db dbstruct #!key (areapath #f)(do-sync #t)) ;; TODO: actually use areapath (let ((tmpdb-stack (dbr:dbstruct-dbstack dbstruct))) ;; RA => Returns the first reference in dbstruct (if (stack? tmpdb-stack) (db:get-db tmpdb-stack) ;; get previously opened db (will create new db handle if all in the stack are already used (let* ((max-stale-tmp (configf:lookup-number *configdat* "server" "filling-db-max-stale-seconds" default: 10)) (dbpath (db:dbfile-path )) ;; path to tmp db area (dbexists (common:file-exists? dbpath)) (tmpdbfname (conc dbpath "/megatest.db")) (dbfexists (common:file-exists? tmpdbfname)) ;; (conc dbpath "/megatest.db"))) (mtdbexists (common:file-exists? (conc *toppath* "/megatest.db"))) (mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time (conc *toppath* "/megatest.db")) #f)) (tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) |
︙ | ︙ | |||
349 350 351 352 353 354 355 | (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (or (not dbfexists) (and modtimedelta | | | 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | (dbr:dbstruct-tmpdb-set! dbstruct tmpdb) (dbr:dbstruct-dbstack-set! dbstruct (make-stack)) ;; BB: why a stack? Why would the number of db's be indeterminate? Is this a legacy of 1.db 2.db .. ? (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) ;; olddb is already a (cons db path) (dbr:dbstruct-refndb-set! dbstruct refndb) ;; (mutex-unlock! *rundb-mutex*) (if (and (or (not dbfexists) (and modtimedelta (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) ;touch tmp db to avoid wal mode wierdness (set! (file-modification-time tmpdbfname) (current-seconds)) (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") |
︙ | ︙ |
Modified megatest.scm from [a7dc9766fe] to [8e2abc3809].
︙ | ︙ | |||
1242 1243 1244 1245 1246 1247 1248 | (db:get-value-by-header run header x)) keys) "/")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (tests (if tests-spec | | | 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 | (db:get-value-by-header run header x)) keys) "/")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (tests (if tests-spec (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) #f 'normal) |
︙ | ︙ | |||
1378 1379 1380 1381 1382 1383 1384 | (hash-table-set! data "targets" (cons targetstr (hash-table-ref/default data "targets" '()))) ))) (let* ((run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (tests (if tests-spec | | | 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 | (hash-table-set! data "targets" (cons targetstr (hash-table-ref/default data "targets" '()))) ))) (let* ((run-id (db:get-value-by-header run header "id")) (runname (db:get-value-by-header run header "runname")) (states (string-split (or (args:get-arg "-state") "") ",")) (statuses (string-split (or (args:get-arg "-status") "") ",")) (tests (if tests-spec (rmt:get-tests-for-run run-id testpatt states statuses #f #f #f 'testname 'asc ;; (db:get-tests-for-run dbstruct run-id testpatt '() '() #f #f #f 'testname 'asc ;; use qryvals if test-spec provided (if tests-spec (string-intersperse adj-tests-spec ",") ;; db:test-record-fields #f) #f 'normal) |
︙ | ︙ | |||
1503 1504 1505 1506 1507 1508 1509 | (conc "\n rundir: " (get-value-by-fieldname test test-field-index "rundir")) ;; (db:test-get-rundir test) "") ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* ;; (db:test-get-rundir test) ;; ) ) ;; Each test ;; DO NOT remote run | | | 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 | (conc "\n rundir: " (get-value-by-fieldname test test-field-index "rundir")) ;; (db:test-get-rundir test) "") ;; "\n rundir: " (get-value-by-fieldname test test-field-index "") ;; (sdb:qry 'getstr ;; (filedb:get-path *fdb* ;; (db:test-get-rundir test) ;; ) ) ;; Each test ;; DO NOT remote run (let ((steps (rmt:get-steps-for-test run-id (db:test-get-id test)))) ;; (db:get-steps-for-test dbstruct run-id (db:test-get-id test)))) (for-each (lambda (step) (format #t " Step: ~20a State: ~10a Status: ~10a Time ~22a\n" (tdb:step-get-stepname step) (tdb:step-get-state step) (tdb:step-get-status step) |
︙ | ︙ |
Modified server.scm from [8ce184eea5] to [98cdb6b75b].
︙ | ︙ | |||
498 499 500 501 502 503 504 505 506 | ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) ;; (server:release-sync-lock) ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:writable-watchdog dbstruct) (thread-sleep! 10) ;; delay for startup | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | < < | < < < < < < < < < < < < < < < < < < < < < < | > | 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | ;; (* 3 (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 180))) ;; (server:release-sync-lock) ;; (server:have-sync-lock?)) ;; (else #f)))) ;; moving this here as it needs access to db and cannot be in common. ;; (define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f)) (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) (lockfile (conc tmp-db ".lock")) (sync-cmd-core (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) (sync-cmd (if fork-to-background (conc "/usr/bin/env NBFAKE_LOG="*toppath*"/logs/last-server-sync-"(current-process-id)".log nbfake "sync-cmd-core) sync-cmd-core)) (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30)) (default-duty-cycle 0.1) (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) (calculate-off-time (lambda (work-duration duty-cycle) (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) (off-time min-intersync-delay) (do-a-sync (lambda () (if (common:simple-file-lock lockfile) (begin (if (not fork-to-background) (thread-sleep! (max off-time min-intersync-delay))) (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) (common:snapshot-file mtdbfile subdir: ".db-snapshot")) (delete-file* staging-file) (let* ((start-time (current-milliseconds)) (res (system sync-cmd)) (res2 (cond ((eq? 0 res) (delete-file* (conc mtdbfile ".backup")) (if (eq? 0 (file-size sync-log)) (delete-file sync-log)) (system (conc "/bin/mv " staging-file " " mtdbfile)) (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) (set! off-time (calculate-off-time last-sync-seconds (cond ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) duty-cycle) (else (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) default-duty-cycle)))) (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") 'sync-completed) (else (system (conc "/bin/cp "sync-log" "sync-log".fail")) (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") (if (file-exists? (conc mtdbfile ".backup")) (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))) #f))) (common:simple-file-release-lock lockfile) res2))) ;; else (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") 'parallel-sync ) ;; end if got lockfile ) ;; end lambda )) do-a-sync)) (define (server:writable-watchdog dbstruct) (thread-sleep! 10) ;; delay for startup (let* ((do-a-sync (server:get-bruteforce-syncer dbstruct))) (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") (do-a-sync) ;; keep going unless time to exit ;; (if (not *time-to-exit*) (let delay-loop ((count 0)) ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) (if (eq? count 0) ((server:get-bruteforce-syncer dbstruct fork-to-background: #t))) (if (and (not *time-to-exit*) (< count 6)) ;; was 11, changing to 4. (begin (thread-sleep! 1) (delay-loop (+ count 1)))) (if (not *time-to-exit*) (loop)))) ;; time to exit, close the no-sync db here ;; (db:no-sync-close-db no-sync-db) (if (common:low-noise-print 30) (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))))) (define (server:old-writable-watchdog dbstruct) (thread-sleep! 10) ;; delay for startup (let* ((sqlite-exe (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh (sync-log (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log"))) (tmp-area (common:get-db-tmp-area)) (tmp-db (conc tmp-area "/megatest.db")) (staging-file (conc *toppath* "/.megatest.db")) (mtdbfile (conc *toppath* "/megatest.db")) (lockfile (conc tmp-db ".lock")) (sync-cmd (conc sqlite-exe" " tmp-db " .dump | "sqlite-exe" " staging-file "&>"sync-log)) (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 30)) (default-duty-cycle 0.9) (duty-cycle (configf:lookup-number *configdat* "server" "sync-duty-cycle" default: default-duty-cycle)) (last-sync-seconds 10) ;; we will adjust this to a measurement and delay last-sync-seconds * (1 - duty-cycle) (calculate-off-time (lambda (work-duration duty-cycle) (* (/ (- 1 duty-cycle) duty-cycle) last-sync-seconds))) (off-time min-intersync-delay) (do-a-sync (lambda () (if (common:simple-file-lock lockfile) (begin (if (not (configf:lookup *configdat* "server" "disable-db-snapshot")) (common:snapshot-file mtdbfile subdir: ".db-snapshot")) (delete-file* staging-file) (let* ((start-time (current-milliseconds)) (res (system sync-cmd))) (cond ((eq? 0 res) (delete-file* (conc mtdbfile ".backup")) (if (eq? 0 (file-size sync-log)) (delete-file sync-log)) (system (conc "/bin/mv " staging-file " " mtdbfile)) (set! last-sync-seconds (/ (- (current-milliseconds) start-time) 1000)) (set! off-time (calculate-off-time last-sync-seconds (cond ((and (number? duty-cycle) (> duty-cycle 0) (< duty-cycle 1)) duty-cycle) (else (debug:print 0 *default-log-port* "WARNING: ["(common:human-time)"] server.sync-duty-cycle is invalid. Should be a number between 0 and 1, but "duty-cycle" was specified. Using default value: "default-duty-cycle) default-duty-cycle)))) (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" SYNC took "last-sync-seconds" sec") #t) (else (system (conc "/bin/cp "sync-log" "sync-log".fail")) (debug:print 0 *default-log-port* "ERROR: ["(common:human-time)"] Sync failed. See log at "sync-log".fail") (if (file-exists? (conc mtdbfile ".backup")) (system (conc "/bin/cp "mtdbfile ".backup " mtdbfile))))) (common:simple-file-release-lock lockfile))) ;; else (debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] pid="(current-process-id)" other SYNC in progress; not syncing.") ) ;; end if got lockfile ) ;; end lambda )) (if (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync (args:get-arg "-server")) (let loop () (debug:print 0 *default-log-port* "INFO: syncer thread sleeping for server.minimum-intersync-delay seconds ["min-intersync-delay"]") (thread-sleep! (max off-time min-intersync-delay)) (do-a-sync) ;; keep going unless time to exit ;; (if (not *time-to-exit*) (let delay-loop ((count 0)) ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*) (if (eq? count 0) (do-a-sync)) (if (and (not *time-to-exit*) (< count 6)) ;; was 11, changing to 4. (begin (thread-sleep! 1) (delay-loop (+ count 1)))) (if (not *time-to-exit*) (loop)))) ;; time to exit, close the no-sync db here |
︙ | ︙ |