Changes In Branch v1.71_old Through [4245c94942] Excluding Merge-Ins
This is equivalent to a diff from 08b1dfe720 to 4245c94942
2020-10-12
| ||
10:07 | Added additional checks on vectors being slung around in communication. Leaf check-in: 73dd709e16 user: mrwellan tags: v1.71_old | |
2020-10-05
| ||
23:07 | Cross-pollinated v1.65 run-testdat work. This likely still does not pass obfusc-skel. check-in: 4245c94942 user: matt tags: v1.71_old | |
09:37 | Patched in fix for local ssh burden fix check-in: 1ad4b8ce96 user: mrwellan tags: v1.71_old | |
06:43 | Removed suspect change in server that seemed to be causing the obfusc-skel fails check-in: 134d87e018 user: matt tags: v1.71_old | |
2020-10-03
| ||
21:25 | Pull in all v1.65 before patching with prereq fix. check-in: b50ac24a01 user: matt tags: v1.70-defunct | |
20:20 | Patched in fix for prereqs-not-met issue. Closed-Leaf check-in: 7ee0b77302 user: matt tags: v1.65-refactor-fix-2, v1.65-failed-testdat (unpublished) | |
2020-09-24
| ||
23:28 | Try locking transactions in no-sync db check-in: 44919a2a7f user: mrwellan tags: v1.65-transaction-locking | |
15:50 | Missed case of relying on run_duration field - fixed. ==/FAIL/orion,mars/== Closed-Leaf check-in: 08b1dfe720 user: mrwellan tags: v1.65-failed-testdat | |
2020-09-23
| ||
15:16 | Do not exit on failure to create directory - race conditons on NFS cause false fail scenarios - just keep going and cross your fingers... ==/FAIL/orion,mars/== check-in: c5b1d00b22 user: mrwellan tags: v1.6572, v1.65-failed-testdat | |
Modified TODO from [0885dee1e5] to [dcd0f52bc7].
︙ | ︙ | |||
40 41 42 43 44 45 46 47 48 49 50 51 52 53 | WW19 . break command line into sections; all, run control, queries, utilities etc. . pull in ftfplan (not integrated, just code pulled in) WW20 . ./configure => ubuntu, sles11, sles12, rh7 . Add output flushing in teamcity support . Switch to using simple runs query everywhere . Add end_time to runs and add a rollup call that sets state, status and end_time Future . Switch to scsh-process pipeline management for job execution/control . Use call-with-environment-variables more. | > | 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | WW19 . break command line into sections; all, run control, queries, utilities etc. . pull in ftfplan (not integrated, just code pulled in) WW20 . ./configure => ubuntu, sles11, sles12, rh7 . Jenkins junit XML support . Add output flushing in teamcity support . Switch to using simple runs query everywhere . Add end_time to runs and add a rollup call that sets state, status and end_time Future . Switch to scsh-process pipeline management for job execution/control . Use call-with-environment-variables more. |
︙ | ︙ |
Modified common.scm from [33c7316880] to [a37bf38bb2].
︙ | ︙ | |||
499 500 501 502 503 504 505 | ;; logs directory you wish to log-rotate. ;; (define (common:rotate-logs) (let* ((all-files (make-hash-table)) (stats (make-hash-table)) (inc-stat (lambda (key) (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) | | | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 | ;; logs directory you wish to log-rotate. ;; (define (common:rotate-logs) (let* ((all-files (make-hash-table)) (stats (make-hash-table)) (inc-stat (lambda (key) (hash-table-set! stats key (+ (hash-table-ref/default stats key 0) 1)))) (max-allowed (string->number (or (configf:lookup *configdat* "setup" "max-logfiles") "600")))) ;; name -> age (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn (begin (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn) |
︙ | ︙ |
Modified dashboard.scm from [59b903a7ca] to [d4733659e1].
︙ | ︙ | |||
718 719 720 721 722 723 724 | ;; ;; (debug:print 0 *default-log-port* "got-all: " got-all " multi-get: " multi-get " num-to-get: " num-to-get " (length tmptests): " (length tmptests) " db-modified: " db-modified " db-mod-time: " db-mod-time " db-path: " db-path) (if got-all (begin (dboard:rundat-last-update-set! run-dat (- start-time 2)) (dboard:rundat-run-data-offset-set! run-dat 0)) (begin | < | 718 719 720 721 722 723 724 725 726 727 728 729 730 731 | ;; ;; (debug:print 0 *default-log-port* "got-all: " got-all " multi-get: " multi-get " num-to-get: " num-to-get " (length tmptests): " (length tmptests) " db-modified: " db-modified " db-mod-time: " db-mod-time " db-path: " db-path) (if got-all (begin (dboard:rundat-last-update-set! run-dat (- start-time 2)) (dboard:rundat-run-data-offset-set! run-dat 0)) (begin (dboard:rundat-run-data-offset-set! run-dat (+ num-to-get (dboard:rundat-run-data-offset run-dat))))) (for-each (lambda (tdat) (let ((test-id (db:test-get-id tdat)) (state (db:test-get-state tdat))) |
︙ | ︙ | |||
836 837 838 839 840 841 842 | ;; this calls dboard:get-tests-for-run-duplicate for each run ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; (define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts) | < < | | 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 | ;; this calls dboard:get-tests-for-run-duplicate for each run ;; ;; create a virtual table of all the tests ;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") ) ;; (define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts) (let* ((access-mode (dboard:tabdat-access-mode tabdat)) (keys (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys))) (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2)) (allruns (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts)) ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f)) (allruns-tree (rmt:get-runs-by-patt keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname") (header (db:get-header allruns)) (runs (db:get-rows allruns)) ;; RA => Filtered as per runpatt selected |
︙ | ︙ | |||
3725 3726 3727 3728 3729 3730 3731 | ;; removing the tabdat-values proc ;; ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) | < < < < < < < | 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 | ;; removing the tabdat-values proc ;; ;; (define (tabdat-values tabdat) ;; runs update-rundat using the various filters from the gui ;; (define (dashboard:do-update-rundat tabdat) (dboard:update-rundat tabdat (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%") (dboard:tabdat-numruns tabdat) (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%") ;; generate key patterns from the target stored in tabdat (let* ((dbkeys (dboard:tabdat-dbkeys tabdat))) |
︙ | ︙ |
Modified launch.scm from [fb7acc7e32] to [3e2d417312].
︙ | ︙ | |||
460 461 462 463 464 465 466 | (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. | | | | > | 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 | (db:test-get-host test-info) (begin (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.") (exit)))) (test-pid (db:test-get-process_id test-info))) (cond ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag. ;; ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun ;; (and (equal? (db:test-get-state test-info) "COMPLETED") ;; completed/abort => rerun if asked ;; (member (db:test-get-status test-info) '("ABORT")))) ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request") ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a") (rmt:general-call 'set-test-start-time #f test-id) (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f) ) ;; prime it for running ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART")) |
︙ | ︙ | |||
768 769 770 771 772 773 774 | (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) | > > | | 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) (let* ((is-local (equal? host (get-host-name))) (ssh-cmd (if is-local " " (conc "ssh " host " "))) (cmd (conc ssh-cmd "pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t)) |
︙ | ︙ |
Modified megatest-version.scm from [291b748ecb] to [4f15dbe2cd].
︙ | ︙ | |||
16 17 18 19 20 21 22 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) | | | 16 17 18 19 20 21 22 23 | ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;; Always use two or four digit decimal ;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00.. ;; (declare (unit megatest-version)) (define megatest-version 1.7101) |
Modified runs.scm from [e5ce9acbf0] to [497a5137ad].
︙ | ︙ | |||
1781 1782 1783 1784 1785 1786 1787 | (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") | | | | | < < < | 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 | (loop (car tal)(cdr tal) reg reruns))) ;; if items is a proc then need to run items:get-items-from-config, get the list and loop ;; - but only do that if resources exist to kick off the job ;; EXPAND ITEMS ((or (procedure? items)(eq? items 'have-procedure)) (debug:print-info 4 *default-log-port* "cond branch - " "rtq-4") (let ((can-run-more #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs))) (if (not can-run-more) #;(and (list? can-run-more) (car can-run-more)) (let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here (if loop-list (apply loop loop-list) (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed))) ;; if can't run more just loop with next possible test (loop (car newtal)(cdr newtal) reg reruns)))) ;; this case should not happen, added to help catch any bugs |
︙ | ︙ |
Modified server.scm from [7b2af2dc7e] to [e17aad21de].
︙ | ︙ | |||
344 345 346 347 348 349 350 351 352 353 354 355 356 357 | (equal? server-key res)))) #t ;; (system (conc "touch " start-flag)) ;; lazy but safe (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! reftime) (server:wait-for-server-start-last-flag areapath))))))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least 3 seconds old | > > > > > > > > > > > > > > > > > > | 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | (equal? server-key res)))) #t ;; (system (conc "touch " start-flag)) ;; lazy but safe (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! reftime) (server:wait-for-server-start-last-flag areapath))))))) ;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails ;; #;(define (server:wait-for-server-start-last-flag areapath) (let* ((start-flag (conc areapath "/logs/server-start-last"))) (if (file-exists? start-flag) (let* ((fmodtime (file-modification-time start-flag)) (reftime (+ 2 (random 3))) (delta (- (current-seconds) fmodtime)) (all-go (> delta reftime))) (if all-go #t ;; (system (conc "touch " start-flag)) ;; lazy but safe (begin (debug:print-info 0 *default-log-port* "Gating server start, last start: " fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go) (thread-sleep! 5) (server:wait-for-server-start-last-flag areapath)))) #;(system (conc "touch " start-flag))))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least 3 seconds old |
︙ | ︙ |
Modified tests.scm from [af455125f4] to [9198f1fbaa].
︙ | ︙ | |||
1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 | (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) (tmpfree (get-df "/tmp")) (uname (get-uname "-srvpio")) (hostname (get-host-name))) (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== (define (test:archive db test-id) #f) | > > > > > > > > > > > > > > > > > > > > > > | 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 | (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) (tmpfree (get-df "/tmp")) (uname (get-uname "-srvpio")) (hostname (get-host-name))) (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree))) ;; (define (tests:set-partial-meta-info test-id run-id minutes work-area) #;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries) (let* ((cpuload (get-cpu-load)) (diskfree (get-df (current-directory))) (remtries 10)) (handle-exceptions exn (if (> remtries 0) (begin (print-call-chain (current-error-port)) (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times") (set! remtries (- remtries 1)) (thread-sleep! 10) (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1))) (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn))) (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up") (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.") (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 5 *default-log-port* "exn=" (condition->list exn)) (debug:print 0 *default-log-port* " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (print-call-chain (current-error-port)))) (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)))) ;;====================================================================== ;; A R C H I V I N G ;;====================================================================== (define (test:archive db test-id) #f) |
︙ | ︙ |