Megatest

Check-in [4245c94942]
Login
Overview
Comment:Cross-pollinated v1.65 run-testdat work. This likely still does not pass obfusc-skel.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.71_old
Files: files | file ages | folders
SHA1: 4245c94942b081e77ddf5b1ab0563a5a10b9e3b9
User & Date: matt on 2020-10-05 23:07:17
Other Links: branch diff | manifest | tags
Context
2020-10-12
10:07
Added additional checks on vectors being slung around in communication. Leaf check-in: 73dd709e16 user: mrwellan tags: v1.71_old
2020-10-05
23:07
Cross-pollinated v1.65 run-testdat work. This likely still does not pass obfusc-skel. check-in: 4245c94942 user: matt tags: v1.71_old
09:37
Patched in fix for local ssh burden fix check-in: 1ad4b8ce96 user: mrwellan tags: v1.71_old
Changes

Modified TODO from [0885dee1e5] to [dcd0f52bc7].

40
41
42
43
44
45
46

47
48
49
50
51
52
53

WW19
. break command line into sections; all, run control, queries, utilities etc.
. pull in ftfplan (not integrated, just code pulled in)

WW20
. ./configure => ubuntu, sles11, sles12, rh7

. Add output flushing in teamcity support
. Switch to using simple runs query everywhere
. Add end_time to runs and add a rollup call that sets state, status and end_time

Future
. Switch to scsh-process pipeline management for job execution/control
. Use call-with-environment-variables more.







>







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54

WW19
. break command line into sections; all, run control, queries, utilities etc.
. pull in ftfplan (not integrated, just code pulled in)

WW20
. ./configure => ubuntu, sles11, sles12, rh7
. Jenkins junit XML support
. Add output flushing in teamcity support
. Switch to using simple runs query everywhere
. Add end_time to runs and add a rollup call that sets state, status and end_time

Future
. Switch to scsh-process pipeline management for job execution/control
. Use call-with-environment-variables more.

Modified dashboard.scm from [59b903a7ca] to [d4733659e1].

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
    ;;
    ;; (debug:print 0 *default-log-port* "got-all: " got-all " multi-get: " multi-get " num-to-get: " num-to-get " (length tmptests): " (length tmptests) " db-modified: " db-modified " db-mod-time: " db-mod-time " db-path: " db-path)
    (if got-all
	(begin
	  (dboard:rundat-last-update-set!     run-dat (- start-time 2))
	  (dboard:rundat-run-data-offset-set! run-dat 0))
	(begin
	  ;;; (thread-sleep! 0.25) ;; give the rest of the gui some time to update. <-- this did NOT help
	  (dboard:rundat-run-data-offset-set! run-dat
					      (+ num-to-get (dboard:rundat-run-data-offset run-dat)))))

    (for-each 
     (lambda (tdat)
       (let ((test-id (db:test-get-id tdat))
	     (state   (db:test-get-state tdat)))







<







718
719
720
721
722
723
724

725
726
727
728
729
730
731
    ;;
    ;; (debug:print 0 *default-log-port* "got-all: " got-all " multi-get: " multi-get " num-to-get: " num-to-get " (length tmptests): " (length tmptests) " db-modified: " db-modified " db-mod-time: " db-mod-time " db-path: " db-path)
    (if got-all
	(begin
	  (dboard:rundat-last-update-set!     run-dat (- start-time 2))
	  (dboard:rundat-run-data-offset-set! run-dat 0))
	(begin

	  (dboard:rundat-run-data-offset-set! run-dat
					      (+ num-to-get (dboard:rundat-run-data-offset run-dat)))))

    (for-each 
     (lambda (tdat)
       (let ((test-id (db:test-get-id tdat))
	     (state   (db:test-get-state tdat)))
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852

;; this calls dboard:get-tests-for-run-duplicate for each run
;;
;; create a virtual table of all the tests
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;;
(define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts)
  (dboard:tabdat-already-running-set! tabdat #t)
  (let* (;; (already-running  (dboard:tabdat-already-running tabdat))
	 (access-mode      (dboard:tabdat-access-mode tabdat))
         (keys             (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys)))
	 (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2))
         (allruns          (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts))
         ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f))
         (allruns-tree    (rmt:get-runs-by-patt keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname")
	 (header      (db:get-header allruns))
	 (runs        (db:get-rows   allruns)) ;; RA => Filtered as per runpatt selected







<
<
|







835
836
837
838
839
840
841


842
843
844
845
846
847
848
849

;; this calls dboard:get-tests-for-run-duplicate for each run
;;
;; create a virtual table of all the tests
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;;
(define (dboard:update-rundat tabdat runnamepatt numruns testnamepatt keypatts)


  (let* ((access-mode      (dboard:tabdat-access-mode tabdat))
         (keys             (dboard:tabdat-keys tabdat)) ;; (db:dispatch-query access-mode rmt:get-keys db:get-keys)))
	 (last-runs-update (- (dboard:tabdat-last-runs-update tabdat) 2))
         (allruns          (rmt:get-runs runnamepatt numruns (dboard:tabdat-start-run-offset tabdat) keypatts))
         ;;(allruns-tree (rmt:get-runs-by-patt (dboard:tabdat-keys tabdat) "%" #f #f #f #f))
         (allruns-tree    (rmt:get-runs-by-patt keys "%" #f #f #f #f 0)) ;; last-runs-update));;'("id" "runname")
	 (header      (db:get-header allruns))
	 (runs        (db:get-rows   allruns)) ;; RA => Filtered as per runpatt selected
3725
3726
3727
3728
3729
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744
3745
;;  removing the tabdat-values proc 
;;
;; (define (tabdat-values tabdat)

;; runs update-rundat using the various filters from the gui
;;
(define (dashboard:do-update-rundat tabdat)
  ;; this seems like a good place to check for already running and skip if so
  ;;
  ;; (set! *updater-running* #t)
;;(if (dboard:tabdat-already-running tabdat)
;;    (begin
;;	(debug:print-info 0 *default-log-port* "Dashboard overloaded - updates will be slow, skipping update.")
;;	(dboard:tabdat-target tabdat))
  (dboard:update-rundat
   tabdat
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")
   (dboard:tabdat-numruns tabdat)
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
   ;; generate key patterns from the target stored in tabdat
   (let* ((dbkeys (dboard:tabdat-dbkeys tabdat)))







<
<
<
<
<
<
<







3722
3723
3724
3725
3726
3727
3728







3729
3730
3731
3732
3733
3734
3735
;;  removing the tabdat-values proc 
;;
;; (define (tabdat-values tabdat)

;; runs update-rundat using the various filters from the gui
;;
(define (dashboard:do-update-rundat tabdat)







  (dboard:update-rundat
   tabdat
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")
   (dboard:tabdat-numruns tabdat)
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
   ;; generate key patterns from the target stored in tabdat
   (let* ((dbkeys (dboard:tabdat-dbkeys tabdat)))

Modified launch.scm from [9905b8fdbe] to [3e2d417312].

460
461
462
463
464
465
466
467
468
469

470
471
472
473
474
475
476
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
		  (and (equal? (db:test-get-state test-info) "COMPLETED")                           ;; completed/abort => rerun if asked
		       (member (db:test-get-status test-info) '("ABORT"))))

	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))







|
|
|
>







460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
				(db:test-get-host        test-info)
				(begin
				  (debug:print 0 *default-log-port* "ERROR: failed to find a record for test-id " test-id ", exiting.")
				  (exit))))
		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ;; ((or (member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	     ;;  (and (equal? (db:test-get-state test-info) "COMPLETED")                           ;; completed/abort => rerun if asked
	     ;;	       (member (db:test-get-status test-info) '("ABORT"))))
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))

Modified runs.scm from [e5ce9acbf0] to [497a5137ad].

1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
	      (loop (car tal)(cdr tal) reg reruns)))
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (and (list? can-run-more)
		     (car can-run-more)) ;; itemized test expanded here
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup
						    max-concurrent-jobs run-id waitons item-path
						    testmode test-record can-run-more items runname
						    tconfig reglen test-registry test-records itemmaps)))
		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs







|
|
|
|
<
<
<







1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791



1792
1793
1794
1795
1796
1797
1798
	      (loop (car tal)(cdr tal) reg reruns)))
         
	 ;; if items is a proc then need to run items:get-items-from-config, get the list and loop 
	 ;;    - but only do that if resources exist to kick off the job
	 ;; EXPAND ITEMS
	 ((or (procedure? items)(eq? items 'have-procedure))
          (debug:print-info 4 *default-log-port* "cond branch - "  "rtq-4")
	  (let ((can-run-more    #f)) ;; (runs:can-run-more-tests runsdat run-id jobgroup max-concurrent-jobs)))
	    (if (not can-run-more) #;(and (list? can-run-more)
		(car can-run-more))
		(let ((loop-list (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record can-run-more items runname tconfig reglen test-registry test-records itemmaps))) ;; itemized test expanded here



		  (if loop-list
		      (apply loop loop-list)
                      (debug:print-info 4 *default-log-port* " -- Can't expand hed="hed)))
		;; if can't run more just loop with next possible test
		(loop (car newtal)(cdr newtal) reg reruns))))
         
	 ;; this case should not happen, added to help catch any bugs

Modified server.scm from [fcc299d5de] to [e17aad21de].

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails
;;
#;(define (server:wait-for-server-start-last-flag areapath)
  (let* ((start-flag (conc areapath "/logs/server-start-last"))
	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
	 (server-key (conc (get-host-name) "-" (current-process-id))))
    (if (file-exists? start-flag)
	(let* ((fmodtime (file-modification-time start-flag))
	       (delta    (- (current-seconds) fmodtime))







|

|







317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333

(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; wait for server=start-last to be three seconds old
;;
(define (server:wait-for-server-start-last-flag areapath)
  (let* ((start-flag (conc areapath "/logs/server-start-last"))
	 ;;; THIS INTERACTS WITH [server] timeout. Suggest using 0.1 or above for timeout (6 seconds)
	 (reftime    (configf:lookup-number *configdat* "server" "idletime" default: 4))
	 (server-key (conc (get-host-name) "-" (current-process-id))))
    (if (file-exists? start-flag)
	(let* ((fmodtime (file-modification-time start-flag))
	       (delta    (- (current-seconds) fmodtime))
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
	      #t ;; (system (conc "touch " start-flag)) ;; lazy but safe
	      (begin
		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
		(thread-sleep! reftime)
		(server:wait-for-server-start-last-flag areapath)))))))

;; wait for server=start-last to be three seconds old
;;
(define (server:wait-for-server-start-last-flag areapath)
  (let* ((start-flag (conc areapath "/logs/server-start-last")))
    (if (file-exists? start-flag)
	(let* ((fmodtime (file-modification-time start-flag))
	       (reftime  (+ 2 (random 3)))
	       (delta    (- (current-seconds) fmodtime))
	       (all-go   (> delta reftime)))
	  (if all-go







|

|







345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
	      #t ;; (system (conc "touch " start-flag)) ;; lazy but safe
	      (begin
		(debug:print-info 0 *default-log-port* "Gating server start, last start: "
				  fmodtime ", delta: " delta ", reftime: " reftime ", all-go=" all-go)
		(thread-sleep! reftime)
		(server:wait-for-server-start-last-flag areapath)))))))

;; wait for server=start-last to be three seconds old - this might be what was causing the obfusc-skel fails
;;
#;(define (server:wait-for-server-start-last-flag areapath)
  (let* ((start-flag (conc areapath "/logs/server-start-last")))
    (if (file-exists? start-flag)
	(let* ((fmodtime (file-modification-time start-flag))
	       (reftime  (+ 2 (random 3)))
	       (delta    (- (current-seconds) fmodtime))
	       (all-go   (> delta reftime)))
	  (if all-go

Modified tests.scm from [af455125f4] to [9198f1fbaa].

1985
1986
1987
1988
1989
1990
1991






















1992
1993
1994
1995
1996
1997
1998
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (tmpfree  (get-df "/tmp"))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree)))
    






















	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (tmpfree  (get-df "/tmp"))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname update-db: update-db tmpfree: tmpfree)))
    
;; (define (tests:set-partial-meta-info test-id run-id minutes work-area)
#;(define (tests:set-partial-meta-info test-id run-id minutes work-area remtries)
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (print-call-chain (current-error-port))
	   (debug:print-info 0 *default-log-port* "WARNING: failed to set meta info. Will try " remtries " more times")
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print-error 0 *default-log-port* "tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 *default-log-port* "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	   (debug:print 5 *default-log-port* "exn=" (condition->list exn))
	   (debug:print 0 *default-log-port* " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain (current-error-port))))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes))))
	 
;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)