Megatest

Changes On Branch dbdd2c33cce3a3ef
Login

Changes In Branch v2.0001-matt-test-edits Through [dbdd2c33cc] Excluding Merge-Ins

This is equivalent to a diff from 3d2d201a06 to dbdd2c33cc

2022-02-10
12:19
changed the config hash key for toppath from empty string to toppath check-in: 366b1b75fd user: mmgraham tags: v2.0001
2022-02-09
09:56
Added back use of mutex for transactions check-in: 297a374249 user: mrwellan tags: v2.0001-matt-test-edits
08:02
Initialize placeholder record to correct length in db:get-run-info check-in: dbdd2c33cc user: mrwellan tags: v2.0001-matt-test-edits
2022-02-06
19:53
Reduced server expiration to 5sec. Fixed typo (extra paren) check-in: b612b353ea user: matt tags: v2.0001-matt-test-edits
2022-02-03
18:05
tweak waits in runconfigs check-in: 6c303b59b4 user: mrwellan tags: v2.0001-matt-test-edits
2022-02-02
18:07
corrected *configdat* to *runconfigdat* check-in: 3d2d201a06 user: mmgraham tags: v2.0001
16:08
changed to send unquoted cmd to runconfigs-get. When quoted, configf:lookup could not find the entry check-in: 2896749a24 user: mmgraham tags: v2.0001

Modified commonmod.scm from [875119b082] to [6aaa47a003].

4436
4437
4438
4439
4440
4441
4442
4443


4444
4445
4446
4447
4448
4449
4450
;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	60))) ;; default is one minute



(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)








|
>
>







4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	;; 60 ;; default is one minute
	5
	)))

(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

Modified dashboard.scm from [d302c30c66] to [5c46200846].

875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
                      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
                      (let* ((old-val (iup:attribute *tim* "TIME"))
                             (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
                        (if (< (string->number new-val) 5000)
                            ((debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
                            (iup:attribute-set! *tim* "TIME" new-val))))


                      )
		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)
    (dboard:update-tree tabdat runs-hash header tb)))







|
|
|
|
|
|
<
<
<







875
876
877
878
879
880
881
882
883
884
885
886
887



888
889
890
891
892
893
894
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
		    (when (> elapsed-time 2)   
		      (debug:print 0 *default-log-port* "NOTE: updates are taking a long time, " elapsed-time "s elapsed.")
		      (let* ((old-val (iup:attribute *tim* "TIME"))
			     (new-val (number->string (inexact->exact (floor (* 2  (string->number old-val)))))))
			(if (< (string->number new-val) 5000)
			    (debug:print 0 *default-log-port* "NOTE: increasing poll interval from "old-val" to "new-val)
			    (iup:attribute-set! *tim* "TIME" new-val))))



		    (dboard:tabdat-allruns-set! tabdat new-res)
		    maxtests)
		  (if (> (dboard:rundat-run-data-offset run-struct) 0)
		      (loop run tal new-res newmaxtests) ;; not done getting data for this run
		      (loop (car tal)(cdr tal) new-res newmaxtests)))))))
    (dboard:tabdat-filters-changed-set! tabdat #f)
    (dboard:update-tree tabdat runs-hash header tb)))

Modified dbmod.scm from [8c09a0af38] to [0e76e5785b].

2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (vector #f #f #f #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;;  "area_id"))
	 (header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    







|







2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
;; use (get-value-by-header (db:get-header runinfo)(db:get-rows runinfo))
;; NOTE: Does NOT return a list of rows (or one row) for the first slot of the vector
;;       this is inconsistent with get-runs but it makes some sense.
;;
(define (db:get-run-info dbstruct run-id)
  ;;(if (hash-table-ref/default *run-info-cache* run-id #f)
  ;;    (hash-table-ref *run-info-cache* run-id)
  (let* ((res       (make-vector 11 #f))
	 (keys      (db:get-keys dbstruct))
	 (remfields (list "id" "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")) ;;  "area_id"))
	 (header    (append keys remfields))
	 (keystr    (conc (keys->keystr keys) ","
			  (string-intersperse remfields ","))))
    (debug:print-info 11 *default-log-port* "db:get-run-info run-id: " run-id " header: " header " keystr: " keystr)
    

Modified runsmod.scm from [727372ff23] to [26ea23059a].

1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
	    (runs:too-soon-delay (conc "loop delay " hed) 1 1)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))







|







1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)

	(if (> (- (current-seconds) *last-test-launch*) 5)        ;; be pretty aggressive for five seconds after
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.6)    ;; starting a test then apply more delay
	    (runs:too-soon-delay (conc "loop delay " hed) 1 0.1)) 
	
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched 
    (rmt:set-var run-id (conc "launch-complete-" run-id) "yes")  
        
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
    (thread-sleep! 10) ;; I think there is a race condition here. Let states/statuses settle
    
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))







|







1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
	 ))) ;; end loop on sorted test names
    ;; this is the point where everything is launched and now you can mark the run in metadata table as all launched 
    (rmt:set-var run-id (conc "launch-complete-" run-id) "yes")  
        
    ;; now *if* -run-wait we wait for all tests to be done
    ;; Now wait for any RUNNING tests to complete (if in run-wait mode)
    ;; (if (runs:dat-load-mgmt-function runsdat)((runs:dat-load-mgmt-function runsdat)))
    (thread-sleep! 0.1) ;; I think there is a race condition here. Let states/statuses settle
    
    (let wait-loop ((num-running      (rmt:get-count-tests-running-for-run-id run-id))
		    (prev-num-running 0))
      ;; (debug:print-info 13 *default-log-port* "num-running=" num-running ", prev-num-running=" prev-num-running)
      (if (and (or (args:get-arg "-run-wait")
		   (equal? (configf:lookup *configdat* "setup" "run-wait") "yes"))
	       (> num-running 0))

Modified ulex-dual/dbmgr.scm from [53b181f4c9] to [9a6a086d09].

331
332
333
334
335
336
337

338

339
340
341
342
343
344
345




346
347
348
349
350
351
352

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")

    (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex

           ;; then send-receive using the ulex layer to host-port stored in cdat
           (res      (send-receive uconn (conndat-hostport cdat) cmd params)))
      ;; since we accessed the server we can bump the expires time up
      (conndat-expires-set! cdat (+ (current-seconds)
                                    (server:expiration-timeout)
                                    -2)) ;; two second margin for network time misalignments etc.
      res)))





;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname







>
|
>
|
|
|
|
|
|
|
>
>
>
>







331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358

;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real sinfo apath dbname cmd params)
  (let* ((cdat (rmt:get-conn sinfo apath dbname)))
    (assert cdat "FATAL: rmt:send-receive-real called without the needed channels opened")
    (condition-case
     (let* ((uconn    (servdat-uconn sinfo)) ;; get the interface to ulex
	    (hostport (conndat-hostport cdat))
            ;; then send-receive using the ulex layer to host-port stored in cdat
            (res      (send-receive uconn hostport cmd params)))
       ;; since we accessed the server we can bump the expires time up
       (conndat-expires-set! cdat (+ (current-seconds)
                                     (server:expiration-timeout)
                                     -2)) ;; two second margin for network time misalignments etc.
       res)
     ((exn i/o net)
      (debug:print-info 0 *default-log-port* "IO failure in connection to "hostport
			", resetting connection.")
      

;
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname

Modified ulex-dual/ulex.scm from [ba1e2ab076] to [12cecae7cb].

258
259
260
261
262
263
264













265
266
267
268
269
270
271
272
	 (dat          (list `(host-port . ,my-host-port)
			     `(qrykey . qrykey)
			     `(cmd . ,cmd)
			     `(params . ,params))))
    (cond
     (isme (do-work udata dat)) ;; no transmission needed
     (else













      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
       exn
       (begin
         (print "ULEX send-receive: "cmd", "params", exn="exn)
         (message exn))
       (begin
	 ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	 (let-values (((inp oup)(tcp-connect host port)))







>
>
>
>
>
>
>
>
>
>
>
>
>
|







258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
	 (dat          (list `(host-port . ,my-host-port)
			     `(qrykey . qrykey)
			     `(cmd . ,cmd)
			     `(params . ,params))))
    (cond
     (isme (do-work udata dat)) ;; no transmission needed
     (else
      (let-values (((inp oup)(tcp-connect host port)))
	(let ((res (if (and inp oup)
		       (begin
			 (write (obj->string dat) oup)
			 (close-output-port oup)
			 (string->obj (read inp)))
		       (begin
			 (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			 #f))))
	  (close-input-port inp)))
	
	
	     
      #;(handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
       exn
       (begin
         (print "ULEX send-receive: "cmd", "params", exn="exn)
         (message exn))
       (begin
	 ;; (mutex-lock! *send-mutex*) ;; DOESN'T SEEM TO HELP
	 (let-values (((inp oup)(tcp-connect host port)))