Megatest

Changes On Branch 88116f8ed36f7dd7
Login

Changes In Branch v1.80-dbperformance Through [88116f8ed3] Excluding Merge-Ins

This is equivalent to a diff from 19861e6399 to 88116f8ed3

2023-02-10
21:06
Merging pretty good branch v1.80-dbperf to v1.80 check-in: 7170e5f43b user: matt tags: v1.80
20:19
Use debugprint module in dbfile module as stepping stone to replacing old debug:print calls with new. Closed-Leaf check-in: 0e8fa15f1d user: matt tags: v1.80-debugprint
2023-02-06
19:35
Squashed v1.80-dbperformance into one commit check-in: 6c7b8be468 user: matt tags: v1.80-dbperf
2023-02-05
11:47
Minor clean up. There were a couple communication errors in sixtyfivek but they looked likely to be host related. Closed-Leaf check-in: e973b1fb77 user: matt tags: v1.80-cleanup
08:36
wip, close idle db connections check-in: 97a3c4ad11 user: matt tags: v1.80-close-idle-connections
2023-02-03
17:29
control # of dbs with parameter num-run-dbs, default=2. check-in: cd56c55e6a user: matt tags: v1.80-dbperformance
07:11
attempt to reduce load on db by requesting state/status instead of entire record check-in: 88116f8ed3 user: matt tags: v1.80-dbperformance
02:16
Reduce load from get-state-status-and-roll-up-run. check-in: 4e634eb46a user: matt tags: v1.80-dbperformance
2023-02-02
12:54
Use an actual droop check-in: 19861e6399 user: matt tags: v1.80
09:27
Change couple queries to use prepared statements check-in: a2e41e0613 user: matt tags: v1.80

Modified api.scm from [0676a2f9d1] to [f3cc459c57].

37
38
39
40
41
42
43

44
45
46
47
48
49
50
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51







+







(define api:read-only-queries
  '(get-key-val-pairs
    get-var
    get-keys
    get-key-vals
    test-toplevel-num-items
    get-test-info-by-id
    get-test-state-status-by-id
    get-steps-info-by-id
    get-data-info-by-id
    test-get-rundir-from-test-id
    get-count-tests-running-for-testname
    get-count-tests-running
    get-count-tests-running-in-jobgroup
    get-previous-test-run-record
326
327
328
329
330
331
332

333
334
335
336
337
338
339
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341







+








    ;; ARCHIVES
    ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
    
    ;; TESTS
    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
    ((get-test-state-status-by-id)     (apply db:get-test-state-status-by-id dbstruct params))
    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
    ((get-count-tests-running-for-testname) (apply db:get-count-tests-running-for-testname dbstruct params))
    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
    ;; ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
349
350
351
352
353
354
355

356
357
358
359
360
361
362
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365







+







    ((get-raw-run-stats)               (apply db:get-raw-run-stats dbstruct params))
    ((get-test-times)                  (apply db:get-test-times dbstruct params))

    ;; RUNS
    ((get-run-info)                 (apply db:get-run-info dbstruct params))
    ((get-run-status)               (apply db:get-run-status dbstruct params))
    ((get-run-state)                (apply db:get-run-state dbstruct params))
    ((get-run-state-status)         (apply db:get-run-state-status dbstruct params))
    ((set-run-status)               (apply db:set-run-status dbstruct params))
    ((set-run-state-status)  			 (apply db:set-run-state-status dbstruct params))
    ((update-tesdata-on-repilcate-db) (apply db:update-tesdata-on-repilcate-db  dbstruct params)) 
    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
    ((get-tests-for-run-state-status) (apply db:get-tests-for-run-state-status dbstruct params))
    ((get-test-id)                  (apply db:get-test-id dbstruct params))
    ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))

Modified db.scm from [66cca5d3c4] to [3db5884da5].

2064
2065
2066
2067
2068
2069
2070

2071
2072


2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083

2084
2085
















2086
2087
2088
2089
2090
2091
2092
2064
2065
2066
2067
2068
2069
2070
2071


2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085


2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108







+
-
-
+
+











+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	(db:get-cache-stmth
	db
	"SELECT status FROM runs WHERE id=?;" 
	 dbdat db
	 "SELECT status FROM runs WHERE id=?;" )
	run-id)
       res))))

(define (db:get-run-state dbstruct run-id)
  (let ((res "n/a"))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (status)
	  (set! res status))
	(db:get-cache-stmth
	db
	"SELECT state FROM runs WHERE id=?;" 
	 dbdat db
	 "SELECT state FROM runs WHERE id=?;" )
	run-id)
       res))))

(define (db:get-run-state-status dbstruct run-id)
  (let ((res (cons "n/a" "n/a")))
    (db:with-db
     dbstruct #f #f
     (lambda (dbdat db)
       (sqlite3:for-each-row 
	(lambda (state status)
	  (set! res (cons state status)))
	(db:get-cache-stmth
	 dbdat db
	 "SELECT state,status FROM runs WHERE id=?;" )
	run-id)
       res))))


;;======================================================================
;; K E Y S
;;======================================================================
2694
2695
2696
2697
2698
2699
2700
2701

2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
















2718
2719
2720
2721
2722
2723
2724
2710
2711
2712
2713
2714
2715
2716

2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756







-
+
















+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (let* ((run-ids (db:get-all-run-ids mtdb)))
    (for-each 
     (lambda (run-id)
       (let ((testrecs (db:get-all-tests-info-by-run-id mtdb run-id)))
	 (db:prep-megatest.db-adj-test-ids (dbr:dbdat-dbh mtdb) run-id testrecs)))
     run-ids)))

;; Get test data using test_id, run-id is not used
;; Get test data using test_id
;; 
(define (db:get-test-info-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res #f))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
	  ;;                0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
	  (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
	(db:get-cache-stmth dbdat db
			    (conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id=?;"))
	test-id)
       res))))

;; Get test state, status using test_id
;; 
(define (db:get-test-state-status-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res (cons #f #f)))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (state status)
	  (cons state status))
	(db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")
	test-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)
  (db:with-db
   dbstruct

Modified launch.scm from [34af87f4bd] to [b249e00a3b].

237
238
239
240
241
242
243
244
245
246



247
248
249
250
251
252
253
237
238
239
240
241
242
243



244
245
246
247
248
249
250
251
252
253







-
-
-
+
+
+







                                   (delta (abs (- df disk-free))))
                              (if (and (> df 0)
                                       (> (/ delta df) 0.1)) ;; (> delta 200) ;; ignore changes under 200 Meg
                                  df
                                  #f)))
             (do-sync       (or new-cpu-load new-disk-free over-time))

             (test-info   (rmt:get-test-info-by-id run-id test-id))
             (state       (db:test-get-state test-info))
             (status      (db:test-get-status test-info))
             (test-info   (rmt:get-test-state-status-by-id run-id test-id))
             (state       (car test-info));; (db:test-get-state test-info))
             (status      (cdr test-info));; (db:test-get-status test-info))
             (kill-reason  "no kill reason specified")
             (kill-job?    #f))
        ;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
        (cond
         ((test-get-kill-request run-id test-id)
          (set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
          (set! kill-job? #t))
763
764
765
766
767
768
769
770

771
772
773

774

775
776
777
778
779












780
781
782
783
784
785
786
763
764
765
766
767
768
769

770
771
772

773
774
775





776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794







-
+


-
+

+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







;; if dead safe to mark the test as killed in the db
;; State/status table
;; new
;; 100% COMPLETED/ (PASS,FAIL,ABORT etc.) ==> COMPLETED / X where X is same as itemized rollup
;; > 3 RUNNING with not test_dead do nothing (run should already be RUNNING/ na
;; > 0 RUNNING and test_dead then send KILLREQ ==> COMPLETED
;; 0 RUNNING ==> this is actually the first condition, should not get here

(define *last-rollup* 0)
(define (launch:end-of-run-check run-id )
    (let* ((not-completed-cnt (rmt:get-not-completed-cnt run-id))  
           (running-cnt (rmt:get-count-tests-running-for-run-id run-id))
           (running-cnt       (rmt:get-count-tests-running-for-run-id run-id))
           (all-test-launched (rmt:get-var (conc "lunch-complete-" run-id)))
	   (current-state-status (rmt:get-run-state-status run-id))
           (current-state (rmt:get-run-state run-id))
           (current-status (rmt:get-run-status run-id)))
     ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
     (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)                      
     (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
           (current-state        (car current-state-status))  ;; (rmt:get-run-state run-id))
           (current-status       (cdr current-state-status))) ;; (rmt:get-run-status run-id)))
      ;;get-vars run-id to query metadata table to check if all completed. if all-test-launched = yes then only not-completed-cnt = 0 means everyting is completed if no entry found in the table do nothing 
      (debug:print 0 *default-log-port* "Running test cnt :" running-cnt)
      ;;
      ;; TODO: add a final rollup when run is done (if there isn't one already)
      ;;
      (if (or (< running-cnt 3)                              ;; have only few running
	      (> (- (current-seconds) *last-rollup*) 10))    ;; or haven't rolled up in past ten seconds
	  (begin
	    (rmt:set-state-status-and-roll-up-run  run-id current-state current-status)
	    (set! *last-rollup* (current-seconds))))
     (runs:update-junit-test-reporter-xml run-id) 
     (cond 
       ((and all-test-launched (eq? not-completed-cnt 0) (equal? all-test-launched "yes" ))
                (if (and (equal? (rmt:get-var (conc "end-of-run-" run-id)) "no") (common:simple-lock (conc "endOfRun" run-id)))
                (begin
           	(debug:print 4 *default-log-port* "look for  post hook. currseconds: " (current-seconds) " EOR " (rmt:get-var (conc "end-of-run-" run-id)))
                (debug:print 0 *default-log-port* "End of Run Detected.")

Modified rmt.scm from [771d7d8ec4] to [22216f2b37].

535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551



552
553
554
555
556
557
558
535
536
537
538
539
540
541


542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559







-
-








+
+
+







  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:general-call 'register-test run-id run-id test-name item-path))

(define (rmt:get-test-id run-id testname item-path)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-test-id run-id (list run-id testname item-path)))

;; run-id is NOT used
;;
(define (rmt:get-test-info-by-id run-id test-id)
  (if (number? test-id)
      (rmt:send-receive 'get-test-info-by-id run-id (list run-id test-id))
      (begin
	(debug:print 0 *default-log-port* "WARNING: Bad data handed to rmt:get-test-info-by-id run-id=" run-id ", test-id=" test-id)
	(print-call-chain (current-error-port))
	#f)))

(define (rmt:get-test-state-status-by-id run-id test-id)
  (rmt:send-receive 'get-test-state-status-by-id run-id (list run-id test-id)))

(define (rmt:test-get-rundir-from-test-id run-id test-id)
  (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id)))

(define (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
  (assert (number? run-id) "FATAL: Run id required.")
  (let* ((test-path (if (string? work-area)
			work-area
797
798
799
800
801
802
803



804
805
806
807
808
809
810
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814







+
+
+







  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-status #f (list run-id)))

(define (rmt:get-run-state run-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-state #f (list run-id)))

(define (rmt:get-run-state-status run-id)
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'get-run-state-status #f (list run-id)))

(define (rmt:set-run-status run-id run-status #!key (msg #f))
  (assert (number? run-id) "FATAL: Run id required.")
  (rmt:send-receive 'set-run-status #f (list run-id run-status msg)))

(define (rmt:set-run-state-status run-id state status )
  (assert (number? run-id) "FATAL: Run id required.")

Modified tests.scm from [5c2006972a] to [b17b9fa8b7].

1964
1965
1966
1967
1968
1969
1970
1971

1972
1973

1974
1975
1976
1977
1978
1979
1980
1964
1965
1966
1967
1968
1969
1970

1971
1972

1973
1974
1975
1976
1977
1978
1979
1980







-
+

-
+







;;======================================================================
;; test steps
;;======================================================================

;; teststep-set-status! used to be here

(define (test-get-kill-request run-id test-id) ;; run-id test-name itemdat)
  (let* ((testdat   (rmt:get-test-info-by-id run-id test-id)))
  (let* ((testdat   (rmt:get-test-state-status-by-id run-id test-id)))
    (and testdat
	 (equal? (test:get-state testdat) "KILLREQ"))))
	 (equal? (car testdat) "KILLREQ"))))

(define (test:tdb-get-rundat-count tdb)
  (if tdb
      (let ((res 0))
	(sqlite3:for-each-row
	 (lambda (count)
	   (set! res count))