Megatest

Check-in [162676ba8d]
Login
Overview
Comment:Lots of db adjustments made
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.7001-multi-db-rb01
Files: files | file ages | folders
SHA1: 162676ba8daa2efb0ea495752943773562a06219
User & Date: matt on 2022-04-05 20:29:54
Other Links: branch diff | manifest | tags
Context
2022-04-05
21:33
Fixed dbfile:close-all and added tests check-in: 0e9ad025c4 user: mmgraham tags: v1.7001-multi-db-rb01
20:29
Lots of db adjustments made check-in: 162676ba8d user: matt tags: v1.7001-multi-db-rb01
19:26
Remove specific interface from http server start. I'm pretty sure that should never have been there. check-in: 2204e62bd5 user: matt tags: v1.7001-multi-db-rb01
Changes

Modified api.scm from [f83932a9cc] to [fcdb3b5c5f].

342
343
344
345
346
347
348
349

350
351
352
353
354
355
356
342
343
344
345
346
347
348

349
350
351
352
353
354
355
356







-
+







                   ;; MISC
                   ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
                   ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
                   ((login)                        (apply db:login dbstruct params))
                   ((general-call)                 (let ((stmtname   (car params))
                                                         (run-id     (cadr params))
                                                         (realparams (cddr params)))
                                                     (db:general-call dbstruct stmtname realparams)))
                                                     (db:general-call dbstruct run-id stmtname realparams)))
                   ((sdb-qry)                      (apply sdb:qry params))
                   ((ping)                         (current-process-id))
		   ((get-changed-record-ids)       (apply db:get-changed-record-ids dbstruct params))
		   ((get-run-record-ids) 	   (apply db:get-run-record-ids dbstruct params))	
                   ;; TESTMETA
                   ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

Modified db.scm from [b1156f025e] to [4fc3fce020].

1218
1219
1220
1221
1222
1223
1224
1225



1226
1227
1228
1229
1230
1231
1232
1218
1219
1220
1221
1222
1223
1224

1225
1226
1227
1228
1229
1230
1231
1232
1233
1234







-
+
+
+







                               END;" )
	   (list "update_test_data_trigger"  "CREATE TRIGGER  IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data
                             FOR EACH ROW
                               BEGIN 
                                 UPDATE test_data SET last_update=(strftime('%s','now'))
                                   WHERE id=old.id;
                               END;" )))

;;
;; ADD run-id SUPPORT
;;
(define (db:create-all-triggers dbstruct)
(db:with-db
   dbstruct #f #f
   (lambda (dbdat db)
(db:create-triggers db))))

(define (db:create-triggers db)
1688
1689
1690
1691
1692
1693
1694
1695

1696
1697
1698
1699
1700
1701
1702
1690
1691
1692
1693
1694
1695
1696

1697
1698
1699
1700
1701
1702
1703
1704







-
+







	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   72000))) ;; twenty hours
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
       
       ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
       ;;
       ;; HOWEVER: this code in run:test seems to work fine
       ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
1771
1772
1773
1774
1775
1776
1777
1778

1779
1780
1781
1782
1783
1784
1785
1773
1774
1775
1776
1777
1778
1779

1780
1781
1782
1783
1784
1785
1786
1787







-
+







         (running-deadtime-default (+ server-start-allowance (* 2 launch-monitor-period)))
         (running-deadtime (or deadtime-trim running-deadtime-default)) ;; two minutes (30 seconds between updates, this leaves 3x grace period)
         )
    (debug:print-info 4  *default-log-port* "running-deadtime = " running-deadtime)
    (debug:print-info 4  *default-log-port* "deadtime-trim = " deadtime-trim)

    (db:with-db 
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (let* ((stmth1 (db:get-cache-stmth
		       dbdat run-id db
		       "SELECT id,rundir,uname,testname,item_path,event_time,run_duration FROM tests 
                           WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?)
                                          AND state IN ('RUNNING');"))
	      (stmth2 (db:get-cache-stmth
1892
1893
1894
1895
1896
1897
1898
1899

1900
1901
1902
1903
1904
1905
1906
1894
1895
1896
1897
1898
1899
1900

1901
1902
1903
1904
1905
1906
1907
1908







-
+







		 ;;call end of eud of run detection for posthook
		 (launch:end-of-run-check run-id)
		 )))))))

;; BUG: Probably broken - does not explicitly use run-id in the query
;;
(define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
  (db:general-call dbstruct 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))
  (db:general-call dbstruct run-id 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name)))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
2049
2050
2051
2052
2053
2054
2055
2056

2057
2058
2059
2060
2061
2062
2063
2051
2052
2053
2054
2055
2056
2057

2058
2059
2060
2061
2062
2063
2064
2065







-
+








;; returns number if string->number is successful, string otherwise
;; also updates *global-delta*
;;
(define (db:get-var dbstruct var)
  (let* ((res      #f))
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (sqlite3:for-each-row
        (lambda (val)
          (set! res val))
        db
        "SELECT val FROM metadat WHERE var=?;" var)
       ;; convert to number if can
2104
2105
2106
2107
2108
2109
2110
2111

2112
2113
2114
2115
2116
2117
2118
2106
2107
2108
2109
2110
2111
2112

2113
2114
2115
2116
2117
2118
2119
2120







-
+







		(sqlite3:execute db "DELETE FROM metadat WHERE var=?;" var))))

;;======================================================================
;; no-sync.db - small bits of data to be shared between servers
;;======================================================================

(define (db:open-no-sync-db)
  (dbfile:open-no-syncd-db (db:dbfile-path)))
  (dbfile:open-no-sync-db (db:dbfile-path)))

;; if we are not a server create a db handle. this is not finalized
;; so watch for problems. I'm still not clear if it is needed to manually
;; finalize sqlite3 dbs with the sqlite3 egg.
;;
(define (db:no-sync-db db-in)
  (mutex-lock! *db-access-mutex*)
3088
3089
3090
3091
3092
3093
3094
3095
3096


3097
3098

3099
3100
3101
3102
3103
3104
3105
3090
3091
3092
3093
3094
3095
3096


3097
3098
3099

3100
3101
3102
3103
3104
3105
3106
3107







-
-
+
+

-
+







	 (db:get-all-run-ids dbstruct)))
    res))

;; Convert calling routines to get list of run-ids and loop, do not use the get-tests-for-runs
;;

(define (db:delete-test-records dbstruct run-id test-id)
  (db:general-call dbstruct 'delete-test-step-records (list test-id))
  (db:general-call dbstruct 'delete-test-data-records (list test-id))
  (db:general-call dbstruct run-id 'delete-test-step-records (list test-id))
  (db:general-call dbstruct run-id 'delete-test-data-records (list test-id))
  (db:with-db
   dbstruct #f #f
   dbstruct run-id #f
   (lambda (dbdat db)
     (sqlite3:execute db "UPDATE tests SET state='DELETED',status='n/a',comment='' WHERE id=?;" test-id))))

;; 
(define (db:delete-old-deleted-test-records dbstruct)
  (let ((targtime (- (current-seconds)
		     (or (configf:lookup-number *configdat* "setup" "keep-deleted-records")
3155
3156
3157
3158
3159
3160
3161
3162

3163
3164
3165
3166
3167
3168
3169
3170
3157
3158
3159
3160
3161
3162
3163

3164

3165
3166
3167
3168
3169
3170
3171







-
+
-







;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
;;      NOTE: run-id is not used
;; ;;
(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)
  (db:with-db
   dbstruct
   ;; run-id
   run-id
   #f
   #t
   (lambda (dbdat db)
     (cond
      ((and newstate newstatus newcomment)
       (sqlite3:execute db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;" newstate newstatus newcomment ;; (sdb:qry 'getid newcomment)
			test-id))
      ((and newstate newstatus)
3431
3432
3433
3434
3435
3436
3437
3438

3439
3440
3441
3442
3443
3444
3445
3432
3433
3434
3435
3436
3437
3438

3439
3440
3441
3442
3443
3444
3445
3446







-
+







     run-ids)))

;; Get test data using test_id, run-id is not used
;; 
(define (db:get-test-info-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   #f ;; run-id
   run-id
   #f
   (lambda (dbdat db)
     (let ((res #f))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)
	  ;;                0    1       2      3      4        5       6      7        8     9     10      11          12          13           14         15          16
	  (set! res (vector id run-id testname state status event-time host cpuload diskfree uname rundir-id item-path run_duration final-logf-id comment short-dir-id attemptnum archived last-update)))
3615
3616
3617
3618
3619
3620
3621
3622

3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633

3634
3635

3636
3637
3638
3639
3640
3641
3642
3616
3617
3618
3619
3620
3621
3622

3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
3633

3634
3635

3636
3637
3638
3639
3640
3641
3642
3643







-
+










-
+

-
+







;; look at the test_data status field, 
;;    if all are pass (any case) and the test status is PASS or NULL or '' then set test status to PASS.
;;    if one or more are fail (any case) then set test status to PASS, non "pass" or "fail" are ignored
(define (db:test-data-rollup dbstruct run-id test-id status)
  (let* ((fail-count 0)
	 (pass-count 0))
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (sqlite3:for-each-row
	(lambda (fcount pcount)
	  (set! fail-count fcount)
	  (set! pass-count pcount))
	db 
	"SELECT (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'fail') AS fail_count,
             (SELECT count(id) FROM test_data WHERE test_id=? AND status like 'pass') AS pass_count;"
	test-id test-id)
       ;; Now rollup the counts to the central megatest.db
       (db:general-call dbstruct 'pass-fail-counts (list pass-count fail-count test-id))
       (db:general-call dbstruct run-id 'pass-fail-counts (list pass-count fail-count test-id))
       ;; if the test is not FAIL then set status based on the fail and pass counts.
       (db:general-call dbstruct 'test_data-pf-rollup (list test-id test-id test-id test-id))))))
       (db:general-call dbstruct run-id 'test_data-pf-rollup (list test-id test-id test-id test-id))))))

;; each section is a rule except "final" which is the final result
;;
;; [rule-5]
;; operator in
;; section LogFileBody
;; desc Output voltage
3933
3934
3935
3936
3937
3938
3939
3940

3941
3942
3943

3944
3945
3946
3947
3948
3949
3950
3934
3935
3936
3937
3938
3939
3940

3941
3942
3943

3944
3945
3946
3947
3948
3949
3950
3951







-
+


-
+







			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat
			   (db:test-get-id tl-testdat)
			   #f)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) 
	(db:general-call dbstruct 'set-test-start-time (list test-id)))
	(db:general-call dbstruct run-id 'set-test-start-time (list test-id)))
    (mutex-lock! *db-transaction-mutex*)
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (let ((tr-res
              (sqlite3:with-transaction
               db
               (lambda ()
                 ;; NB// Pass the db so it is part fo the transaction
                 (db:test-set-state-status db run-id test-id state status comment) ;; this call sets the item state/status
4297
4298
4299
4300
4301
4302
4303
4304


4305
4306
4307
4308
4309
4310
4311

4312
4313
4314
4315
4316
4317
4318
4319
4320
4321

4322
4323
4324
4325
4326
4327
4328
4298
4299
4300
4301
4302
4303
4304

4305
4306
4307
4308
4309
4310
4311
4312

4313
4314
4315
4316
4317
4318
4319
4320
4321
4322

4323
4324
4325
4326
4327
4328
4329
4330







-
+
+






-
+









-
+







   ((not (equal? megatest-version calling-version))
    (list #t (conc "Login warning due to mismatch megatest version: " calling-version ", " megatest-version)))
   
    (else
    (hash-table-set! *logged-in-clients* client-signature (current-seconds))
    '(#t "successful login"))))

(define (db:general-call dbstruct stmtname params)
(define (db:general-call dbstruct run-id stmtname params)
  ;; Why is db:lookup-query above not used here to get the query?
  (let ((query (let ((q (alist-ref (if (string? stmtname)
				       (string->symbol stmtname)
				       stmtname)
				   db:queries)))
 		 (if q (car q) #f))))
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (apply sqlite3:execute db query params)
       #t))))

;; get a summary of state and status counts to calculate a rollup
;;
(define (db:get-state-status-summary dbstruct run-id testname)
  (let ((res   '()))
    (db:with-db
     dbstruct #f #f
     dbstruct run-id #f
     (lambda (dbdat db)
       (sqlite3:for-each-row
	(lambda (state status count)
	  (set! res (cons (vector state status count) res)))
	db
	"SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;"
	run-id testname)

Modified launch.scm from [1931a96c9c] to [6498c309e0].

479
480
481
482
483
484
485
486

487
488
489
490
491
492
493
494
495
496
497
498
499

500
501
502
503
504
505
506
479
480
481
482
483
484
485

486
487
488
489
490
491
492
493
494
495
496
497
498

499
500
501
502
503
504
505
506







-
+












-
+







		 (test-pid  (db:test-get-process_id  test-info)))
	    (cond
             ;; -mrw- I'm removing KILLREQ from this list so that a test in KILLREQ state is treated as a "do not run" flag.
	     ((member (db:test-get-state test-info) '("INCOMPLETE" "KILLED" "UNKNOWN" "STUCK")) ;; prior run of this test didn't complete, go ahead and try to rerun
	      (debug:print 0 *default-log-port* "INFO: test is INCOMPLETE or KILLED, treat this execute call as a rerun request")
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")

              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:general-call 'set-test-start-time run-id test-id)
              (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f)
	      ) ;; prime it for running
	     ((member (db:test-get-state test-info) '("RUNNING" "REMOTEHOSTSTART"))
	      (if (process:alive-on-host? test-host test-pid)
		  (debug:print-error 0 *default-log-port* "test state is "  (db:test-get-state test-info) " and process " test-pid " is still running on host " test-host ", cannot proceed")
		  (exit 1)))
	     ((member (db:test-get-state test-info) '("COMPLETED"))  ;; we do NOT want to re-run COMPLETED jobs. Mark as NOT_STARTED to run!
	      (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (debug:print 0 *default-log-port* "exiting with status 1")
	      (exit 1))
	     ((not (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ")))
	      ;; (tests:test-force-state-status! run-id test-id "REMOTEHOSTSTART" "n/a")
              (rmt:general-call 'set-test-start-time #f test-id)
              (rmt:general-call 'set-test-start-time run-id test-id)
	      (rmt:test-set-state-status run-id test-id "REMOTEHOSTSTART" "n/a" #f))
	     (else ;; (member (db:test-get-state test-info) '("REMOVING" "REMOTEHOSTSTART" "RUNNING" "KILLREQ"))
	      (debug:print 0 *default-log-port* "test state is " (db:test-get-state test-info) ", cannot proceed")
	      (debug:print 0 *default-log-port* "exiting with status 1")
	      (exit 1))))

          ;; cleanup prior execution's steps