Megatest

Check-in [a6be57bfc9]
Login
Overview
Comment:Cleaned up some gratuitous database opens, quietened some debug messages
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.70
Files: files | file ages | folders
SHA1: a6be57bfc9bc93eb74fcc1eb41b2b0a7456f2ac7
User & Date: matt on 2022-05-22 18:02:07
Other Links: branch diff | manifest | tags
Context
2022-05-22
20:20
Some awful hacks to keep the system running. There is something causing servers to crash, I suspect sync is the problem. This work-around just constantly replaces the servers with new ones. check-in: 3cdcb8c138 user: matt tags: v1.70
18:02
Cleaned up some gratuitous database opens, quietened some debug messages check-in: a6be57bfc9 user: matt tags: v1.70
15:42
Ensure that db opens are only done once per process per db file. Put out messages if this is not the case. check-in: d0bca99717 user: matt tags: v1.70
Changes

Modified db.scm from [73d0c4cca7] to [18a5213140].

211
212
213
214
215
216
217
218

219
220
221
222
223
224
225
226

227
228
229
230
231
232
233
211
212
213
214
215
216
217

218
219
220
221
222
223
224
225

226
227
228
229
230
231
232
233







-
+







-
+







			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (subdb     (if have-struct
	 #;(subdb     (if have-struct
			(dbfile:get-subdb dbstruct run-id)
			#f))
	 (use-mutex (> *api-process-request-count* 25))) ;; was 25
    (if (and use-mutex
	     (common:low-noise-print 120 "over-50-parallel-api-requests"))
	(debug:print-info 0 *default-log-port* *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access"))
    (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*))
	(debug:print-info 2 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
	(debug:print-info 1 *default-log-port* "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*))
    (condition-case
	(begin
	  (if use-mutex (mutex-lock! *db-with-db-mutex*))
	  (let ((res (apply proc dbdat db params)))
	    (if use-mutex (mutex-unlock! *db-with-db-mutex*))
	    ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
	    (if dbdat
432
433
434
435
436
437
438


439

440
441
442
443



444
445
446
447
448
449
450
432
433
434
435
436
437
438
439
440

441
442



443
444
445
446
447
448
449
450
451
452







+
+
-
+

-
-
-
+
+
+







    ;; (cons db dbpath)))
    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))

;; sync run from tmp disk to nfs disk if touched
;;
(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
  (debug:print-info 0 *default-log-port* "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db"))
  (let* (;; the subdb is needed to access the mtdbdat
	 (subdb     (or (dbfile:get-subdb dbstruct run-id)
  (let* ((subdb   (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id db:initialize-main-db)))
			(dbfile:init-subdb dbstruct run-id db:initialize-main-db)))
         (tmpdbfile (dbr:subdb-tmpdbfile subdb))
	 (mtdb    (dbr:subdb-mtdbdat subdb))
         (tmpdb (dbfile:open-sqlite3-db tmpdbfile #f))
	 (start-t (current-seconds)))
	 (mtdb      (dbr:subdb-mtdbdat subdb))
         (tmpdb     (dbfile:open-db dbstruct run-id db:initialize-main-db)) ;; sqlite3-db tmpdbfile #f))
	 (start-t   (current-seconds)))
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)

Modified dbfile.scm from [0891876475] to [9934719f0f].

236
237
238
239
240
241
242



243
244
245
246
247
248
249
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252







+
+
+







				    mtdbfile:  mtdbpath
				    tmpdbfile: tmpdbpath
				    mtdbdat:   mtdbdat)))
    (dbfile:set-subdb dbstruct run-id newsubdb)
    newsubdb)) ;; return the new subdb - but shouldn't really use it

;; returns dbdat with dbh and dbfilepath
;;
;; NOTE: the handle is on /tmp db file!
;;
;;  1. if needed setup the subdb for the given run-id
;;  2. if there is no existing db handle in the stack
;;     create a new handle and return it (do NOT add
;;     it to the stack).
;;
(define (dbfile:open-db dbstruct run-id init-proc)
  (let* ((subdb (dbfile:get-subdb dbstruct run-id)))
263
264
265
266
267
268
269
270

271
272
273
274
275
276
277
266
267
268
269
270
271
272

273
274
275
276
277
278
279
280







-
+








;; this stuff is for initial debugging, please remove it when
;; this code stabilizes
(define *dbopens* (make-hash-table))
(define (dbfile:inc-db-open dbfile)
  (let* ((curr-opens-count (+ (hash-table-ref/default *dbopens* dbfile 0) 1)))
    (if (> curr-opens-count 1) ;; this should NOT be happening
	(dbfile:print-err "ERROR: db "dbfile" has been opened "curr-opens-count" times!"))
	(dbfile:print-err "INFO: db "dbfile" has been opened "curr-opens-count" times!"))
    (hash-table-set! *dbopens* dbfile curr-opens-count)
    curr-opens-count))

;; Open the classic megatest.db file (defaults to open in toppath)
;;
;;   NOTE: returns a dbdat not a dbstruct!
;;

Modified dbmod.scm from [9cc13aa737] to [043beb90c3].

29
30
31
32
33
34
35
36




















37
29
30
31
32
33
34
35

36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

	srfi-69)

(define (db:run-id->dbname run-id)
  (cond
   ((number? run-id)(conc run-id ".db"))
   ((not run-id)    "main.db")
   (else            run-id)))
 


;;======================================================================
;; hash of hashs
;;======================================================================


(define (db:hoh-set! dat key1 key2 val)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (if subhash
	(hash-table-set! subhash key2 val)
	(begin
	  (hash-table-set! dat key1 (make-hash-table))
	  (db:hoh-set! dat key1 key2 val)))))

(define (db:hoh-get dat key1 key2)
  (let* ((subhash (hash-table-ref/default dat key1 #f)))
    (and subhash
	 (hash-table-ref/default subhash key2 #f))))

)

Modified runs.scm from [a43ae4acb3] to [32a6ef2bce].

1500
1501
1502
1503
1504
1505
1506


1507
1508
1509
1510
1511
1512
1513
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515







+
+







;; every time though the loop increment the test/itempatt val.
;; when the min is > max-allowed and none running then force exit
;;
(define *max-tries-hash* (make-hash-table))

(define (runs:pretty-long-list lst)
   (if (> (length lst) 8)(append (take lst 3)(list "...")) lst))

(define *last-loop-time-ms* 0)

;;======================================================================
;; runs:run-tests-queue is called by runs:run-tests
;;======================================================================
;;
;; test-records is a hash table testname:item_path => vector < testname testconfig waitons priority items-info ... >
(define (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests reglen-in all-tests-registry)
1638
1639
1640
1641
1642
1643
1644












1645
1646
1647
1648
1649
1650
1651
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665







+
+
+
+
+
+
+
+
+
+
+
+







			   jobgroup:    jobgroup
			   waitons:     waitons
			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))

	;; too-tight loop detection and delay, this might hide issues
	;; that occur in long run times. Consider commenting when debugging
	;; 
	(if (and (>= num-running max-concurrent-jobs)
		 (< (- (current-milliseconds) *last-loop-time-ms*) 500))
	    (begin
	      (if (runs:lownoise "too-tight-loop" 5)
		  (debug:print-info 2 *default-log-port* "Excessively fast loop, delaying 1/2 second"))
	      (thread-sleep! 0.5)))
	(set! *last-loop-time-ms* (current-milliseconds))
     
	(runs:dat-regfull-set! runsdat regfull)
    
	(if (> num-running 0)
            (set! last-time-some-running (current-seconds)))

        (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
            (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))