Megatest

Check-in [138a40d18e]
Login
Overview
Comment:Use a key for the db lock-down that is unique to the db in .mtdb, this should elminate duplicate, overlapping servers.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 138a40d18eb6b80f7fd5ce10b2d786db7b4098d4
User & Date: matt on 2023-05-11 05:43:05
Other Links: branch diff | manifest | tags
Context
2023-05-11
09:08
reworked the sync locking, allow parallel servers (needs work to lock in to single machine) check-in: 7fb44b797e user: matt tags: v1.80
05:43
Use a key for the db lock-down that is unique to the db in .mtdb, this should elminate duplicate, overlapping servers. check-in: 138a40d18e user: matt tags: v1.80
2023-05-10
20:33
Patched in the -db2db code and it appears to work fine. check-in: 0131a588a0 user: matt tags: v1.80
Changes

Modified common.scm from [ec316c51cd] to [10e4ec655c].

54
55
56
57
58
59
60

61
62
63
64
65
66
67
68
69
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))

				      (begin
					(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)







>
|
|







54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))
					(print msg)
					(debug:print 0 *default-log-port* msg)
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)

Modified dbfile.scm from [1443b07658] to [82b1ce2a6f].

535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val)
	       ((timestamp ident)
		(if (equal? ident identifier)
		    #t     ;; this *is* my lock
		    #f))   ;; nope, not my lock
	       (else #f))  ;; nope, not my lock
	    (let ((lock-value (if identifier
				  (conc (current-seconds)"+"identifier)







|







535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
(define (db:no-sync-get-lock-with-id db keyname identifier)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
      (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	(if curr-val
	    (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier
	       ((timestamp ident)
		(if (equal? ident identifier)
		    #t     ;; this *is* my lock
		    #f))   ;; nope, not my lock
	       (else #f))  ;; nope, not my lock
	    (let ((lock-value (if identifier
				  (conc (current-seconds)"+"identifier)

Modified dbmod.scm from [a71c3b544a] to [fa16c38514].

182
183
184
185
186
187
188
189
190
191

192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213

214
215




216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233

234
235
236
237
238
239
240
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".")"-"(current-process-id)"-"dbfname)))
			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    (if (eq? (dbfile:cache-method) 'inmem)
						#f
						tmpdb)

					    ))
	 (write-access (file-write-access? dbpath))
	 (db           (dbmod:safely-open-db dbfullname init-proc write-access))
	 (tables       (db:sync-all-tables-list keys)))
    (if (not (and (sqlite3:database? inmem)
		  (sqlite3:database? db)))
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")
	  (exit)))
    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)
    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (begin
					 (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)

					 (dbmod:sync-gasket tables last-update inmem db
							    dbfullname syncdir keys)




					 (mutex-unlock! *db-with-db-mutex*)
					 (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
					 (set! *sync-in-progress* #f)))))
    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
;;        (dbmod:sync-tables tables last-update inmem db)
;;        (dbmod:sync-tables tables last-update db inmem))))

;; direction: 'fromdest 'todest
;;
(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction keys)
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: inmem is not a db")
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: dbh is not a db")

  (case (dbfile:sync-method)
    ((none) #f)
    ((attach)
     (dbmod:attach-sync tables inmem dbfname direction))
    ((newsync)
     (dbmod:new-sync tables inmem dbh dbfname direction))
    (else







|
|
|
>








<
|









|


>
|
|
>
>
>
>


















>







182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200

201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
	 (tmpdir       (conc "/tmp/"(current-user-name)))
	 (tmpdb        (let* ((fname (conc tmpdir"/" (string-translate areapath "/" ".")"-"(current-process-id)"-"dbfname)))
			 (if (not (file-exists? tmpdir))(create-directory tmpdir))
			 ;; check if tmpdb already exists, either delete it or
			 ;; add something to the name
			 fname))
	 (inmem        (dbmod:open-inmem-db init-proc
					    ;; (if (eq? (dbfile:cache-method) 'inmem)
					    ;; 	#f
					    tmpdb
					    ;; )
					    ))
	 (write-access (file-write-access? dbpath))
	 (db           (dbmod:safely-open-db dbfullname init-proc write-access))
	 (tables       (db:sync-all-tables-list keys)))
    (if (not (and (sqlite3:database? inmem)
		  (sqlite3:database? db)))
	(begin
	  (debug:print 0 *default-log-port* "ERROR: Failed to properly open "dbfname-in", exiting immediately.")

	  (exit)))    ;; (assert (sqlite3:database? inmem) "FATAL: open-dbmoddb: inmem is not a db")
    ;; (assert (sqlite3:database? db) "FATAL:  open-dbmoddb: db is not a db")
    (dbr:dbstruct-inmem-set!     dbstruct inmem)
    (dbr:dbstruct-ondiskdb-set!  dbstruct db)
    (dbr:dbstruct-dbfile-set!    dbstruct dbfullname)
    (dbr:dbstruct-dbfname-set!   dbstruct dbfname)
    (dbr:dbstruct-sync-proc-set! dbstruct
				 (lambda (last-update)
				   (if *sync-in-progress*
				       (debug:print 3 *default-log-port* "WARNING: overlapping calls to sync to disk")
				       (let* ((sync-cmd (conc "megatest -db2db -from "tmpdb" -to "dbfullname)))
					 (mutex-lock! *db-with-db-mutex*) ;; this mutex is used when overloaded or during a query that modifies the db
					 (set! *sync-in-progress* #t)
					 ;; (if (eq? (dbfile:cache-method) 'inmem)
					 ;;     (dbmod:sync-gasket tables last-update inmem db
					 ;; 			dbfullname syncdir keys)
					 (thread-start! (make-thread
							 (lambda ()
							   (debug:print-info "Running "sync-cmd)
							   (system sync-cmd))))
					 (mutex-unlock! *db-with-db-mutex*)
					 (thread-sleep! 0.5) ;; ensure at least 1/2 second down time between sync calls
					 (set! *sync-in-progress* #f)))))
    ;; (dbmod:sync-tables tables #f db inmem)
    ;; (if db
    (dbmod:sync-gasket tables #f inmem db dbfullname 'fromdest keys) ;; ) ;; load into inmem
    (dbr:dbstruct-last-update-set! dbstruct (current-seconds)) ;; should this be offset back in time by one second?
    dbstruct))

;;    (if (eq? syncdir 'todisk) ;; sync to disk normally, sync from in dashboard
;;        (dbmod:sync-tables tables last-update inmem db)
;;        (dbmod:sync-tables tables last-update db inmem))))

;; direction: 'fromdest 'todest
;;
(define (dbmod:sync-gasket tables last-update inmem dbh dbfname direction keys)
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: inmem is not a db")
  (assert (sqlite3:database? inmem) "FATAL: sync-gasket: dbh is not a db")
  (debug:print-info 0 *default-log-port* "dbmod:sync-gasket called with sync-method="(dbfile:sync-method))
  (case (dbfile:sync-method)
    ((none) #f)
    ((attach)
     (dbmod:attach-sync tables inmem dbfname direction))
    ((newsync)
     (dbmod:new-sync tables inmem dbh dbfname direction))
    (else
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
;; Idea: youngest in dest is last_update time
;;
(define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key
			   (mode 'full))
  (debug:print 0 *default-log-port* "Doing sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh1))
      (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.")
      (let* ((table-names  (map car tables))
	     (dest-exists  (file-exists? destdbfile)))
	(assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
	(for-each
	 (lambda (table)







|







530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
;; direction = fromdest, todest
;; mode = 'full, 'incr
;;
;; Idea: youngest in dest is last_update time
;;
(define (dbmod:new-sync tables dbh1 dbh2 destdbfile direction #!key
			   (mode 'full))
  (debug:print 0 *default-log-port* "Doing new-sync "direction" "destdbfile)
  (if (not (sqlite3:auto-committing? dbh1))
      (debug:print 0 *default-log-port* "Skipping sync due to transaction in flight.")
      (let* ((table-names  (map car tables))
	     (dest-exists  (file-exists? destdbfile)))
	(assert dest-exists "FATAL: sync called with non-existant file, "destdbfile)
	(for-each
	 (lambda (table)

Modified launch.scm from [7e82dfb83e] to [a591d57e2c].

740
741
742
743
744
745
746

747
748
749
750
751
752
753
		 )
              )


	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		      (tests:summarize-items run-id test-id test-name #f))

	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
              ;; Leave a .final-status file for the top level test
              (tests:save-final-status run-id test-id)
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )







>







740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
		 )
              )


	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		      (tests:summarize-items run-id test-id test-name #f))
	      ;; BUG was this meant to be the antecnt of the if above?
	      (tests:summarize-test run-id test-id)  ;; don't force - just update if no
              ;; Leave a .final-status file for the top level test
              (tests:save-final-status run-id test-id)
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
1153
1154
1155
1156
1157
1158
1159
1160

1161
1162
1163
1164




1165
1166
1167
1168
1169
1170
1171

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	;; have config at this time, this is a good place to set params based on config file settings
	(let*  ((dbmode (configf:lookup *configdat* "setup" "dbcache-mode")))

	  (if dbmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode)
		(dbcache-mode (string->symbol dbmode)))))




	
	*toppath*)))


(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))







|
>



|
>
>
>
>







1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177

	;; if have -append-config then read and append here
	(let ((cfname (args:get-arg "-append-config")))
	  (if (and cfname
		   (file-read-access? cfname))
	      (read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
	;; have config at this time, this is a good place to set params based on config file settings
	(let*  ((dbmode   (configf:lookup *configdat* "setup" "dbcache-mode"))
		(syncmode (configf:lookup *configdat* "setup" "sync-mode")))
	  (if dbmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding dbmode to "dbmode)
		(dbcache-mode (string->symbol dbmode))))
	  (if syncmode
	      (begin
		(debug:print-info 0 *default-log-port* "Overriding syncmode to "syncmode)
		(dbfile:sync-method (string->symbol syncmode)))))
	
	*toppath*)))


(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))

Modified megatest.scm from [2473417c38] to [e2f14e189c].

2576
2577
2578
2579
2580
2581
2582

2583
2584




2585
2586
2587

2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
	   (sync-period (args:get-arg "-period"))    ;; NOT IMPLEMENTED YET
	   (sync-timeout (args:get-arg "-timeout"))  ;; NOT IMPLEMENTED YET
	   (lockfile    (conc dest-db".lock"))
	   (keys        (db:get-keys #f))
	   )
      
      (if (and src-db dest-db)

	  (begin
	    (debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...")




	    (let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys)))
	      (if res
		  (debug:print 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db)

		  (debug:print 0 *default-log-port* "No sync due to permissions or non-existant source db."))))
	  (debug:print 0 *default-log-port* "Skipping sync, there is a sync in progress."))
      (set! *didsomething* #t))
    (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified"))

(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  
     (set! *didsomething* #t)))

(if (args:get-arg "-list-run-time")







>
|
|
>
>
>
>
|
|
|
>
|
|
|
<







2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596

2597
2598
2599
2600
2601
2602
2603
	   (sync-period (args:get-arg "-period"))    ;; NOT IMPLEMENTED YET
	   (sync-timeout (args:get-arg "-timeout"))  ;; NOT IMPLEMENTED YET
	   (lockfile    (conc dest-db".lock"))
	   (keys        (db:get-keys #f))
	   )
      
      (if (and src-db dest-db)
	  (if (file-exists? src-db)
	      (begin
		(debug:print-info 0 *default-log-port* "Attempting to sync data from "src-db" to "dest-db"...")
		(if (not (file-exists? dest-db))
		    (begin
		      (debug:print 0 *default-log-port* "Using copy to create "dest-db" from "src-db)
		      (file-copy src-db dest-db))
		    (let ((res (dbmod:db-to-db-sync src-db dest-db 0 (dbfile:db-init-proc) keys)))
		      (if res
			  (debug:print-info 0 *default-log-port* "Synced " res " records from "src-db" to "dest-db)
			  (debug:print-info 0 *default-log-port* "No sync due to permissions or other issue.")))))
	      (debug:print 0 *default-log-port* "No sync due to unreadble or non-existant source file"src-db))
	  (debug:print 0 *default-log-port* "Usage for -db2db; -to and -from must be specified"))
      (set! *didsomething* #t)))


(if (args:get-arg "-list-test-time")
     (let* ((toppath (launch:setup))) 
     (task:get-test-times)  
     (set! *didsomething* #t)))

(if (args:get-arg "-list-run-time")

Modified tcp-transportmod.scm from [9c6068b733] to [3d0d8b0130].

514
515
516
517
518
519
520
521



522
523
524
525
526
527
528
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((success (dbfile:with-no-sync-db
							 nosyncdbpath
							 (lambda (db)
							   (db:no-sync-get-lock-with-id db dbfname (tt-servinf-file ttdat))))))



					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)







|
>
>
>







514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(let* ((res (if db-locked-in
					#t
					(let* ((success (dbfile:with-no-sync-db
							 nosyncdbpath
							 (lambda (db)
							   (db:no-sync-get-lock-with-id db dbfname
											;; (tt-servinf-file ttdat) ;; does NOT work, must be unique to the dbname which seems silly but makes sense!
											areapath ;; as good as anything
											)))))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)