Megatest

Check-in [abb2c73974]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: abb2c7397456dc2a1867d0ebfe4458d8aa1cf6e9
User & Date: mrwellan on 2023-03-03 17:37:11
Other Links: branch diff | manifest | tags
Context
2023-03-03
18:32
no crashes (yet) check-in: 349234dd61 user: mrwellan tags: v1.80
17:37
wip check-in: abb2c73974 user: mrwellan tags: v1.80
02:16
Speculative fix for wrong server access issue check-in: 0f348aa84b user: mrwellan tags: v1.80
Changes

Modified api.scm from [1736f4bcb3] to [9b20a45ffe].

241
242
243
244
245
246
247
248





249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda ()
    (let* ((indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))
	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0)))





      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))
      (match indat
	((cmd run-id params meta)
	 (let* ((ttdat   *server-info*)
		(server-state (tt-state ttdat))
		(status  (cond
			  ((> newcount 30) 'busy)
			  ((> newcount 15) 'loaded)
			  (else 'ok)))
		(errmsg  (case status
			   ((busy)   (conc "Server overloaded, "newcount" threads in flight"))
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)  (- newcount 29))
			   ((loaded) #f)
			   (else
			    (case cmd
			      ((ping) *server-signature*)
			      (else
			       (api:dispatch-request dbstruct cmd run-id params))))))
		(meta   (case cmd
			  ((ping) `((sstate . ,server-state)))
			  (else   `((wait . ,delay-wait)))))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   (serialize payload)))
	(else







|
>
>
>
>
>







|
|






|
|
<
|
<
|
|







241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270

271

272
273
274
275
276
277
278
279
280
  (if (not *server-signature*)
      (set! *server-signature* (tt:mk-signature *toppath*)))
  (lambda ()
    (let* ((indat      (deserialize))
	   (newcount   (+ *api-process-request-count* 1))
	   (delay-wait (if (> newcount 10)
			   (- newcount 10)
			   0))
	   (normal-proc (lambda (cmd run-id params)
			  (case cmd
			    ((ping) *server-signature*)
			    (else
			     (api:dispatch-request dbstruct cmd run-id params))))))
      (set! *api-process-request-count* newcount)
      (set! *db-last-access* (current-seconds))
      (match indat
	((cmd run-id params meta)
	 (let* ((ttdat   *server-info*)
		(server-state (tt-state ttdat))
		(status  (cond
			  ;; ((> newcount 600) 'busy)
			  ;; ((> newcount 300) 'loaded)
			  (else 'ok)))
		(errmsg  (case status
			   ((busy)   (conc "Server overloaded, "newcount" threads in flight"))
			   ((loaded) (conc "Server loaded, "newcount" threads in flight"))
			   (else     #f)))
		(result  (case status
			   ((busy)  (- newcount 29)) ;; call back in as many seconds
			   ((loaded)

			    (normal-proc cmd run-id params))

			   (else
			    (normal-proc cmd run-id params))))
		(meta   (case cmd
			  ((ping) `((sstate . ,server-state)))
			  (else   `((wait . ,delay-wait)))))
		(payload (list status errmsg result meta)))
	   (set! *api-process-request-count* (- *api-process-request-count* 1))
	   (serialize payload)))
	(else

Modified db.scm from [5ed64563c8] to [5255866c2a].

2852
2853
2854
2855
2856
2857
2858














2859
2860
2861
2862
2863
2864
2865
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
	  (set! res (cons (apply vector a b) res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
	      (string-intersperse (map conc test-ids) ",") ");"))
       res))))















(define (db:get-test-info dbstruct run-id test-name item-path)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (db:get-test-info-db db run-id test-name item-path))))







>
>
>
>
>
>
>
>
>
>
>
>
>
>







2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
	  ;;                 0    1       2      3      4        5       6      7        8     9     10      11          12          13       14
	  (set! res (cons (apply vector a b) res)))
	db
	(conc "SELECT " db:test-record-qry-selector " FROM tests WHERE id in ("
	      (string-intersperse (map conc test-ids) ",") ");"))
       res))))

;; try every second until tries times proc
;;
(define (db:keep-trying-until-true proc params tries)
  (let* ((res (apply proc params)))
    (if res
	res
	(if (> tries 0)
	    (begin
	      (thread-sleep! 1)
	      (db:keep-trying-until-true proc params (- tries 1)))
	    (begin
	      (debug:print-info 0 *default-log-port* "proc never returned true, params="params)
	      #f)))))
  
(define (db:get-test-info dbstruct run-id test-name item-path)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (db:get-test-info-db db run-id test-name item-path))))
3314
3315
3316
3317
3318
3319
3320


3321

3322
3323
3324
3325
3326
3327
3328
;; if test-name is an integer work off that as test-id instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id


			   (db:get-test-info       dbstruct run-id test-name item-path)))

	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat







>
>
|
>







3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
;; if test-name is an integer work off that as test-id instead of test-name test-path
;;
(define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment)
  ;; establish info on incoming test followed by info on top level test
  ;; BBnote - for mode itemwait, linkage between upstream test & matching item status is propagated to run queue in db:prereqs-not-met
  (let* ((testdat      (if (number? test-name)
			   (db:get-test-info-by-id dbstruct run-id test-name) ;; test-name is actually a test-id
			   (db:keep-trying-until-true ;; in our threaded stuff this call could happen before the test is registered (maybe?)
			    db:get-test-info
			    (list dbstruct run-id test-name item-path)
			    10)))
	 (test-id      (db:test-get-id testdat))
	 (test-name    (if (number? test-name)
			   (db:test-get-testname testdat)
			   test-name))
	 (item-path    (db:test-get-item-path testdat))
         (tl-testdat   (db:get-test-info dbstruct run-id test-name ""))
         (tl-test-id   (if tl-testdat

Modified dbfile.scm from [776ffe336b] to [e6a2908d99].

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbfile))
;; (declare (uses debugprint))
(declare (uses commonmod))

(module dbfile
	*
	
  (import scheme
	  chicken
	  data-structures
	  extras
	  matchable)
  
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18 srfi-1
	srfi-69
	stack
	files
	ports

	commonmod
	;; debugprint
	)

(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
(define num-run-dbs    (make-parameter 10))     ;; number of db's in .megatest
(define dbfile:testsuite-name (make-parameter #f))
(define dbfile:sync-method (make-parameter 'attach)) ;; 'attach or 'original








|



















|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
;; 
;;     You should have received a copy of the GNU General Public License
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;;======================================================================

(declare (unit dbfile))
(declare (uses debugprint))
(declare (uses commonmod))

(module dbfile
	*
	
  (import scheme
	  chicken
	  data-structures
	  extras
	  matchable)
  
(import (prefix sqlite3 sqlite3:)
	posix typed-records srfi-18 srfi-1
	srfi-69
	stack
	files
	ports

	commonmod
	debugprint
	)

(define keep-age-param (make-parameter 10)) ;; qif file age, if over move to attic
(define num-run-dbs    (make-parameter 10))     ;; number of db's in .megatest
(define dbfile:testsuite-name (make-parameter #f))
(define dbfile:sync-method (make-parameter 'attach)) ;; 'attach or 'original

405
406
407
408
409
410
411

412
413
414
415
416
417
418
    (system cmd)))

;; opens and returns handle and nothing else
;;
(define (dbfile:raw-open-no-sync-db dbpath)
  (if (not (file-exists? dbpath))
      (create-directory dbpath #t))

  (let* ((dbname    (conc dbpath "/no-sync.db"))
	 (db-exists (file-exists? dbname))
	 (init-proc (lambda (db)
		      (if (not db-exists)
			  (begin
			    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
			  )))







>







405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
    (system cmd)))

;; opens and returns handle and nothing else
;;
(define (dbfile:raw-open-no-sync-db dbpath)
  (if (not (file-exists? dbpath))
      (create-directory dbpath #t))
  (debug:print-info 0 *default-log-port* "Opening "dbpath"/no-sync.db")
  (let* ((dbname    (conc dbpath "/no-sync.db"))
	 (db-exists (file-exists? dbname))
	 (init-proc (lambda (db)
		      (if (not db-exists)
			  (begin
			    (sqlite3:execute db "CREATE TABLE IF NOT EXISTS no_sync_metadat (var TEXT,val TEXT, CONSTRAINT no_sync_metadat_constraint UNIQUE (var));"))
			  )))
466
467
468
469
470
471
472


473
474
475
476
477
478
479
480

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;


(define (db:no-sync-get-lock db keyname)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
	 (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	   (if curr-val
	       `(#f . ,curr-val)   ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))







>
>
|







467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483

;; transaction protected lock aquisition
;; either:
;;    fails    returns  (#f . lock-creation-time)
;;    succeeds (returns (#t . lock-creation-time)
;; use (db:no-sync-del! db keyname) to release the lock
;;
;; 
;;
(define (db:no-sync-get-lock db keyname . identification)
  (sqlite3:with-transaction
   db
   (lambda ()
     (condition-case
	 (let* ((curr-val (db:no-sync-get/default db keyname #f)))
	   (if curr-val
	       `(#f . ,curr-val)   ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))

Modified launch.scm from [6fda936e21] to [b3f7d09843].

1457
1458
1459
1460
1461
1462
1463

1464
1465
1466
1467
1468
1469
1470
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)

  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)







>







1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (assert runname "FATAL: launch-test called with no runname")
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ( ;; (lock-key        (conc "test-" test-id))
	;; (got-lock        (let loop ((lock        (rmt:no-sync-get-lock lock-key))
	;; 			     (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds
	;; 		    (if (car lock)
	;; 			#t
	;; 			(if (> (current-seconds) expire-time)
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (debug:print 2 *default-log-port* "best disk path = " diskpath)
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))







|







1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:set-state-status-and-roll-up-items run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (debug:print 2 *default-log-port* "best disk path = " diskpath)
      (if diskpath
	  (let ((dat (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))

Modified megatest.scm from [a9076c4fb8] to [c6a7ef8a82].

93
94
95
96
97
98
99

100
101
102
103
104
105
106
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)


;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))








>







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
(define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file

;; set some parameters here - these need to be put in something that can be loaded from other
;; executables such as dashboard and mtutil
;;
(include "transport-mode.scm")
(dbfile:db-init-proc db:initialize-main-db)
(debug:enable-timestamp #t) 

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
      (load debugcontrolf)))

Modified tcp-transportmod.scm from [2182e473e2] to [8018e361e3].

206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
		   (if (and (number? delay-wait)
			    (> delay-wait 0))
		       (begin
			 (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
			 (thread-sleep! delay-wait)))))
	     (case status
	       ((busy) ;; result will be how long the server wants you to delay
		(debug:print 0 *default-log-port* "WARNING: server is overloaded, will try again in "result" seconds.")
		(thread-sleep! (if (number? result) result 2))
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server is loaded, will try again in a 1/4 second.")
		(thread-sleep! 0.25)
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else
	     (if (not res)
		 (let* ((host    (tt-conn-host conn))







|



|







206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
		   (if (and (number? delay-wait)
			    (> delay-wait 0))
		       (begin
			 (debug:print 0 *default-log-port* "Server is loaded, delaying "delay-wait" seconds")
			 (thread-sleep! delay-wait)))))
	     (case status
	       ((busy) ;; result will be how long the server wants you to delay
		(debug:print 0 *default-log-port* "WARNING: server for "dbfname" is overloaded, will try again in "result" seconds.")
		(thread-sleep! (if (number? result) result 2))
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       ((loaded)
		(debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, will try again in a 1/4 second.")
		(thread-sleep! 0.25)
		(tt:handler  ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe))
	       (else
		result)))
	    (else
	     (if (not res)
		 (let* ((host    (tt-conn-host conn))
265
266
267
268
269
270
271
272






273
274
275
276
277
278
279
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2))))))






    sorted))
    
(define (tt:get-current-server-info ttdat dbfname)
  (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
  ;;
  ;; TODO - replace most of below with tt;get-server-info-sorted
  ;;







|
>
>
>
>
>
>







265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (tt:get-server-info-sorted ttdat dbfname)
  (let* ((areapath (tt-areapath ttdat))
	 (sfiles   (tt:find-server areapath dbfname))
	 (sdats    (filter car (map tt:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (< (list-ref a 2)(list-ref b 2)))))
	 (count    0))
    (for-each
     (lambda (rec)
       (debug:print 0 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", "))
       (set! count (+ count 1)))
     sorted)
    sorted))
    
(define (tt:get-current-server-info ttdat dbfname)
  (assert (tt-areapath ttdat) "FATAL: areapath not set in ttdat.")
  ;;
  ;; TODO - replace most of below with tt;get-server-info-sorted
  ;;
385
386
387
388
389
390
391
392
393
394
395

396
397
398
399
400
401
402

403
404
405
406
407




408
409
410
411
412
413



414
415
416
417
418
419
420
421
422
423

424
425
426
427
428
429
430
431
432
433
434
435
436
437

438
439

440
441
442
443
444
445
446
    (thread-sleep! 0.05) ;; any real need for delay here?
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))
			(debug:print-info 0 *default-log-port* "Keep running, I'm the top server on "(tt-host ttdat)":"(tt-port ttdat))
			(if db-locked-in
			    #t
			    (let* ((lockinfo  (dbfile:with-no-sync-db nosyncdbpath

								      (lambda (db)
									(db:no-sync-get-lock db dbfname))))
				   (success   (car lockinfo)))
			      (if success
				  (begin
				    (tt-state-set! ttdat 'running)
				    (debug:print 0 *default-log-port* "Got server lock for "dbfname)

				    (set! db-locked-in #t)
				    #t)
				  (begin
				    (debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
				    #f)))))




		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (if (tt:ping host port server-id)



				 #f ;; not the server, but all good, want to exit
				 (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 15))
				     (begin
				       ;; can't ping and file has been on disk 5 seconds, go ahead and try to remove it
				       (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
				       (delete-file* servinfofile)
				       #t) ;; not the server but the server is not reachable
				     (begin
				       (debug:print 0 *default-log-port* "I'm not the server but will try again since "servinfofile" is fresh")

				       #t))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))

	  (if (> (- curr-secs last-update) 3) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
	      (begin

		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
	    (begin
	      (thread-sleep! 5)
	      (loop)))))







<
|
|
|
>
|
|
|
|
|
|
|
>
|
|
|
|
|
>
>
>
>





|
>
>
>
|
|
|

|




|
>
|













>
|

>







391
392
393
394
395
396
397

398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
    (thread-sleep! 0.05) ;; any real need for delay here?
    (let loop ()
      (let* ((servers (tt:get-server-info-sorted ttdat dbfname))
	     (ok      (cond
		       ((null? servers) #f) ;; not ok
		       ((equal? (list-ref (car servers) 6) ;; compare the servinfofile
				(tt-servinf-file ttdat))

			(let* ((res (if db-locked-in
					#t
					(let* ((lockinfo  (dbfile:with-no-sync-db
							   nosyncdbpath
							   (lambda (db)
							     (db:no-sync-get-lock db dbfname))))
					       (success   (car lockinfo)))
					  (if success
					      (begin
						(tt-state-set! ttdat 'running)
						(debug:print 0 *default-log-port* "Got server lock for "
							     dbfname)
						(set! db-locked-in #t)
						#t)
					      (begin
						(debug:print 0 *default-log-port* "Failed to get server lock for "dbfname)
						#f))))))
			  (if res
			      (debug:print-info 0 *default-log-port* "Keep running, I'm the top server for "
						dbfname" on "(tt-host ttdat)":"(tt-port ttdat)))
			  res))
		       (else
			(debug:print-info 0 *default-log-port* "I'm not the lead server: "servers)
			(let* ((leadsrv (car servers)))
			  (match leadsrv
			    ((host port startseconds server-id pid dbfname servinfofile)
			     (let* ((res  (tt:ping host port server-id)))
			       (debug:print-info 0 *default-log-port* "Ping to "host":"port", with server-id "server-id
						 ", and file "servinfofile" returned "res)
			       (if res
				   #f ;; not the server, but all good, want to exit
				   (if (and (file-exists? servinfofile)
					  (> (- (current-seconds)(file-modification-time servinfofile)) 30))
				     (begin
				       ;; can't ping and file has been on disk 15 seconds, go ahead and try to remove it
				       (debug:print-info 0 *default-log-port* "Removing apparently dead server info file: "servinfofile)
				       (delete-file* servinfofile)
				       #t) ;; not the server but the server is not reachable
				     (begin
				       (debug:print 0 *default-log-port* "I'm not the server but could not ping "host":"port", trying again.")
				       (thread-sleep! 1) ;; just because
				       #t)))))
			    (else ;; should never get here
			     (debug:print 0 *default-log-port* "BAD SERVER RECORD: "leadsrv)
			     (assert #f "Bad server record "leadsrv))))))))
	(if ok
	    ;; (if (> *api-process-request-count* 0) ;; have requests in flight
	    ;;	(tt-last-access-set! ttdat (current-seconds)))
	    (tt-last-access-set! ttdat *db-last-access*) ;; bit silly, just use db-last-access
	    (begin
	      (cleanup)
	      (exit)))

	(let* ((last-update (dbr:dbstruct-last-update dbstruct))
	       (curr-secs   (current-seconds)))
	  (if (and (eq? (tt-state ttdat) 'running)
		   (> (- curr-secs last-update) 3)) ;; every 3-4 seconds update the db? maybe this should be refresh the inmem?
	      (begin
		(set! (file-modification-time (tt-servinf-file ttdat)) (current-seconds))
		((dbr:dbstruct-sync-proc dbstruct) last-update)
		(dbr:dbstruct-last-update-set! dbstruct curr-secs))))
	  
	(if (< (- (current-seconds) (tt-last-access ttdat)) 60)
	    (begin
	      (thread-sleep! 5)
	      (loop)))))