Megatest

Diff
Login

Differences From Artifact [e7099e7d7f]:

To Artifact [09b12e976e]:


355
356
357
358
359
360
361
362


363
364
365


366
367
368
369
370
371
372
373
374
375
376

377

378
379
380
381
382
383
384
385

386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404




















405
406
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422

423
424
425
426


427
428
429
430
431
432
433
355
356
357
358
359
360
361

362
363
364


365
366
367
368
369
370
371
372
373
374
375
376
377
378

379
380
381
382
383
384
385
386
387
388



















389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408



409
410
411
412
413
414
415
416
417
418
419
420
421
422
423

424
425
426


427
428
429
430
431
432
433
434
435







-
+
+

-
-
+
+











+
-
+








+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+














-
+


-
-
+
+







;; return #f otherwise
;; side effect - cleans up and exits on exception.
(define (http-transport:sync-inmemdb-to-db tdbdat server-state run-id server-id bad-sync-count)
  (if *inmemdb* 
      (let ((start-time (current-milliseconds))
            (sync-time  #f)
            (rem-time   #f)
            (sync-retry #f))
            (sync-retry #f)
            (sync-touched (db:sync-touched *inmemdb* *run-id* force-sync: #t)))
        ;; inmemdb is a dbstruct
        (condition-case
         (db:sync-touched *inmemdb* *run-id* force-sync: #t)
        (condition-case sync-touched
         
         ((sync-failed)(cond
                        ((> bad-sync-count 10) ;; time to give up
                         (http-transport:server-shutdown server-id port))
                        (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
                         (thread-sleep! 5)
                         (set! sync-retry #t))))
         ((exn)
          (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
          (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
          (exit)))
        (if sync-retry
            (begin
            #t ; return true - retry
              #t) ; return true - retry
            (begin
              (set! sync-time  (- (current-milliseconds) start-time))
              (set! rem-time (quotient (- 4000 sync-time) 1000))
              (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
              
              (if (and (<= rem-time 4)
                       (> rem-time 0))
                  (thread-sleep! rem-time)
                  (thread-sleep! 4))))
                  (thread-sleep! 4)) ;; fallback for if the math is changed ...
              
              ;;
              ;; no *inmemdb* yet, set running after our first pass through and start the db
              ;;
              (if (eq? server-state 'available)
                  (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
                    (if (equal? new-server-id server-id)
                        (begin
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                          (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
                          (set! *inmemdb*  (db:setup run-id))
                          ;; force initialization
                          ;; (db:get-db *inmemdb* #t)
                          (db:get-db *inmemdb* run-id)
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
                        (begin ;; gotta exit nicely
                          (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
                          (http-transport:server-shutdown server-id port)))))
        #f) ;; fallback for if the math is changed ...
      
      ;;
      ;; no *inmemdb* yet, set running after our first pass through and start the db
      ;;
      (begin
        (if (eq? server-state 'available)
            (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
              (if (equal? new-server-id server-id)
                  (begin
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                    (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
                    (set! *inmemdb*  (db:setup run-id))
                    ;; force initialization
                    ;; (db:get-db *inmemdb* #t)
                    (db:get-db *inmemdb* run-id)
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
                  (begin ;; gotta exit nicely
                    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
                    (http-transport:server-shutdown server-id port)))))))
              #f))) ; return #f - don't retry
      #f)) ; return #f - don't retry since there is no inmemdb

  #f)

;;; factored out of http-transport:keep-running
(define (http-transport:get-server-info tdbdat server-start-time server-id run-id)
  (let loop ((start-time (current-seconds))
             (changed    #t)
             (last-sdat  "not this"))
    (let ((sdat #f))
      (thread-sleep! 0.01)
      (debug:print-info 0 "Waiting for server alive signature")
      (mutex-lock!   *heartbeat-mutex*)
      (set! sdat     *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      (if (and sdat
               (not changed)
               (> (- (current-seconds) start-time) 2))
               (> (- (current-seconds) start-time) (- (tasks:update-pause-seconds) 1) ))
          sdat
          (begin
            (debug:print-info 0 "Still waiting, last-sdat=" last-sdat)
            (sleep 4)
            (debug:print-info 0 "Still waiting, sdat="sdat" last-sdat=" last-sdat)
            (sleep (tasks:update-pause-seconds))
            (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
                (begin
                  (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id)
                  (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
                  (exit))
                (loop start-time
                      (equal? sdat last-sdat)