Megatest

Check-in [88034605c0]
Login
Overview
Comment:fixed condition where watchdog continued to process many times rapidly when time-to-exit is #t
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.63
Files: files | file ages | folders
SHA1: 88034605c0e5a6bf5ef6a1feffde1bb4355f3dce
User & Date: bjbarcla on 2016-12-28 16:35:18
Other Links: branch diff | manifest | tags
Context
2017-01-03
10:50
Commented out a call to BB> that seemed to have trouble ... check-in: e26b0892dd user: mrwellan tags: v1.63
2016-12-29
00:11
updated server launch to handle collisions gracefully (but introduced other issues - moving to sidebranch to debug) check-in: c9880665a8 user: bjbarcla tags: v1.63-server-fix
2016-12-28
17:22
Automated merge of v1.63/88034605c0/integ into integ-home check-in: fd7a37d790 user: matt tags: integ-home
16:35
fixed condition where watchdog continued to process many times rapidly when time-to-exit is #t check-in: 88034605c0 user: bjbarcla tags: v1.63
2016-12-27
17:15
improved server stability check-in: 6b5c7789cb user: bjbarcla tags: v1.63
Changes

Modified common.scm from [ac422f66d5] to [d2df0eaa3f].

556
557
558
559
560
561
562





563
564
565
566

567
568
569
570
571
572





573
574
575
576

577
578
579
580
581

582

583
584
585
586
587
588
589
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575



576
577
578
579
580
581
582
583

584
585
586
587
588
589
590

591
592
593
594
595
596
597
598







+
+
+
+
+




+



-
-
-
+
+
+
+
+



-
+





+
-
+







	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
    res))




(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id))
    (if legacy-sync
	(last-time   (current-seconds))
        (this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
            ;;(BB> "watchdog loop.  pid="(current-process-id))
            (BB> "watchdog loop.  pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
607
608
609
610
611
612
613


614
615
616
617
618
619

620
621

622
623
624
625
626
627
628
616
617
618
619
620
621
622
623
624
625
626
627
628
629

630
631

632
633
634
635
636
637
638
639







+
+





-
+

-
+







		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  (BB> "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (loop)))
		  (if (not *time-to-exit*) (loop))))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id))))))))
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))

(define (std-exit-procedure)
  (on-exit (lambda () 0))
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)

Modified megatest.scm from [ea009aa7f2] to [db4e22b0be].

348
349
350
351
352
353
354
355

356
357
358
359
360
361
362
348
349
350
351
352
353
354

355
356
357
358
359
360
361
362







-
+







  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(thread-start! *watchdog*)

(BB> "thread-start! watchdog")
(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
	(args:get-arg "-help")
1988
1989
1990
1991
1992
1993
1994
1995

1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
1988
1989
1990
1991
1992
1993
1994

1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008







-
+














;;======================================================================
;; Exit and clean up
;;======================================================================

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))

(BB> "thread-join! watchdog")
(thread-join! *watchdog*)
(set! *time-to-exit* #t)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))