Megatest

Diff
Login

Differences From Artifact [4c859abee2]:

To Artifact [8440783b64]:


101
102
103
104
105
106
107
108
109
110
111

112
113
114
115
116
117
118
101
102
103
104
105
106
107

108
109

110
111
112
113
114
115
116
117







-


-
+







;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-write*       0)                 ;; used to record last touch of db
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync* and *db-last-write*
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-cache-path*       #f)

;; SERVER
157
158
159
160
161
162
163








164
165
166
167
168
169
170
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177







+
+
+
+
+
+
+
+







(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))

553
554
555
556
557
558
559
560
561
562
563
564







565
566
567
568
569
570
571
572
573

574
575

576
577
578
579
580
581
582
583
584
585
586

587
588
589
590

591
592
593
594

595
596
597
598
599
600
601
560
561
562
563
564
565
566





567
568
569
570
571
572
573
574
575
576
577
578
579
580
581

582
583

584
585
586
587
588
589
590
591
592
593
594

595
596
597
598
599
600
601
602
603

604
605
606
607
608
609
610
611







-
-
-
-
-
+
+
+
+
+
+
+








-
+

-
+










-
+




+



-
+







  (message-digest-string (md5-primitive) *toppath*))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (let ((ohh (common:on-homehost?))
	(srv (args:get-arg "-server")))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
    (and (common:on-homehost?)
	 (args:get-arg "-server"))))
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")))
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
    res))

;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync)
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id))
    (if legacy-sync
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
            ;;(BB> "watchdog loop.  pid="(current-process-id))
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-write* *db-last-sync*)) ;; no sync since last write
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
628
629
630
631
632
633
634
635

636
637
638

639
640
641
642
643
644
645
646
647
648

649
650
651
652
653
654
655
656





657

658
659
660
661

662
663
664
665





666
667
668
669
670
671
672
673
674
675
676
677
678





679
680
681
682
683
684
685
638
639
640
641
642
643
644

645
646
647

648
649
650
651
652
653
654
655
656
657

658
659
660
661
662
663
664
665
666
667
668
669
670
671

672
673
674
675
676
677




678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694

695
696
697
698
699
700
701
702
703
704
705
706







-
+


-
+









-
+








+
+
+
+
+
-
+




+
-
-
-
-
+
+
+
+
+












-
+
+
+
+
+







		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (loop)))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*)))))))
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id))))))))

(define (std-exit-procedure)
  
  (on-exit (lambda () 0))
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))
    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
    (if (and no-hurry (debug:debug-mode 18))
	(rmt:print-db-stats))
    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
			      (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (if (and *runremote*
                                       (remote-conndat *runremote*))
                                  (begin
                                    (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
			      (close-output-port *default-log-port*)
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin
				  (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff
				  (thread-sleep! 2))
			      (debug:print 4 *default-log-port* " ... done")
			      )
                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
                                  (begin
      				  (thread-sleep! 2)))
      			      (debug:print 4 *default-log-port* " ... done")
      			      )
			    "clean exit")))

      ;; let's try to clean up open sockets
      (if *runremote*
          (case (remote-transport *runremote*)
            ((http) #t)
            ((rpc)  (rpc:close-all-connections!))
            (else
             (debug:print-info 0 *default-log-port* "Transport "(remote-transport *runremote*)" not supported"))))

      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1))))
      (thread-join! th1)
      )
    )

  0)

(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t)
  (debug:print-error 0 *default-log-port* "Received signal " signum " exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))
811
812
813
814
815
816
817
818
819




820
821
822
823
824
825
826
827










828
829
830
831
832
833
834
832
833
834
835
836
837
838


839
840
841
842








843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859







-
-
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+







(define (common:args-get-state)
  (or (args:get-arg "-state")(args:get-arg ":state")))

(define (common:args-get-status)
  (or (args:get-arg "-status")(args:get-arg ":status")))

(define (common:args-get-testpatt rconf)
  (let* ((rtestpatt     (if rconf (runconfigs-get rconf "TESTPATT") #f))
	 (args-testpatt (or (args:get-arg "-testpatt")
  (let* ((tagexpr (args:get-arg "-tagexpr"))
         (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f))
         (testpatt-key  (if (args:get-arg "-mode") (args:get-arg "-mode") "TESTPATT"))
         (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%"))
			    (args:get-arg "-runtests")
			    "%"))
	 (testpatt    (or (and (equal? args-testpatt "%")
			       rtestpatt)
			  args-testpatt)))
    (if rtestpatt (debug:print-info 0 *default-log-port* "TESTPATT from runconfigs: " rtestpatt))
    testpatt))

         (rtestpatt     (if rconf (runconfigs-get rconf testpatt-key) #f)))
    (cond
     (tags-testpatt
      (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt)
      tags-testpatt)
     ((and (equal? args-testpatt "%") rtestpatt)
      (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
      rtestpatt)
     (else args-testpatt))))
     
(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree"))))

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")