Megatest

Diff
Login

Differences From Artifact [a5cc6ea588]:

To Artifact [51f05a712a]:


94
95
96
97
98
99
100
101

102
103
104
105
106
107
108
94
95
96
97
98
99
100

101
102
103
104
105
106
107
108







-
+







;; 
;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing)
;; 
;; Configurations for server
(tcp-buffer-size 2048)
(max-connections 2048) 

(defstruct sdat
(defstruct servdat
  host
  port
  uuid)

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
224
225
226
227
228
229
230
231

232
233
234
235
236
237
238
224
225
226
227
228
229
230

231
232
233
234
235
236
237
238







-
+







		
		;; get_next_port goes here
		(http-transport:try-start-server ipaddrstr
						 (portlogger:open-run-close portlogger:find-port)))
	      (begin
		(print "ERROR: Tried and tried but could not start the server"))))
      ;; any error in following steps will result in a retry
      (set! *server-info* (make-sdat host: ipaddrstr port: portnum))
      (set! *server-info* (make-servdat host: ipaddrstr port: portnum))
      (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
      ;; This starts the spiffy server
      ;; NEED WAY TO SET IP TO #f TO BIND ALL
      ;; (start-server bind-address: ipaddrstr port: portnum)
      (if config-hostname ;; this is a hint to bind directly
	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
							ipaddrstr
445
446
447
448
449
450
451









452
453
454
455
456
457
458
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467







+
+
+
+
+
+
+
+
+







	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds) server-id)))
    server-dat))

;;======================================================================
;; NEW SERVER METHOD
;;======================================================================

;; only use for main.db - need to re-write some of this :(
;;
(define (get-lock-db sdat dbfile)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db))
	 (res (db:get-iam-server-lock dbh dbfile)))
    (sqlite3:finalize! dbh)
    res))


(define *srvpktspec*
  `((server (host    . h)
	    (port    . p)
	    (servkey . k)
	    (pid     . i)
	    (ipaddr  . a)
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
478
479
480
481
482
483
484





485
486
487
488
489
490
491







-
-
-
-
-







		   pkts-dir
		   pkt-dat
		   pktspec: pkt-spec
		   ptype: 'server)))
    (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid)
    uuid))

;; ya, fake it for now
;;
(define (register-server-in-db db-file)
  #t)

(define (get-pkts-dir #!optional (apath #f))
  (let* ((effective-toppath (or *toppath* apath)))
    (assert effective-toppath
	    "ERROR: get-pkts-dir called without *toppath* set. Exiting.")
    (let* ((pdir (conc effective-toppath "/.meta/srvpkts")))
      (if (file-exists? pdir)
	  pdir
610
611
612
613
614
615
616
617

618
619
620
621
622


623
624
625
626
627
628
629
630
631

632
633

634






635

636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655


656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675

676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698




699
700
701
702
703
704



705
706
707
708
709
710
711
712
713
714
715
716
717
718
719



720
721
722
723
724
725
726
614
615
616
617
618
619
620

621
622
623
624


625
626
627
628
629
630
631
632
633
634

635


636
637
638
639
640
641
642
643

644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662


663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683

684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703




704
705
706
707
708
709
710



711
712
713
714
715
716
717
718
719
720
721
722
723
724
725



726
727
728
729
730
731
732
733
734
735







-
+



-
-
+
+








-
+
-
-
+

+
+
+
+
+
+
-
+


















-
-
+
+



















-
+



















-
-
-
-
+
+
+
+



-
-
-
+
+
+












-
-
-
+
+
+







				;; create a server pkt in *toppath*/.meta/srvpkts

				;; TODO:
				;;   1. change sdat to stuct
				;;   2. add uuid to struct
				;;   3. update uuid in sdat here
				;;
				(sdat-uuid-set! sdat
				(servdat-uuid-set! sdat
						(register-server
						 pkts-dir *srvpktspec*
						 (get-host-name)
						 (sdat-port sdat) server-key
						 (sdat-host sdat) db-file))
						 (servdat-port sdat) server-key
						 (servdat-host sdat) db-file))

				;; now read pkts and see if we are a contender
				(let* ((all-pkts     (get-all-server-pkts pkts-dir *srvpktspec*))
				       (viables      (get-viable-servers all-pkts db-file))
				       (best-srv     (get-best-candidate viables db-file))
				       (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f)))
				  (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key)
				  ;; am I the best-srv, compare server-keys to know
				  (if (and (equal? best-srv-key server-key)
				  (if (equal? best-srv-key server-key)
					   (register-server-in-db db-file))
				      (if (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
				      (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
					  (debug:print 0 *default-log-port* "I'm the server!")
					  (begin
					    (debug:print 0 *default-log-port* "I'm not the server, exiting.")
					    (bdat-time-to-exit-set! *bdat* #t)
					    (exit)))
				      (begin
					(debug:print 0 *default-log-port* "Keys do not match "best-srv-key", "server-key", exiting.")
					  (bdat-time-to-exit-set! *bdat* #t))) ;; nope, we are not needed, exit when can do
					(bdat-time-to-exit-set! *bdat* #t)))
				  sdat))
                              (begin
				(debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat)
                                (sleep 4)
				(if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes
				    (begin
				      (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server")
                                      #;(common:save-pkt `((action . died)
                                                         (T      . server)
                                                         (pid    . ,(current-process-id))
                                                         (ipaddr . ,(car sdat))
                                                         (port   . ,(cadr sdat))
                                                         (msg    . "Transport died?"))
						       *configdat* #t)
				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
	 (iface       (car server-info))
         (port        (cadr server-info))
	 (iface       (servdat-host server-info))
         (port        (servdat-port server-info))
         (last-access 0)
	 (server-timeout (server:expiration-timeout))
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not *dbstruct-db* )
	  (let ((watchdog (bdat-watchdog *bdat*)))
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    
	    (db:setup dbname) ;; sets *dbstruct-db* as side effect

	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
	    (if watchdog
		(if (not (member (thread-state watchdog) '(ready running blocked sleeping dead)))
		    (begin
		      (debug:print-info 0 "Starting watchdog thread (in state "(thread-state watchdog)")")
		      (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
		      (thread-start! watchdog)))
		(debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))))
      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))
	    (thread-sleep! rem-time)))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running bad-sync-count (current-milliseconds)))
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (or (not (equal? (sdat-host sdat) iface))
	      (not (equal? (sdat-port sdat) port)))
	  (let ((new-iface (car sdat))
		(new-port  (cadr sdat)))
      (if (or (not (equal? (servdat-host sdat) iface))
	      (not (equal? (servdat-port sdat) port)))
	  (let ((new-iface (servdat-host sdat))
		(new-port  (servdat-port sdat)))
	    (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info")
	    (set! iface new-iface)
	    (set! port  new-port)
             (if (not *server-id*)
              (set! *server-id* (server:mk-signature)))
            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
	    (if (not *server-id*)
		(set! *server-id* (server:mk-signature)))
            ;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	    (flush-output *default-log-port*)))
      
      ;; Transfer *db-last-access* to last-access to use in checking that we are still alive
      (mutex-lock! *heartbeat-mutex*)
      (set! last-access *db-last-access*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (common:low-noise-print 120 (conc "server running on " iface ":" port))
	  (begin
             (if (not *server-id*)
              (set! *server-id* (server:mk-signature)))
            (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
	    (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	    (flush-output *default-log-port*)))
	     ;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
	     (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	     (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))
      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
         ((and *server-run*