Megatest

Diff
Login

Differences From Artifact [0cd20b4ff2]:

To Artifact [5ace6e2c23]:


530
531
532
533
534
535
536
537

538
539
540
541
542
543
544
545
546
547
548
549
550

551
552
553
554
555
556
557
		(let loop ((servrs     servers)
			   (prime-host #f)
			   (result    '()))
		  (if (null? servrs)
		      (reverse result)
		      (let* ((servdat (car servrs)))
			(match servdat
			     ((host port startseconds server-id servinfofile)

			      (let* ((ping-res  (tt:timed-ping host port server-id))
				     (good-ping (match ping-res
						   ((result . ping-time)
						    (not result)) ;; we couldn't reach the server or it was not a megatest server
						   (else #f))) ;; the ping failed completely?
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))
				    (begin

				      (handle-exceptions
				       exn
				       (debug:print-info 0 *default-log-port* "Error removing server info file: "servinfofile", "
							 (condition->list exn))
				       (delete-file* servinfofile))
				      (loop (cdr servrs) prime-host result)))))
			     (else







|
>













>







530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
		(let loop ((servrs     servers)
			   (prime-host #f)
			   (result    '()))
		  (if (null? servrs)
		      (reverse result)
		      (let* ((servdat (car servrs)))
			(match servdat
			     ((host port startseconds server-id pid dbfilename servinfofile)
                              (debug:print-info 0 *default-log-port* "Good servinfo file: " servdat)
			      (let* ((ping-res  (tt:timed-ping host port server-id))
				     (good-ping (match ping-res
						   ((result . ping-time)
						    (not result)) ;; we couldn't reach the server or it was not a megatest server
						   (else #f))) ;; the ping failed completely?
				     (same-host (or (not prime-host) ;; i.e. this is the first host
						    (equal? prime-host host)))
				     (keep-srv  (and good-ping same-host)))
				(if keep-srv	
				    (loop (cdr servrs)
					  host
					  (cons servdat result))
				    (begin
                                      ;; (debug:print-info 0 *default-log-port* "good-ping: " good-ping " same-host: " same-host "keep-srv: " keep-srv)
				      (handle-exceptions
				       exn
				       (debug:print-info 0 *default-log-port* "Error removing server info file: "servinfofile", "
							 (condition->list exn))
				       (delete-file* servinfofile))
				      (loop (cdr servrs) prime-host result)))))
			     (else
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
	 (goodfiles '()))

    ;; filter the files here by looking in processes table (if we are not main.db)
    ;; and or look at the time stamp on the servinfo file, a running server will
    ;; touch the file every minute (again, this will only apply for main.db)
    (for-each (lambda (fname)
		(let* ((age (- (current-seconds)(file-modification-time fname))))
		  (if (> age 200) ;; can't trust it if over 200 seconds old
		      (begin
			(debug:print 0 *default-log-port* "WARNING: removing stale servinfo file "fname", it is "age" seconds old")
			(handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: error attempting to remove stale servinfo file "fname)
			 (delete-file fname))) ;; 
		      (set! goodfiles (cons fname goodfiles)))))
	      sfiles)
    goodfiles))

;; Given a path to a server info file, return: host port startseconds server-id pid dbfname logf
;; Example of the line it looks for in the log file (pid and dbfname are required by the regex):
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 pid: 12345 dbfname: main.db
;;
(define (tt:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
        (dbprep-rx    (regexp "^SERVER: dbprep"))
        (dbprep-found 0)
	(bad-dat      (list #f #f #f #f #f #f logf)))
     (let ((fdat     (handle-exceptions
			 exn
		       (begin
			 ;; BUG, TODO: add err checking, for now blanket ignore the errors?
			 (debug:print-info 0 *default-log-port* "Unable to get server info from "logf
					   ", exn="(condition->list exn))
			 '()) ;; no idea what went wrong, call it a bad server, return empty list
		       (with-input-from-file logf read-lines))))
       (if (null? fdat) ;; bad data, return bad-dat
	   bad-dat
	   (let loop ((inl  (car fdat))
		      (tail (cdr fdat))
		      (lnum 0))
	     (let ((mlst (string-match server-rx inl))
		   (dbprep (string-match dbprep-rx inl)))
	       (if dbprep (set! dbprep-found 1))
	       (if (not mlst)
		   (if (> lnum 500) ;; give up if more than 500 lines of server log read
		       bad-dat
		       (if (null? tail)
			   bad-dat
			   (loop (car tail)(cdr tail)(+ lnum 1))))
		   (match mlst ;; have a not null list







|











|




<
<














|
<
<







716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739


740
741
742
743
744
745
746
747
748
749
750
751
752
753
754


755
756
757
758
759
760
761
	 (goodfiles '()))

    ;; filter the files here by looking in processes table (if we are not main.db)
    ;; and or look at the time stamp on the servinfo file, a running server will
    ;; touch the file every minute (again, this will only apply for main.db)
    (for-each (lambda (fname)
		(let* ((age (- (current-seconds)(file-modification-time fname))))
		  (if (> age (tt-server-timeout-param)) ;; can't trust it if over server timeout old.
		      (begin
			(debug:print 0 *default-log-port* "WARNING: removing stale servinfo file "fname", it is "age" seconds old")
			(handle-exceptions
			 exn
			 (debug:print 0 *default-log-port* "WARNING: error attempting to remove stale servinfo file "fname)
			 (delete-file fname))) ;; 
		      (set! goodfiles (cons fname goodfiles)))))
	      sfiles)
    goodfiles))

;; Given a path to a server info file, return: host port startseconds server-id pid dbfname logf
;; Example of the line it looks for in the file (pid and dbfname are required by the regex):
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 pid: 12345 dbfname: main.db
;;
(define (tt:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id


	(bad-dat      (list #f #f #f #f #f #f logf)))
     (let ((fdat     (handle-exceptions
			 exn
		       (begin
			 ;; BUG, TODO: add err checking, for now blanket ignore the errors?
			 (debug:print-info 0 *default-log-port* "Unable to get server info from "logf
					   ", exn="(condition->list exn))
			 '()) ;; no idea what went wrong, call it a bad server, return empty list
		       (with-input-from-file logf read-lines))))
       (if (null? fdat) ;; bad data, return bad-dat
	   bad-dat
	   (let loop ((inl  (car fdat))
		      (tail (cdr fdat))
		      (lnum 0))
	     (let ((mlst (string-match server-rx inl)))


	       (if (not mlst)
		   (if (> lnum 500) ;; give up if more than 500 lines of server log read
		       bad-dat
		       (if (null? tail)
			   bad-dat
			   (loop (car tail)(cdr tail)(+ lnum 1))))
		   (match mlst ;; have a not null list
911
912
913
914
915
916
917

918
919
920
921
922
923
924
925

926
927
928
929
930
931
932
  (let ((port (portlogger:open-run-close portlogger:find-port)))
    (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
    (debug:print 2 *default-log-port* "setup-listener-portlogger got port " port)
    (handle-exceptions
	exn
      (if (< port 65535)
	  (begin

	    (portlogger:open-run-close portlogger:set-failed port)
	    (thread-sleep! 0.25)
	    (setup-listener-portlogger uconn))
          (begin
            (debug:print 0 *default-log-port* "setup-listener-portlogger: could not get a port")
	    #f
          )
      )

      (connect-listener uconn port))))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 10000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))







>




|



>







909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
  (let ((port (portlogger:open-run-close portlogger:find-port)))
    (assert (tt? uconn) "FATAL: setup-listener called with wrong struct "uconn)
    (debug:print 2 *default-log-port* "setup-listener-portlogger got port " port)
    (handle-exceptions
	exn
      (if (< port 65535)
	  (begin
            (debug:print 0 *default-log-port* "setup-listener-portlogger: exception finding port. Retrying")
	    (portlogger:open-run-close portlogger:set-failed port)
	    (thread-sleep! 0.25)
	    (setup-listener-portlogger uconn))
          (begin
            (assert #t "setup-listener-portlogger: could not get a port")
	    #f
          )
      )
      (debug:print 2 *default-log-port* "setup-listener-portlogger: got port " port)
      (connect-listener uconn port))))

(define (connect-listener uconn port)
  ;; (tcp-listener-socket LISTENER)(socket-name so)
  ;; sockaddr-address, sockaddr-port, sockaddr->string
  (let* ((tlsn (tcp-listen port 10000 #f)) ;; (tcp-listen TCPPORT [BACKLOG [HOST]])
	 (addr  (tt:get-best-guess-address (get-host-name)))) ;; (get-my-best-address))) ;; (hostinfo-addresses (host-information (current-hostname)))