Megatest

Check-in [1993359947]
Login
Overview
Comment:Delete .servinfo files for servers that are no longer running
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 1993359947bed5bda35ef46dfc77e94b78a262e9
User & Date: mmgraham on 2023-10-31 12:22:38
Other Links: branch diff | manifest | tags
Context
2023-11-01
14:26
Changed megatest version to 1.8019 check-in: 8e5977eca9 user: icfadm tags: v1.80, v1.8019
2023-10-31
12:22
Delete .servinfo files for servers that are no longer running check-in: 1993359947 user: mmgraham tags: v1.80
2023-10-26
18:40
Corrected the glob for db files in dashboard:get-youngest-run-db-modtime, causing less runconfig reading, and speeding up dashboard check-in: cdccb0982f user: mmgraham tags: v1.80
Changes

Modified megatest.scm from [af8974dd23] to [b8a61f70e3].

997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
997
998
999
1000
1001
1002
1003

1004


1005
1006
1007
1008
1009
1010
1011







-

-
-







        (ttdat (make-tt areapath: *toppath*))
     )
     (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
     (for-each
        (lambda (dbfile)
          (let* (
            (dbfname (conc (pathname-file dbfile) ".db"))
            (sfiles   (tt:find-server *toppath* dbfname))
            )
            (for-each 
              (lambda (sfile)
                (let (
                  (sinfos (tt:get-server-info-sorted ttdat dbfname))
                  )
                  (for-each 
                     (lambda (sinfo)
                       (let* (
                         (db (list-ref sinfo 5))
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1023
1024
1025
1026
1027
1028
1029



1030
1031
1032
1033
1034
1035
1036







-
-
-







                         )
                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
                       )
                     )
                     sinfos
                  )
                ) 
              )
              sfiles
            )
          )
       )
       dbfiles
     )
     (set! *didsomething* #t)
     (exit)  
  )
1077
1078
1079
1080
1081
1082
1083
1084

1085
1086
1087
1088
1089
1090
1091
1071
1072
1073
1074
1075
1076
1077

1078
1079
1080
1081
1082
1083
1084
1085







-
+







                         (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2))))
                         (last-mod (seconds->string (list-ref sinfo 2)))
                         (killed (system (conc "ssh " host " kill " pid " > /dev/null")))
                         (dummy2 (sleep 1))
                         (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive"))
                            )
                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
                         (system (conc "rm " sfile))
                         (delete-file* sfile)
                       )
                     )
                     sinfos
                  )
                ) 
              )
              sfiles

Modified tcp-transportmod.scm from [44aa462a83] to [157488cd36].

713
714
715
716
717
718
719
720
721





















722
723
724
725
726
727
728
713
714
715
716
717
718
719


720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747







-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;; Find the server info files of servers that still appear to be running.
;;
;; areapath : area top directory; handed to tt:get-servinfo-dir to locate
;;            the directory holding the server info files
;; dbfname  : db file name; only info files whose name ends in :<dbfname>
;;            are considered
;;
;; For every matching info file the host (field 0) and pid (field 4) are
;; read with tt:server-get-info and "ssh <host> ps <pid>" is run.
;;   - exit status 0     => the file is kept and returned
;;   - any other status  => the info file is deleted as stale
;;
;; Returns the list of kept info file paths. The list is built with cons,
;; so it is in the reverse of the order returned by glob.
;;
;; NOTE(review): any non-zero status from the ssh command is treated as
;; "server is gone", which presumably includes ssh itself failing (host
;; unreachable, auth problem) - confirm that deleting the info file is the
;; intended outcome in that case.
;; future: ping oldest, if alive remove other :<dbfname> files
;;
(define (tt:find-server areapath dbfname)
  (let* ((servdir    (tt:get-servinfo-dir areapath))
         (sfiles     (glob (conc servdir "/*:" dbfname)))
         (good-files '()))
    (for-each
     (lambda (sfile)
       (let* ((sinfo  (tt:server-get-info sfile))
              (host   (list-ref sinfo 0))
              (pid    (list-ref sinfo 4))
              ;; probe the recorded pid on the recorded host; output is discarded,
              ;; only the exit status is used
              (status (system (conc "ssh " host " ps " pid " > /dev/null"))))
         (if (= status 0)
             (set! good-files (cons sfile good-files))
             ;; process not found: remove the stale info file
             (delete-file* sfile))))
     sfiles)
    (debug:print-info 2 *default-log-port* "tt:find-server: good-files: " good-files " sfiles: " sfiles)
    good-files))

;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
;; example of the line it's looking for in the log file (the pid: and dbfname: fields are required for a match):
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 pid: 12345 dbfname: main.db
;;
(define (tt:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id