Overview
Comment: | Server usage in dashboard mostly working |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.70-nohomehost |
Files: | files | file ages | folders |
SHA1: |
3ce83ed77c5c50fbc3fd3ada213a34f3 |
User & Date: | matt on 2022-11-21 22:20:10 |
Other Links: | branch diff | manifest | tags |
Context
2022-11-22
| ||
02:57 | sixyfivek is running pretty good check-in: d1ac2665ef user: matt tags: v1.70-nohomehost | |
2022-11-21
| ||
22:20 | Server usage in dashboard mostly working check-in: 3ce83ed77c user: matt tags: v1.70-nohomehost | |
07:14 | Basic auto server start working check-in: 42bfe6fba7 user: matt tags: v1.70-nohomehost | |
Changes
Modified db.scm from [eca8c190a3] to [32f96d6d77].
︙ | ︙ | |||
475 476 477 478 479 480 481 | (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) | | | 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 | (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (old2new (member 'old2new options)) (dejunk (member 'dejunk options)) (killservers (member 'killservers options)) (servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (src-area (if old2new *toppath* tmp-area)) (dest-area (if old2new tmp-area *toppath*)) (dbfiles (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db")))) (keys (db:get-keys dbstruct)) (sync-durations (make-hash-table))) |
︙ | ︙ |
Modified dcommon.scm from [587da77d0a] to [eab340744b].
︙ | ︙ | |||
702 703 704 705 706 707 708 | #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) | | > | | | | | | | | | 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 | #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) (let ((servers (server:choose-server *toppath* 'all-valid))) ;; (server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) ;; (set! colnum (+ 1 colnum))) ;; colnames) (set! rownum 1) (for-each (lambda (server) (set! colnum 0) ;; (("192.168.0.127" 60215 1669088591.0 "c85484f764df7a8550b0224409bd4bcd") (match-let (((host port start-time server-id pid) server)) (let* (;; (uptime (- (current-seconds) mod-time)) (runtime (if start-time (- (current-seconds) start-time) 0)) (vals (list "-" ;; (vector-ref server 0) ;; Id "-" ;; (vector-ref server 9) ;; MT-Ver pid ;; (vector-ref server 1) ;; Pid host ;; (vector-ref server 2) ;; Hostname (conc host ":" port) ;; (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port (seconds->hr-min-sec runtime) ;; (- (current-seconds) start-time)) ;; (vector-ref server 6))) "-" #;(cond ((< uptime 5) "alive") ((< uptime 16) "probably alive");; less than 15 seconds since mod, call it alive (vector-ref server 8) ;; State (else "dead")) "-" ;; (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) (let* ((row-col (conc rownum ":" colnum)) (curr-val (iup:attribute servers-matrix row-col))) (if (not (equal? (conc val) curr-val)) (begin (iup:attribute-set! servers-matrix row-col val) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)))) (set! colnum (+ 1 colnum)))) vals) (set! rownum (+ rownum 1))) (iup:attribute-set! servers-matrix "REDRAW" "ALL"))) (sort servers (lambda (a b)(> (caddr a)(caddr b)))))))))) (set! colnum 0) (for-each (lambda (colname) (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) colnames) ;; (set! dashboard:update-servers-table updater) |
︙ | ︙ |
Modified http-transport.scm from [e2df20210d] to [acc168fde7].
︙ | ︙ | |||
434 435 436 437 438 439 440 | (servinf (conc servinfodir"/"ipaddr":"port))) (if (not (file-exists? servinfodir)) (create-directory servinfodir #t)) (with-output-to-file servinf (lambda () (let* ((serv-id (server:mk-signature))) (set! *server-id* serv-id) | | | 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 | (servinf (conc servinfodir"/"ipaddr":"port))) (if (not (file-exists? servinfodir)) (create-directory servinfodir #t)) (with-output-to-file servinf (lambda () (let* ((serv-id (server:mk-signature))) (set! *server-id* serv-id) (print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id)) (print "started: "(seconds->year-week/day-time (current-seconds)))))) (set! *on-exit-procs* (cons (lambda () (delete-file* servinf)) *on-exit-procs*)) ;; put data about this server into a simple flat file host.port (debug:print-info 0 *default-log-port* "Received server alive signature") |
︙ | ︙ |
Modified megatest.scm from [b216f35a15] to [89decfdb89].
︙ | ︙ | |||
949 950 951 952 953 954 955 | (adjutant-run) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit | | | 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 | (adjutant-run) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit (let* ((servers (server:choose-server *toppath* 'all-valid)) ;; (server:get-list *toppath*)) (fmtstr "~33a~22a~20a~20a~8a\n")) (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State") (format #t fmtstr "==" "=========" "=========" "========" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) (let* ((mtm (any->number (car server))) (mod (if mtm (- (current-seconds) mtm) "unk")) |
︙ | ︙ |
Modified server.scm from [64c69c7a6f] to [816ebbf5c4].
︙ | ︙ | |||
183 184 185 186 187 188 189 | ;; given a path to a server log return: host port startseconds server-id ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 (define (server:logf-get-start-info logf) | | | | > | | | | | | < < | < | | > | | | > > > > | | | < | < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | ;; given a path to a server log return: host port startseconds server-id ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which use match let ;; example of what it's looking for in the log file: ;; SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (dbprep-found 0) (bad-dat (list #f #f #f #f #f))) ;; (handle-exceptions ;; exn ;; (begin ;; (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) ;; bad-dat) ;; no idea what went wrong, call it a bad server (with-input-from-file logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) (let ((mlst (string-match server-rx inl)) (dbprep (string-match dbprep-rx inl))) (if dbprep (set! dbprep-found 1)) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) bad-dat)) (match mlst ((_ host port start server-id pid) (list host (string->number port) (string->number start) server-id (string->number pid))) (else (debug:print 0 *current-log-port* "ERROR: did not recognise SERVER line info "mlst) bad-dat)))) (begin (if dbprep-found (begin (debug:print-info 2 *default-log-port* "Server is in dbprep at " (common:human-time)) (thread-sleep! 0.5)) ;; was 25 sec but that blocked things from starting? (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (seconds->time-string (current-seconds)))) bad-dat)))))));; ) ;; ;; get a list of servers from the log files, with all relevant data ;; ;; ( mod-time host port start-time pid ) ;; ;; ;; (define (server:get-list areapath #!key (limit #f)) ;; (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) ;; (day-seconds (* 24 60 60))) ;; ;; if the directory exists continue to get the list ;; ;; otherwise attempt to create the logs dir and then ;; ;; continue ;; (if (if (directory-exists? (conc areapath "/logs")) ;; '() ;; (if (file-write-access? areapath) ;; (begin ;; (condition-case ;; (create-directory (conc areapath "/logs") #t) ;; (exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs"))) ;; (exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list. exn=" exn))) ;; (directory-exists? (conc areapath "/logs"))) ;; '())) ;; ;; ;; Get the list of server logs. ;; (let* ( ;; ;; For some reason, when I uncomment the below line, ext-tests sometimes starts 1000's of servers. ;; ;; (exiting-servers (system (conc "bash -c 'rm -f `grep -il exiting " areapath "/logs/server-*-*.log 2> /dev/null`'"))) ;; (server-logs (glob (conc areapath "/logs/server-*-*.log"))) ;; (num-serv-logs (length server-logs))) ;; (if (or (null? server-logs) (= num-serv-logs 0)) ;; (let () ;; (debug:print 2 *default-log-port* "There are no servers running at " (common:human-time)) ;; '() ;; ) ;; (let loop ((hed (string-chomp (car server-logs))) ;; (tal (cdr server-logs)) ;; (res '())) ;; (let* ((mod-time (handle-exceptions ;; exn ;; (begin ;; (debug:print 0 *default-log-port* "server:get-list: failed to get modification time on " hed ", exn=" exn) ;; (current-seconds)) ;; 0 ;; (file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted ;; (down-time (- (current-seconds) mod-time)) ;; (serv-dat (if (or (< num-serv-logs 10) ;; (< down-time 900)) ;; day-seconds)) ;; (server:logf-get-start-info hed) ;; '())) ;; don't waste time processing server files not touched in the 15 minutes if there are more than ten servers to look at ;; (serv-rec (cons mod-time serv-dat)) ;; (fmatch (string-match fname-rx hed)) ;; (pid (if fmatch (string->number (list-ref fmatch 2)) #f)) ;; (new-res (if (null? serv-dat) ;; res ;; (cons (append serv-rec (list pid)) res)))) ;; any changes to number of elements in new-res will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let ;; (if (null? tal) ;; (if (and limit ;; (> (length new-res) limit)) ;; new-res ;; (take new-res limit) <= need intelligent sorting before this will work ;; new-res) ;; (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) #;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) (match-let (((mod-time host port start-time server-id pid) server)) (let* ((uptime (- (current-seconds) mod-time)) (runtime (if start-time (- mod-time start-time) 0))) (if (< uptime 5)(set! num-alive (+ num-alive 1))))))) srvlst) num-alive)) ;; ;; given a list of servers get a list of valid servers, i.e. at least ;; ;; 10 seconds old, has started and is less than 1 hour old and is ;; ;; active (i.e. mod-time < 10 seconds ;; ;; ;; ;; mod-time host port start-time pid ;; ;; ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; ;; and servers should stick around for about two hours or so. ;; ;; ;; (define (server:get-best srvlst) ;; (let* ((nums (server:get-num-servers)) ;; (now (current-seconds)) ;; (slst (sort ;; (filter (lambda (rec) ;; (if (and (list? rec) ;; (> (length rec) 2)) ;; (let ((start-time (list-ref rec 3)) ;; (mod-time (list-ref rec 0))) ;; ;; (print "start-time: " start-time " mod-time: " mod-time) ;; (and start-time mod-time ;; (> (- now start-time) 0) ;; been running at least 0 seconds ;; (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds ;; (or (not (configf:lookup *configdat* "server" "runtime")) ;; skip if not set ;; (< (- now start-time) ;; (+ (- (string->number (configf:lookup *configdat* "server" "runtime")) ;; 180) ;; (random 360)))) ;; under one hour running time +/- 180 ;; )) ;; #f)) ;; srvlst) ;; (lambda (a b) ;; (< (list-ref a 3) ;; (list-ref b 3)))))) ;; (if (> (length slst) nums) ;; (take slst nums) ;; slst))) ;; ;; switch from server:get-list to server:get-servers-info ;; ;; ;; (define (server:get-first-best areapath) ;; (let ((srvrs (server:get-best (server:get-list areapath)))) ;; (if (and srvrs ;; (not (null? srvrs))) |
︙ | ︙ | |||
361 362 363 364 365 366 367 | (define (server:record->id servr) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) #f) | | | | 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 | (define (server:record->id servr) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server id from " servr ", exn=" exn) #f) (match-let (((host port start-time server-id pid) servr)) (if server-id server-id #f)))) (define (server:record->url servr) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) #f) (match-let (((host port start-time server-id pid) servr)) (if (and host port) (conc host ":" port) #f)))) ;; if server-start-last exists, and wasn't old enough, wait <idle time> + 1, then call this function recursively until it is old enough. |
︙ | ︙ | |||
495 496 497 498 499 500 501 | (begin (server:run areapath) (thread-sleep! 3) (case mode ((homehost) (cons #f #f)) (else #f)))))) | > > > > | | 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 | (begin (server:run areapath) (thread-sleep! 3) (case mode ((homehost) (cons #f #f)) (else #f)))))) ;; would like to eventually get rid of this ;; (define (common:on-homehost?) (server:choose-server *toppath* 'home?)) ;; kind start up of server, wait before allowing another server for a given ;; area to be launched ;; (define (server:kind-run areapath) ;; look for $MT_RUN_AREA_HOME/logs/server-start-last ;; and wait for it to be at least <server idletime> seconds old ;; (server:wait-for-server-start-last-flag areapath) |
︙ | ︙ |