Overview
Comment: | multiple tweaks getting it to all work again |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6584-ck5 |
Files: | files | file ages | folders |
SHA1: |
507ea188e31e686deba12ec31ab8e636 |
User & Date: | matt on 2021-04-18 22:58:32 |
Other Links: | branch diff | manifest | tags |
Context
2021-04-18
| ||
23:26 | wip check-in: 641ecb4b57 user: matt tags: v1.6584-ck5 | |
22:58 | multiple tweaks getting it to all work again check-in: 507ea188e3 user: matt tags: v1.6584-ck5 | |
21:07 | wiphtop check-in: cecf838aa0 user: matt tags: v1.6584-ck5 | |
Changes
Modified commonmod.scm from [0b83b5ae2b] to [f2f99ff8c5].
︙ | ︙ | |||
2065 2066 2067 2068 2069 2070 2071 | (if (file-exists? gzfile) (delete-file gzfile)) (system (conc "gzip " logfile)) (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")))) (define (server:get-logs-list area-path) | | | > > | 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 | (if (file-exists? gzfile) (delete-file gzfile)) (system (conc "gzip " logfile)) (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")))) (define (server:get-logs-list area-path) (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) (server-logs (glob (conc area-path"/logs/server-*-*.log"))) ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; (define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) |
︙ | ︙ | |||
2217 2218 2219 2220 2221 2222 2223 | (or ns numservers))) ;; given a path to a server log return: host port startseconds ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id | | > | > | | | | | < | | | > | < | | > > > | > | < | | 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 | (or ns numservers))) ;; given a path to a server log return: host port startseconds ;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let (define (server:logf-get-start-info logf) (let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id (dbprep-rx (regexp "^SERVER: dbprep")) (exiting-rx (regexp ".*exiting promptly.*")) (dbprep-found #f) (exiting-found #f)) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn) (list #f #f #f #f)) ;; no idea what went wrong, call it a bad server (with-input-from-file logf (lambda () (let loop ((inl (read-line)) (lnum 0)) (if (not (eof-object? inl)) (let ((mlst (string-match server-rx inl)) (dbprep (string-match dbprep-rx inl)) (exiting (string-match exiting-rx inl))) (if dbprep (set! dbprep-found #t)) (if exiting (set! exiting-found #t)) (if (not mlst) (if (< lnum 500) ;; give up if more than 500 lines of server log read (loop (read-line)(+ lnum 1)) (begin (debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf ) (list #f #f #f #f))) (let ((dat (cdr mlst))) (list (car dat) ;; host (string->number (cadr dat)) ;; port (string->number (caddr dat)) (cadr (cddr dat)))))) (begin (cond (dbprep-found (debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds)) (thread-sleep! 25)) (exiting-found (debug:print-info 0 *default-log-port* "Removing server log "logf" as the server exited due to signal") (delete-file* logf) (thread-sleep! 1)) (else (debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds)))) (list #f #f #f #f))))))))) ;;====================================================================== ;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load ;; ;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5)) ;; (let* ((loadavg (common:get-cpu-load remote-host)) |
︙ | ︙ |
Modified dbmod.scm from [6d0e6f1813] to [8e5937bb80].
︙ | ︙ | |||
404 405 406 407 408 409 410 | (if (and (or (not dbfexists) (and modtimedelta (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) | | | | 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 | (if (and (or (not dbfexists) (and modtimedelta (> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back do-sync) (begin (debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb) ;; touch tmp db to avoid wal mode wierdness (set-file-times! tmpdbfname (current-seconds)) (debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.") ) (debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) ) ;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically tmpdb)))) |
︙ | ︙ |
Modified debugprint.scm from [668a77fa42] to [fdf96a030a].
︙ | ︙ | |||
58 59 60 61 62 63 64 | (print "ERROR: Invalid debug value \"" vstr "\"") #f) #t)) (define (debug:debug-mode n) (let* ((vb (verbosity))) (cond | | | | | | | | | | | | | > | 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | (print "ERROR: Invalid debug value \"" vstr "\"") #f) #t)) (define (debug:debug-mode n) (let* ((vb (verbosity))) (cond ((and (number? vb) ;; number number (number? n)) (<= n vb)) ((and (list? vb) ;; list number (number? n)) (member n vb)) ((and (list? vb) ;; list list (list? n)) (not (null? (lset-intersection! eq? vb n)))) ((and (number? vb) (list? n)) (member vb n)) (else #f)))) (define (debug:print n e . params) (if (debug:debug-mode n) (with-output-to-port (or e (current-error-port)) (lambda () ;; (if *logging* ;; (db:log-event (apply conc params)) |
︙ | ︙ |
Modified http-transportmod.scm from [698ee19400] to [7319316419].
︙ | ︙ | |||
596 597 598 599 600 601 602 | (define (http-transport:server-shutdown port) (begin ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; | | | 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 | (define (http-transport:server-shutdown port) (begin ;;(BB> "http-transport:server-shutdown called") (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) ;; ;; start_shutdown ;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up (portlogger:open-run-close portlogger:set-port port "released") (thread-sleep! 1) ;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*) ;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*) ;; (debug:print-info 0 *default-log-port* "Average cached write time " ;; (if (eq? *number-of-writes* 0) |
︙ | ︙ |
Modified launchmod.scm from [046e0b01a9] to [2eee1b3817].
︙ | ︙ | |||
2339 2340 2341 2342 2343 2344 2345 | (loop (current-seconds))) #t))) (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath))) ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (init-watchdog) | | | | | | | | | | | | | | | | | | 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 | (loop (current-seconds))) #t))) (debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath))) ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (init-watchdog) (bdat-watchdog-set! *bdat* (make-thread (lambda () (handle-exceptions exn (begin (print-call-chain) (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn)) (common:watchdog))) "Watchdog thread")) (start-watchdog)) (define (start-watchdog) ;;(if (not (args:get-arg "-server")) ;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog (let* ((no-watchdog-args '("-list-runs" "-testdata-csv" "-list-servers" "-server" "-adjutant" "-list-disks" "-list-targets" "-show-runconfig" ;;"-list-db-targets" "-show-runconfig" "-show-config" "-show-cmdinfo" "-cleanup-db" )) (no-watchdog-argvals (list '("-archive" . "replicate-db"))) (start-watchdog-special-arg-val (let loop ((hed (car no-watchdog-argvals)) (tail (cdr no-watchdog-argvals))) ;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed))) (if (equal? (args:get-arg (car hed)) (cdr hed)) #f (if (null? tail) #t (loop (car tail) (cdr tail)))))) (no-watchdog-args-vals (filter (lambda (x) x) (map args:get-arg no-watchdog-args))) (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-special-arg-val))) ;; (print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-special-arg-val:" start-watchdog-special-arg-val " start-watchdog:" start-watchdog) (if start-watchdog (thread-start! (bdat-watchdog *bdat*))))) (define (server:writable-watchdog-deltasync dbstruct) (thread-sleep! 0.054) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) (no-sync-db (db:open-no-sync-db)) |
︙ | ︙ |
Modified megatest.scm from [f5acd0dd12] to [62df76911c].
︙ | ︙ | |||
775 776 777 778 779 780 781 | ;; immediately set MT_TARGET if -reqtarg or -target are available ;; (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) ;; The watchdog is to keep an eye on things like db sync etc. ;; | | | 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 | ;; immediately set MT_TARGET if -reqtarg or -target are available ;; (let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target")))) (if targ (setenv "MT_TARGET" targ))) ;; The watchdog is to keep an eye on things like db sync etc. ;; (init-watchdog) ;; (define (debug:debug-mode n) ;; (cond ;; ((and (number? *verbosity*) ;; number number ;; (number? n)) ;; (<= n *verbosity*)) ;; ((and (list? *verbosity*) ;; list number |
︙ | ︙ |