Overview
Context
Changes
Modified commonmod.scm
from [0b83b5ae2b]
to [f2f99ff8c5].
︙ | | |
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
|
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
|
-
-
+
+
+
+
|
(if (file-exists? gzfile) (delete-file gzfile))
(system (conc "gzip " logfile))
(unsetenv "TARGETHOST_LOGF")
(unsetenv "TARGETHOST"))))
(define (server:get-logs-list area-path)
(let* ((server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log"))
(server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))))
(let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log"))
;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string))))
(server-logs (glob (conc area-path"/logs/server-*-*.log")))
)
server-logs))
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
(let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
|
︙ | | |
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
|
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
|
-
-
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
-
-
+
+
-
-
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
+
|
(or ns numservers)))
;; given a path to a server log return: host port startseconds
;; any changes to number of elements returned by this fuction will dirctly affect server:record->url,server:record->id,server:kill,server:get-num-alive which uses match let
(define (server:logf-get-start-info logf)
(let ((server-rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
(dbprep-rx (regexp "^SERVER: dbprep"))
(dbprep-found 0))
(dbprep-rx (regexp "^SERVER: dbprep"))
(exiting-rx (regexp ".*exiting promptly.*"))
(dbprep-found #f)
(exiting-found #f))
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "Unable to get server info from " logf ", exn=" exn)
(list #f #f #f #f)) ;; no idea what went wrong, call it a bad server
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match server-rx inl))
(dbprep (string-match dbprep-rx inl))
)
(if dbprep
(set! dbprep-found 1)
(let ((mlst (string-match server-rx inl))
(dbprep (string-match dbprep-rx inl))
(exiting (string-match exiting-rx inl)))
(if dbprep (set! dbprep-found #t))
(if exiting (set! exiting-found #t))
)
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(begin
(debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
(list #f #f #f #f)))
(debug:print-info 0 *default-log-port* "Unable to get server info from first 500 lines of " logf )
(list #f #f #f #f)))
(let ((dat (cdr mlst)))
(list (car dat) ;; host
(string->number (cadr dat)) ;; port
(string->number (caddr dat))
(cadr (cddr dat))))))
(begin
(if dbprep-found
(begin
(cond
(dbprep-found
(begin
(debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds))
(thread-sleep! 25)
)
(debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))
(debug:print-info 0 *default-log-port* "Server is in dbprep at " (current-seconds))
(thread-sleep! 25))
(exiting-found
(debug:print-info 0 *default-log-port* "Removing server log "logf" as the server exited due to signal")
(delete-file* logf)
(thread-sleep! 1))
(else
(debug:print-info 0 *default-log-port* "Unable to get server info from " logf " at " (current-seconds))))
)
(list #f #f #f #f)))))))))
(list #f #f #f #f)))))))))
;;======================================================================
;; DO NOT CALL THIS DIRECTLY. It is called from common:wait-for-normalized-load
;;
;; (define (common:wait-for-cpuload maxload-in numcpus-in waitdelay #!key (count 1000) (msg #f)(remote-host #f)(force-maxload #f)(num-tries 5))
;; (let* ((loadavg (common:get-cpu-load remote-host))
|
︙ | | |
Modified dbmod.scm
from [6d0e6f1813]
to [8e5937bb80].
︙ | | |
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
|
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
|
-
-
+
+
|
(if (and (or (not dbfexists)
(and modtimedelta
(> modtimedelta max-stale-tmp))) ;; if db in tmp is over ten seconds older than the file in MTRA then do a sync back
do-sync)
(begin
(debug:print 1 *default-log-port* "filling db " (db:dbdat-get-path tmpdb) " with data \n from " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta)
(db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb refndb tmpdb)
;touch tmp db to avoid wal mode wierdness
(set! (file-modification-time tmpdbfname) (current-seconds))
;; touch tmp db to avoid wal mode wierdness
(set-file-times! tmpdbfname (current-seconds))
(debug:print-info 13 *default-log-port* "db:sync-all-tables-list done.")
)
(debug:print 4 *default-log-port* " db, " (db:dbdat-get-path tmpdb) " already exists or fresh enough, not propogating data from\n " (db:dbdat-get-path mtdb) " mod time delta: " modtimedelta) )
;; (db:multi-db-sync dbstruct 'old2new)) ;; migrate data from megatest.db automatically
tmpdb))))
|
︙ | | |
Modified debugprint.scm
from [668a77fa42]
to [fdf96a030a].
︙ | | |
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(print "ERROR: Invalid debug value \"" vstr "\"")
#f)
#t))
(define (debug:debug-mode n)
(let* ((vb (verbosity)))
(cond
((and (number? vb) ;; number number
(number? n))
(<= n vb))
((and (list? vb) ;; list number
(number? n))
(member n vb))
((and (list? vb) ;; list list
(list? n))
(not (null? (lset-intersection! eq? vb n))))
((and (number? vb)
(list? n))
(member vb n)))))
((and (number? vb) ;; number number
(number? n))
(<= n vb))
((and (list? vb) ;; list number
(number? n))
(member n vb))
((and (list? vb) ;; list list
(list? n))
(not (null? (lset-intersection! eq? vb n))))
((and (number? vb)
(list? n))
(member vb n))
(else #f))))
(define (debug:print n e . params)
(if (debug:debug-mode n)
(with-output-to-port (or e (current-error-port))
(lambda ()
;; (if *logging*
;; (db:log-event (apply conc params))
|
︙ | | |
Modified http-transportmod.scm
from [698ee19400]
to [7319316419].
︙ | | |
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
|
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
|
-
+
|
(define (http-transport:server-shutdown port)
(begin
;;(BB> "http-transport:server-shutdown called")
(debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
;;
;; start_shutdown
;;
(set! (bdat-time-to-exit *bdat*) #t) ;; tell on-exit to be fast as we've already cleaned up
(bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up
(portlogger:open-run-close portlogger:set-port port "released")
(thread-sleep! 1)
;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
;; (debug:print-info 0 *default-log-port* "Average cached write time "
;; (if (eq? *number-of-writes* 0)
|
︙ | | |
Modified launchmod.scm
from [046e0b01a9]
to [2eee1b3817].
︙ | | |
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
|
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
|
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
-
-
+
+
-
+
|
(loop (current-seconds)))
#t)))
(debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, (bdat-time-to-exit *bdat*) = " (bdat-time-to-exit *bdat*)" pid="(current-process-id)" mtpath="golden-mtpath)))
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (init-watchdog)
(set! (bdat-watchdog-set! *bdat*)
(make-thread
(lambda ()
(handle-exceptions
exn
(begin
(print-call-chain)
(print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
(common:watchdog)))
"Watchdog thread"))
(bdat-watchdog-set! *bdat*
(make-thread
(lambda ()
(handle-exceptions
exn
(begin
(print-call-chain)
(print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
(common:watchdog)))
"Watchdog thread"))
(start-watchdog))
(define (start-watchdog)
;;(if (not (args:get-arg "-server"))
;; (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
(let* ((no-watchdog-args
'("-list-runs"
"-testdata-csv"
"-list-servers"
"-server"
"-adjutant"
"-list-disks"
"-list-targets"
"-show-runconfig"
;;"-list-db-targets"
"-show-runconfig"
"-show-config"
"-show-cmdinfo"
"-cleanup-db"
))
(no-watchdog-argvals (list '("-archive" . "replicate-db")))
(start-watchdog-specail-arg-val (let loop ((hed (car no-watchdog-argvals))
(tail (cdr no-watchdog-argvals)))
(no-watchdog-argvals (list '("-archive" . "replicate-db")))
(start-watchdog-special-arg-val (let loop ((hed (car no-watchdog-argvals))
(tail (cdr no-watchdog-argvals)))
;; (print "hed" hed " arg " (args:get-arg (car hed)) " val:" (cdr hed) " eql" (equal? (args:get-arg (car hed)) (cdr hed)))
(if (equal? (args:get-arg (car hed)) (cdr hed))
#f
(if (null? tail)
#t
(loop (car tail) (cdr tail))))))
(no-watchdog-args-vals (filter (lambda (x) x)
(map args:get-arg no-watchdog-args)))
(start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val)))
;(print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog)
(start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-special-arg-val)))
;; (print "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-special-arg-val:" start-watchdog-special-arg-val " start-watchdog:" start-watchdog)
(if start-watchdog
(thread-start! (bdat-watchdog *bdat*)))))
(define (server:writable-watchdog-deltasync dbstruct)
(thread-sleep! 0.054) ;; delay for startup
(let ((legacy-sync (common:run-sync?))
(sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
(debug-mode (debug:debug-mode 1))
(last-time (current-seconds))
(no-sync-db (db:open-no-sync-db))
|
︙ | | |
Modified megatest.scm
from [f5acd0dd12]
to [62df76911c].
︙ | | |
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
|
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
|
-
+
|
;; immediately set MT_TARGET if -reqtarg or -target are available
;;
(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
(if targ (setenv "MT_TARGET" targ)))
;; The watchdog is to keep an eye on things like db sync etc.
;;
(init-watchdog)
;; (define (debug:debug-mode n)
;; (cond
;; ((and (number? *verbosity*) ;; number number
;; (number? n))
;; (<= n *verbosity*))
;; ((and (list? *verbosity*) ;; list number
|
︙ | | |