44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
+
|
z3
typed-records
matchable)
(declare (unit db))
(declare (uses common))
(declare (uses dbmod))
;; (declare (uses debugprint))
(declare (uses dbfile))
(declare (uses keys))
(declare (uses ods))
(declare (uses client))
(declare (uses mt))
(include "common_records.scm")
|
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
|
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
|
(db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
(mutex-lock! *db-multi-sync-mutex*)
(set! *db-last-sync* start-t)
(set! *db-last-access* start-t)
(mutex-unlock! *db-multi-sync-mutex*)
(stack-push! (dbr:subdb-dbstack subdb) tmpdb)))
(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
(if (<= try-num 0)
#f
(handle-exceptions
exn
(begin
(print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)
(thread-sleep! 3)
(sqlite3:interrupt! db)
(db:safely-close-sqlite3-db db stmt-cache try-num: (- try-num 1)))
(if (sqlite3:database? db)
(let* ((stmts (and stmt-cache (hash-table-ref/default stmt-cache db #f))))
(if stmts (map sqlite3:finalize! (hash-table-values stmts)))
(sqlite3:finalize! db)
#t)
#f))))
;; close all opened run-id dbs
(define (db:close-all dbstruct)
(if (dbr:dbstruct? dbstruct)
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "WARNING: Finalizing failed, " ((condition-property-accessor 'exn 'message) exn) ", note - exn=" exn)
(print-call-chain *default-log-port*))
;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
(let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
(for-each
(lambda (subdb)
(let* ((tdbs (stack->list (dbr:subdb-dbstack subdb)))
(mdb (dbr:dbdat-dbh (dbr:subdb-mtdb subdb)))
(rdb (dbr:dbdat-dbh (dbr:subdb-refndb subdb))))
(map (lambda (dbdat)
(let* ((stmt-cache (dbr:dbdat-stmt-cache dbdat))
(dbh (dbr:dbdat-dbh dbdat)))
(db:safely-close-sqlite3-db dbh stmt-cache)))
;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile
tdbs)))
subdbs)
(db:safely-close-sqlite3-db mdb stmt-cache) ;; (if (sqlite3:database? mdb) (sqlite3:finalize! mdb))
(db:safely-close-sqlite3-db rdb stmt-cache))))) ;; (if (sqlite3:database? rdb) (sqlite3:finalize! rdb))))))
;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;; (if (hash-table? locdbs)
;; (for-each (lambda (run-id)
;; (db:close-run-db dbstruct run-id))
;; (hash-table-keys locdbs)))))
|
4998
4999
5000
5001
5002
5003
5004
5005
|
4959
4960
4961
4962
4963
4964
4965
4966
4967
4968
4969
4970
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
5066
5067
5068
5069
5070
5071
5072
5073
5074
5075
5076
5077
5078
5079
5080
5081
5082
5083
5084
5085
5086
5087
5088
5089
5090
5091
5092
5093
5094
5095
5096
5097
5098
5099
5100
5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
results)
;; brutal clean up
(stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat)
(system "rm -rf tempdir")))
;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%")
;;======================================================================
;; moving watch dogs here due to dependencies
;;======================================================================
;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
(thread-sleep! 0.05) ;; delay for startup
(debug:print-info 13 *default-log-port* "common:readonly-watchdog entered.")
;; sync megatest.db to /tmp/.../megatst.db
(let* ((sync-cool-off-duration 3)
(golden-mtdb (dbr:dbstruct-mtdb dbstruct))
(golden-mtpath (db:dbdat-get-path golden-mtdb))
(tmp-mtdb (dbr:dbstruct-tmpdb dbstruct))
(tmp-mtpath (db:dbdat-get-path tmp-mtdb)))
(debug:print-info 0 *default-log-port* "Read-only periodic sync thread started.")
(let loop ((last-sync-time 0))
(debug:print-info 13 *default-log-port* "loop top tmp-mtpath="tmp-mtpath" golden-mtpath="golden-mtpath)
(let* ((duration-since-last-sync (- (current-seconds) last-sync-time)))
(debug:print-info 13 *default-log-port* "duration-since-last-sync="duration-since-last-sync)
(if (and (not *time-to-exit*)
(< duration-since-last-sync sync-cool-off-duration))
(thread-sleep! (- sync-cool-off-duration duration-since-last-sync)))
(if (not *time-to-exit*)
(let ((golden-mtdb-mtime (file-modification-time golden-mtpath))
(tmp-mtdb-mtime (file-modification-time tmp-mtpath)))
(if (> golden-mtdb-mtime tmp-mtdb-mtime)
(if (< golden-mtdb-mtime (- (current-seconds) 3)) ;; file has NOT been touched in past three seconds, this way multiple servers won't fight to sync back
(let ((res (db:multi-db-sync dbstruct 'old2new)))
(debug:print-info 13 *default-log-port* "rosync called, " res " records transferred."))))
(loop (current-seconds)))
#t)))
(debug:print-info 0 *default-log-port* "Exiting readonly-watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" mtpath="golden-mtpath)))
;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
(debug:print-info 13 *default-log-port* "common:watchdog entered.")
(if (launch:setup)
(if (common:on-homehost?)
(let ((dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;; #t)))
(debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
(cond
((dbr:dbstruct-read-only dbstruct)
(debug:print-info 13 *default-log-port* "loading read-only watchdog")
(common:readonly-watchdog dbstruct))
(else
(debug:print-info 13 *default-log-port* "loading writable-watchdog.")
(let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "brute-force-sync")))
(cond
((equal? syncer "brute-force-sync")
(server:writable-watchdog-bruteforce dbstruct))
((equal? syncer "delta-sync")
(server:writable-watchdog-deltasync dbstruct))
(else
(debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
(exit 1)))
;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
)))
(debug:print-info 13 *default-log-port* "watchdog done."))
(debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
(define (server:writable-watchdog-bruteforce dbstruct)
(thread-sleep! 1) ;; delay for startup
(let* ((do-a-sync (server:get-bruteforce-syncer dbstruct))
(final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
(when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
(args:get-arg "-server"))
(let loop ()
(do-a-sync)
(if (not *time-to-exit*) (loop))) ;; keep going unless time to exit
;; time to exit, close the no-sync db here
(final-sync)
(if (common:low-noise-print 30)
(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
)))))
(define (server:writable-watchdog-deltasync dbstruct)
;; This is awful complex and convoluted. Plan to redo?
;; for now ... skip it.
;; ==>
;; ==> (thread-sleep! 0.05) ;; delay for startup
;; ==> (let ((legacy-sync (common:run-sync?))
;; ==> (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
;; ==> (debug-mode (debug:debug-mode 1))
;; ==> (last-time (current-seconds))
;; ==> (no-sync-db (db:open-no-sync-db))
;; ==> (stmt-cache #f) ;; (dbr:dbstruct-stmt-cache dbstruct))
;; ==> (sync-duration 0) ;; run time of the sync in milliseconds
;; ==> (subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
;; ==> (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
;; ==> (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
;; ==> (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id) );; " this-wd-num="this-wd-num)
;; ==>
;; ==> (if (and legacy-sync (not *time-to-exit*))
;; ==> (begin
;; ==> (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
(let loop ()
;; ==> ;; sync for filesystem local db writes
;; ==> ;;
;; ==> (mutex-lock! *db-multi-sync-mutex*)
;; ==> (let* ((start-file (conc tmp-area "/.start-sync"))
;; ==> (end-file (conc tmp-area "/.end-sync"))
;; ==>
;; ==> (need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
;; ==> (sync-in-progress *db-sync-in-progress*)
;; ==> (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
;; ==> (should-sync (and (not *time-to-exit*)
;; ==> (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
;; ==> (start-time (current-seconds))
;; ==> (cpu-load-adj (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
;; ==> (mt-mod-time (file-modification-time mtpath))
;; ==> (last-sync-start (if (common:file-exists? start-file)
;; ==> (file-modification-time start-file)
;; ==> 0))
;; ==> (last-sync-end (if (common:file-exists? end-file)
;; ==> (file-modification-time end-file)
;; ==> 10))
;; ==> (sync-period (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
;; ==> (recently-synced (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
;; ==> (< mt-mod-time last-sync-start)))
;; ==> (sync-done (<= last-sync-start last-sync-end))
;; ==> (sync-stale (> start-time (+ last-sync-start sync-stale-seconds)))
;; ==> (will-sync (and (not *time-to-exit*) ;; do not start a sync if we are in the process of exiting
;; ==> (or need-sync should-sync)
;; ==> (or sync-done sync-stale)
;; ==> (not sync-in-progress)
;; ==> (not recently-synced))))
;; ==> (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress
;; ==> " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
;; ==> " sync-done=" sync-done " sync-period=" sync-period)
;; ==> (if (and (> sync-period 5)
;; ==> (common:low-noise-print 30 "sync-period"))
;; ==> (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
;; ==> ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; ==> ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
;; ==> (if will-sync (set! *db-sync-in-progress* #t))
;; ==> (mutex-unlock! *db-multi-sync-mutex*)
;; ==> (if will-sync
;; ==> (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
;; ==> (sync-start (current-milliseconds)))
;; ==> (with-output-to-file start-file (lambda ()(print (current-process-id))))
;; ==>
;; ==> ;; put lock here
;; ==>
;; ==> ;; (if (or (not max-sync-duration)
;; ==> ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
;; ==>
;; ==> ;;
;; ==>
;; ==> (for-each
;; ==> (lambda (subdb)
;; ==> (let* (;;(dbstruct (db:setup))
;; ==> (mtdb (dbr:subdb-mtdb subdb))
;; ==> (mtpath (db:dbdat-get-path mtdb))
;; ==> (tmp-area (common:get-db-tmp-area))
;; ==> (res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
;; ==> (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> (if (> res 0) ;; some records were transferred, keep the db alive
;; ==> (begin
;; ==> (mutex-lock! *heartbeat-mutex*)
;; ==> (set! *db-last-access* (current-seconds))
;; ==> (mutex-unlock! *heartbeat-mutex*)
;; ==> (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
;; ==> (debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
;; ==> )
;; ==> subdbs)))
;; ==> ;; ;; TODO: factor this next routine out into a function
;; ==> ;; (with-input-from-pipe ;; this should not block other threads but need to verify this
;; ==> ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;; ==> ;; (lambda ()
;; ==> ;; (let loop ((inl (read-line))
;; ==> ;; (res #f))
;; ==> ;; (if (eof-object? inl)
;; ==> ;; (begin
;; ==> ;; (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> ;; (cond
;; ==> ;; ((not res)
;; ==> ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;; ==> ;; ((> res 0)
;; ==> ;; (mutex-lock! *heartbeat-mutex*)
;; ==> ;; (set! *db-last-access* (current-seconds))
;; ==> ;; (mutex-unlock! *heartbeat-mutex*))))
;; ==> ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;; ==> ;; (if matches
;; ==> ;; (string->number (cadr matches))
;; ==> ;; #f))))
;; ==> ;; (loop (read-line)
;; ==> ;; (or num-synced res))))))))))
;; ==>
;; ==> (if will-sync
;; ==> (begin
;; ==> (mutex-lock! *db-multi-sync-mutex*)
;; ==> (set! *db-sync-in-progress* #f)
;; ==> (set! *db-last-sync* start-time)
;; ==> (with-output-to-file end-file (lambda ()(print (current-process-id))))
;; ==>
;; ==> ;; release lock here
;; ==>
;; ==> (mutex-unlock! *db-multi-sync-mutex*)))
;; ==> (if (and debug-mode
;; ==> (> (- start-time last-time) 60))
;; ==> (begin
;; ==> (set! last-time start-time)
;; ==> (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
;; ==>
;; keep going unless time to exit
;;
(if (not *time-to-exit*)
(let delay-loop ((count 0))
;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
(if (and (not *time-to-exit*)
(< count 6)) ;; was 11, changing to 4.
(begin
(thread-sleep! 1)
(delay-loop (+ count 1))))
(if (not *time-to-exit*) (loop))))
;; ==> ;; time to exit, close the no-sync db here
;; ==> (db:no-sync-close-db no-sync-db stmt-cache)
(if (common:low-noise-print 30)
(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) ;; ))) ;;" this-wd-num="this-wd-num)))))))
|