Changes In Branch v1.70-refactor-procedures Excluding Merge-Ins
This is equivalent to a diff from b1db729de1 to 024073632a
2022-06-07
| ||
18:41 | merged changes from 024073632a and 9a5898a74e. Fixes for db locking, syncing and dashboard startup check-in: 20a2d7904d user: mmgraham tags: v1.70 | |
08:30 | Added with-lock options for both simple and no-sync locking systems. Leaf check-in: 024073632a user: matt tags: v1.70-refactor-procedures | |
2022-06-06
| ||
20:25 | Fixed (kind-of) not working no-sync db based lock check-in: 79a7f8d883 user: matt tags: v1.70-refactor-procedures | |
2022-05-30
| ||
23:05 | Changed dbfile:cautious-open-database to use most of the code from db:lock-create-open check-in: 58512a448e user: mmgraham tags: v1.70 | |
2022-05-27
| ||
20:23 | Move bunch of functions from db.scm to dbfile.scm check-in: ed0f0698ab user: matt tags: v1.70-refactor-procedures | |
19:21 | Commented out some not-used fuctions, removed the server start every 120 seconds and added dbfile handle count check-in: b1db729de1 user: matt tags: v1.70 | |
2022-05-22
| ||
20:20 | Some awful hacks to keep the system running. There is something causing servers to crash, I suspect sync is the problem. This work-around just constantly replaces the servers with new ones. check-in: 3cdcb8c138 user: matt tags: v1.70 | |
Modified Makefile from [86d5260f44] to [8882cd4b6e].
︙ | ︙ | |||
26 27 28 29 30 31 32 | process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm tdb.scm client.scm mt.scm \ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files | | | 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 | process.scm runs.scm tasks.scm tests.scm genexample.scm \ http-transport.scm tdb.scm client.scm mt.scm \ ezsteps.scm lock-queue.scm rmt.scm api.scm \ subrun.scm portlogger.scm archive.scm env.scm \ diff-report.scm cgisetup/models/pgdb.scm # module source files MSRCFILES = dbmod.scm dbfile.scm debugprint.scm mtargs.scm commonmod.scm mofiles/dbfile.o : mofiles/debugprint.o mofiles/debugprint.o : mofiles/mtargs.o # ftail.scm rmtmod.scm commonmod.scm removed # MSRCFILES = ducttape-lib.scm pkts.scm stml2.scm cookie.scm mutils.scm \ # mtargs.scm commonmod.scm dbmod.scm adjutant.scm ulex.scm \ |
︙ | ︙ | |||
152 153 154 155 156 157 158 | $(PREFIX)/share/db/mt-pg.sql : mt-pg.sql mkdir -p $(PREFIX)/share/db $(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql # Special dependencies for the includes $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm | | > | > > | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 | $(PREFIX)/share/db/mt-pg.sql : mt-pg.sql mkdir -p $(PREFIX)/share/db $(INSTALL) mt-pg.sql $(PREFIX)/share/db/mt-pg.sql # Special dependencies for the includes $(MOFILE) $(MOIMPFILES) : megatest-fossil-hash.scm common.o : mofiles/commonmod.o megatest-fossil-hash.scm # mofiles/dbmod.o : mofiles/configfmod.o # commonmod.o dashboard.o megatest.o tcmt.o apimod.o : megatest-fossil-hash.scm tests.o db.o launch.o runs.o dashboard-tests.o \ dashboard-context-menu.o dashboard-guimonitor.o dashboard-main.o \ monitor.o dashboard.o archive.o megatest.o : db_records.scm megatest-fossil-hash.scm tests.o runs.o dashboard.o dashboard-tests.o dashboard-context-menu.o dashboard-main.o : run_records.scm db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm db.o api.o : mofiles/dbmod.o mofiles/dbfile.o mofiles/debugprint.o mofiles/commonmod.o tests.o tasks.o dashboard-tasks.o : task_records.scm runs.o : test_records.scm megatest.o : megatest-fossil-hash.scm megatest-version.scm rmt.scm client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm : common_records.scm megatest-version.scm common_records.scm : altdb.scm mofiles/dbfile.o : mofiles/commonmod.o # mofiles/stml2.o : mofiles/cookie.o # configf.o : mofiles/commonmod.o vg.o dashboard.o : vg_records.scm megatest-version.scm dcommon.o : run_records.scm |
︙ | ︙ |
Modified api.scm from [64bd840562] to [736048365d].
︙ | ︙ | |||
145 146 147 148 149 150 151 | ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (db:open-no-sync-db) ;; sets *no-sync-db* | | | | | | | | | 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | ;; These are called by the server on recipt of /api calls ;; - keep it simple, only return the actual result of the call, i.e. no meta info here ;; ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (db:open-no-sync-db) ;; sets *no-sync-db* ;; (handle-exceptions ;; exn ;; (let ((call-chain (get-call-chain))) ;; (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat ", exn=" exn) ;; (print-call-chain (current-error-port)) ;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) ;; (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (cond ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 200) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") (set! *server-overloaded* #t) (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! |
︙ | ︙ | |||
237 238 239 240 241 242 243 | ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) ;; MISC ((sync-inmem->db) (let ((run-id (car params))) | | | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | ;; TEST DATA ((test-data-rollup) (apply db:test-data-rollup dbstruct params)) ((csv->test-data) (apply db:csv->test-data dbstruct params)) ;; MISC ((sync-inmem->db) (let ((run-id (car params))) (db:sync-touched dbstruct run-id db:initialize-main-db force-sync: #t))) ((mark-incomplete) (apply db:find-and-mark-incomplete dbstruct params)) ((create-all-triggers) (db:create-all-triggers dbstruct)) ((drop-all-triggers) (db:drop-all-triggers dbstruct)) ;; TESTMETA ((testmeta-add-record) (apply db:testmeta-add-record dbstruct params)) ((testmeta-update-field) (apply db:testmeta-update-field dbstruct params)) |
︙ | ︙ | |||
373 374 375 376 377 378 379 | payload: `((params . ,params) (ok-res . #t))) (vector #f res)) (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) | | | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 | payload: `((params . ,params) (ok-res . #t))) (vector #f res)) (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) (vector #t res))))))) ;; ) ;; http-server send-response ;; api:process-request ;; db:* ;; ;; NB// Runs on the server as part of the server loop ;; |
︙ | ︙ |
Modified client.scm from [dc4c7b41e8] to [b582d33d7d].
︙ | ︙ | |||
69 70 71 72 73 74 75 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; | | > > > > > > | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup-http areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) (mutex-lock! *rmt-mutex*) (let ((res (client:setup-http-baby areapath remaining-tries: remaining-tries failed-connects: failed-connects area-dat: area-dat))) (mutex-unlock! *rmt-mutex*) res)) (define (client:setup-http-baby areapath #!key (remaining-tries 100) (failed-connects 0)(area-dat #f)) (debug:print-info 2 *default-log-port* "client:setup remaining-tries=" remaining-tries) (server:start-and-wait areapath) (if (<= remaining-tries 0) (begin (debug:print-error 0 *default-log-port* "failed to start or connect to server") (exit 1)) ;; ;; Alternatively here, we can get the list of candidate servers and work our way ;; through them searching for a good one. ;; (let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath)) (runremote (or area-dat *runremote*))) (if (not server-dat) ;; no server found (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) (let ((host (cadr server-dat)) (port (caddr server-dat)) (server-id (caddr (cddr server-dat)))) (debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if (and (not area-dat) (not *runremote*)) (begin |
︙ | ︙ | |||
106 107 108 109 110 111 112 | (let* ((start-res (case *transport-type* ((http)(http-transport:client-connect host port server-id)))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res))))) (if (and start-res ping-res) (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago | > > | | | > | | | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | (let* ((start-res (case *transport-type* ((http)(http-transport:client-connect host port server-id)))) (ping-res (case *transport-type* ((http)(rmt:login-no-auto-client-setup start-res))))) (if (and start-res ping-res) (let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago (if runremote (begin (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res) (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res)) start-res) (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)))) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 *default-log-port* "client:setup, login unsuccessful, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid. Fixes part of Randy;s ticket 1405717332 (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup-http-baby areapath remaining-tries: (- remaining-tries 1))))))))) |
Modified common.scm from [fd321f41c8] to [fda00ca967].
︙ | ︙ | |||
24 25 26 27 28 29 30 | matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) pkts (prefix dbi dbi:) ) (declare (unit common)) | | | | 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 | matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) pkts (prefix dbi dbi:) ) (declare (unit common)) (declare (uses commonmod)) (import commonmod) (include "common_records.scm") ;; (require-library margs) ;; (include "margs.scm") |
︙ | ︙ | |||
127 128 129 130 131 132 133 | (define *toppath* #f) (define *already-seen-runconfig-info* #f) (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar ;; (define *alt-log-file* #f) ;; used by -log | | | | | | | | | 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | (define *toppath* #f) (define *already-seen-runconfig-info* #f) (define *test-meta-updated* (make-hash-table)) (define *globalexitstatus* 0) ;; attempt to work around possible thread issues (define *passnum* 0) ;; when running track calls to run-tests or similar ;; (define *alt-log-file* #f) ;; used by -log ;; (define *common:denoise* (make-hash-table)) ;; for low noise printing (define *default-log-port* (current-error-port)) (define *time-zero* (current-seconds)) ;; for the watchdog (define *default-area-tag* "local") ;; DATABASE ;; (define *dbstruct-dbs* #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this. ;; db stats (define *db-stats* (make-hash-table)) ;; hash of vectors < count duration-total > (define *db-stats-mutex* (make-mutex)) ;; db access (define *db-last-access* (current-seconds)) ;; last db access, used in server ;; (define *db-write-access* #t) ;; db sync ;; (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-sync-in-progress* #f) ;; if there is a sync in progress do not try to start another ;; (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* ;; task db (define *task-db* #f) ;; (vector db path-to-db) (define *db-access-allowed* #t) ;; flag to allow access ;; (define *db-access-mutex* (make-mutex)) ;; moved to dbfile (define *db-transaction-mutex* (make-mutex)) (define *db-cache-path* #f) ;; (define *db-with-db-mutex* (make-mutex)) (define *db-api-call-time* (make-hash-table)) ;; hash of command => (list of times) ;; no sync db ;; (define *no-sync-db* #f) ;; moved to dbfile ;; SERVER (define *my-client-signature* #f) (define *transport-type* 'http) ;; override with [server] transport http|rpc|nmsg (define *runremote* #f) ;; if set up for server communication this will hold <host port> ;; (define *max-cache-size* 0) (define *logged-in-clients* (make-hash-table)) (define *server-id* #f) (define *server-info* #f) ;; good candidate for easily convert to non-global (define *time-to-exit* #f) (define *server-run* #t) (define *run-id* #f) (define *server-kind-run* (make-hash-table)) (define *home-host* #f) ;; (define *total-non-write-delay* 0) (define *heartbeat-mutex* (make-mutex)) ;; (define *api-process-request-count* 0) ;; (define *max-api-process-requests* 0) (define *server-overloaded* #f) ;; client (define *rmt-mutex* (make-mutex)) ;; remote access calls mutex ;; RPC transport (define *rpc:listener* #f) |
︙ | ︙ | |||
308 309 310 311 312 313 314 315 316 317 318 319 320 321 | (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode | > | 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 | (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url #f) ;; (server:check-if-running *toppath*) #f)) (server-id #f) (server-info (if *toppath* (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (connect-time (current-seconds)) (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) (ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode |
︙ | ︙ | |||
723 724 725 726 727 728 729 | (begin (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 724 725 726 727 728 729 730 731 732 733 734 735 736 737 | (begin (debug:print-error 0 *default-log-port* "received bad encoded string \"" instr "\", message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) #f) (read (open-input-string (base64:base64-decode instr)))) (read (open-input-string (z3:decode-buffer (base64:base64-decode instr)))))) ;;====================================================================== ;; S T A T E S A N D S T A T U S E S ;;====================================================================== ;; BBnote: *common:std-states* - dashboard filter control and test control state buttons defined here; used in set-fields-panel and dboard:make-controls (define *common:std-states* ;; for toggle buttons in dashboard |
︙ | ︙ |
Modified commonmod.scm from [9423abd515] to [79bf78cc96].
︙ | ︙ | |||
15 16 17 18 19 20 21 22 23 24 | ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== (declare (unit commonmod)) (module commonmod * | > > | | > | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | ;; ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== (declare (unit commonmod)) (use srfi-69) (module commonmod * (import scheme chicken data-structures extras files) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-69 md5 message-digest regex srfi-1) ;;====================================================================== ;; CONTENTS ;; ;; config file utils |
︙ | ︙ | |||
42 43 44 45 46 47 48 49 50 51 52 53 54 55 | (define (get-full-version) (conc megatest-version "-" megatest-fossil-hash)) (define (version-signature) (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) ;;====================================================================== ;; config file utils ;;====================================================================== (define (lookup cfgdat section var) (if (hash-table? cfgdat) | > > > > > > > > > > > | 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | (define (get-full-version) (conc megatest-version "-" megatest-fossil-hash)) (define (version-signature) (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) (define *common:denoise* (make-hash-table)) ;; for low noise printing (define (common:low-noise-print waitval . keys) (let* ((key (string-intersperse (map conc keys) "-" )) (lasttime (hash-table-ref/default *common:denoise* key 0)) (currtime (current-seconds))) (if (> (- currtime lasttime) waitval) (begin (hash-table-set! *common:denoise* key currtime) #t) #f))) ;;====================================================================== ;; config file utils ;;====================================================================== (define (lookup cfgdat section var) (if (hash-table? cfgdat) |
︙ | ︙ | |||
70 71 72 73 74 75 76 77 78 79 80 81 82 83 | (if valstr (val->alist valstr) '()))) ;; should it return empty list or #f to indicate not set? (define (get-section cfgdat section) (hash-table-ref/default cfgdat section '())) ;;====================================================================== ;; misc conversion, data manipulation functions ;;====================================================================== ;; if it looks like a number -> convert it to a number, else return it ;; | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | (if valstr (val->alist valstr) '()))) ;; should it return empty list or #f to indicate not set? (define (get-section cfgdat section) (hash-table-ref/default cfgdat section '())) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) (let ((fmod-time (handle-exceptions ext (current-seconds) (file-modification-time fname)))) (if (file-exists? fname) ;; (common:file-exists? fname) (if (> (- (current-seconds) fmod-time) expire-time) (begin (handle-exceptions exn #f (delete-file* fname)) (common:simple-file-lock fname expire-time: expire-time)) #f) (let ((key-string (conc (get-host-name) "-" (current-process-id)))) (with-output-to-file fname (lambda () (print key-string))) (thread-sleep! 0.25) (if (file-exists? fname) ;; (common:file-exists? fname) (handle-exceptions exn #f (with-input-from-file fname (lambda () (equal? key-string (read-line))))) #f))))) (define (common:simple-file-lock-and-wait fname #!key (expire-time 300)) (let ((end-time (+ expire-time (current-seconds)))) (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time))) (if got-lock #t (if (> end-time (current-seconds)) (begin (thread-sleep! 3) (loop (common:simple-file-lock fname expire-time: expire-time))) #f))))) (define (common:simple-file-release-lock fname) (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) ;;====================================================================== ;; misc conversion, data manipulation functions ;;====================================================================== ;; if it looks like a number -> convert it to a number, else return it ;; |
︙ | ︙ |
Modified db.scm from [48aca6e494] to [451105ba06].
︙ | ︙ | |||
69 70 71 72 73 74 75 76 77 78 79 80 81 82 | ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) ;;====================================================================== ;; hash of hashs ;;====================================================================== (define (db:hoh-set! dat key1 key2 val) | > | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | ;; record for keeping state,status and count for doing roll-ups in ;; iterated tests ;; (defstruct dbr:counts (state #f) (status #f) (count 0)) ;;====================================================================== ;; hash of hashs ;;====================================================================== (define (db:hoh-set! dat key1 key2 val) |
︙ | ︙ | |||
123 124 125 126 127 128 129 | default (begin (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) | < < < < < < < < < > | > | 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | default (begin (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) (define (db:setup do-sync) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") (let* ((tmpdir (common:get-db-tmp-area))) (if (not *dbstruct-dbs*) (dbfile:setup do-sync *toppath* tmpdir) *dbstruct-dbs*))) ;; looks up subdb and returns it, if not found then set up ;; and then return it. ;; #;(define (db:get-db dbstruct run-id) (let* ((res (dbfile:get-subdb dbstruct run-id))) (if res |
︙ | ︙ | |||
192 193 194 195 196 197 198 | (apply debug:print-error 0 *default-log-port* message) (debug:print-error 0 *default-log-port* " params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | (apply debug:print-error 0 *default-log-port* message) (debug:print-error 0 *default-log-port* " params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) ;; NB// #f => return dbdir only ;; (was planned to be; zeroth db with name=main.db) ;; ;; If run-id is #f return to create and retrieve the path where the db will live. ;; (define db:dbfile-path common:get-db-tmp-area) |
︙ | ︙ | |||
428 429 430 431 432 433 434 | (write-access (file-write-access? dbpath))) (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) ;; (cons db dbpath))) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 | (write-access (file-write-access? dbpath))) (debug:print-info 13 *default-log-port* "db:open-megatest-db "dbpath) (if (and dbexists (not write-access)) (set! *db-write-access* #f)) ;; (cons db dbpath))) (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access)))) ;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile ;; (let ((locdbs (dbr:dbstruct-locdbs dbstruct))) ;; (if (hash-table? locdbs) ;; (for-each (lambda (run-id) ;; (db:close-run-db dbstruct run-id)) ;; (hash-table-keys locdbs))))) ;; (define (db:open-inmem-db) ;; (let* ((db (sqlite3:open-database ":memory:")) ;; (handler (make-busy-timeout 3600))) ;; (sqlite3:set-busy-handler! db handler) ;; (db:initialize-run-id-db db) ;; (cons db #f))) ;; use bunch of Unix commands to try to break the lock and recreate the db ;; (define (db:move-and-recreate-db dbdat) (let* ((dbpath (dbr:dbdat-dbfile dbdat)) (dbdir (pathname-directory dbpath)) (fname (pathname-strip-directory dbpath)) (fnamejnl (conc fname "-journal")) |
︙ | ︙ | |||
643 644 645 646 647 648 649 | (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) (else (sqlite3:execute db "vacuum;"))) (sqlite3:finalize! db) #t)))))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 467 468 469 470 471 472 473 474 475 476 477 478 479 480 | (sqlite3:execute "DELETE FROM servers WHERE state LIKE 'defunct%';")) (else (sqlite3:execute db "vacuum;"))) (sqlite3:finalize! db) #t)))))) (define (db:patch-schema-rundb frundb) ;; ;; remove this some time after September 2016 (added in version v1.6031 ;; (for-each (lambda (table-name) |
︙ | ︙ | |||
1037 1038 1039 1040 1041 1042 1043 | ;; (exit 1)) ;; (let* ((th1 (make-thread ;; (lambda () ;; (if (and (common:file-exists? megatest-db) ;; (file-write-access? megatest-db)) ;; (begin ;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* | | < < < < < < < < < < < < < < < < < < < < < < < < | | 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 | ;; (exit 1)) ;; (let* ((th1 (make-thread ;; (lambda () ;; (if (and (common:file-exists? megatest-db) ;; (file-write-access? megatest-db)) ;; (begin ;; (db:sync-to-megatest.db dbstruct 'timestamps) ;; internally mutexes on *db-local-sync* ;; (debug:print-info 2 *default-log-port* "Done syncing to megatest.db")))) ;; "call-with-cached-db sync-to-megatest.db")) ;; (cache-db (db:cache-for-read-only ;; megatest-db ;; (conc cache-dir "/" fname) ;; use-last-update: #t))) ;; (thread-start! th1) ;; (apply proc cache-db params) ;; )))) (define (db:all-db-sync dbstruct) (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) (data-synced 0) ;; count of changed records (tmp-area (common:get-db-tmp-area)) (dbfiles (glob (conc tmp-area"/.db/*.db"))) (sync-durations (make-hash-table)) (no-sync-db (db:open-no-sync-db))) (for-each (lambda (file) |
︙ | ︙ | |||
1115 1116 1117 1118 1119 1120 1121 | (if do-cp (let* ((start-time (current-milliseconds)) (fname (pathname-file file)) (runid (if (string= fname "main") #f (string->number fname))) ) (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " fname", delta: " (- time1 time2) " seconds") | | | 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 | (if do-cp (let* ((start-time (current-milliseconds)) (fname (pathname-file file)) (runid (if (string= fname "main") #f (string->number fname))) ) (debug:print-info 3 *default-log-port* "db:all-db-sync: fname: " fname", delta: " (- time1 time2) " seconds") (db:lock-and-delta-sync no-sync-db dbstruct fname runid (db:get-keys dbstruct) db:initialize-main-db) (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time))) (debug:print-info 3 *default-log-port* "skipping sync. " file " is up to date") ))) dbfiles ) (if dbdat (dbfile:add-dbdat dbstruct #f dbdat))) |
︙ | ︙ | |||
1355 1356 1357 1358 1359 1360 1361 | ;; (define open-run-close #;(define open-run-close open-run-close-exception-handling) ;; open-run-close-no-exception-handling ;; open-run-close-exception-handling) ;;) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | 925 926 927 928 929 930 931 932 933 934 935 936 937 938 | ;; (define open-run-close #;(define open-run-close open-run-close-exception-handling) ;; open-run-close-no-exception-handling ;; open-run-close-exception-handling) ;;) (define (db:initialize-main-db db) (when (not *configinfo*) (launch:setup)) ;; added because Elena was getting stack dump because *configinfo* below was #f. (let* ((configdat (car *configinfo*)) ;; tut tut, global warning... (keys (keys:config-get-fields configdat)) (havekeys (> (length keys) 0)) |
︙ | ︙ | |||
2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 | ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? (define (db:get-keys dbstruct) (keys:config-get-fields *configdat*) ) ;; (if *db-keys* *db-keys* ;; (let ((res '())) ;; (db:with-db dbstruct #f #f ;; (lambda (dbdat db) ;; (sqlite3:for-each-row ;; (lambda (key) ;; (set! res (cons key res))) | > | 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 | ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? (define (db:get-keys dbstruct) (keys:config-get-fields *configdat*) ) ;; (if *db-keys* *db-keys* ;; (let ((res '())) ;; (db:with-db dbstruct #f #f ;; (lambda (dbdat db) ;; (sqlite3:for-each-row ;; (lambda (key) ;; (set! res (cons key res))) |
︙ | ︙ | |||
2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 | (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. (comma (if (> (length keys) 0) "," "")) (andstr (if (> (length keys) 0) " AND " "")) (valslots (keys->valslots keys)) ;; ?,?,? ... (allvals (append (list runname state status user contour) (map cadr keyvals))) (qryvals (append (list runname) (map cadr keyvals))) (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (db:with-db dbstruct #f #f (lambda (dbdat db) (let ((res #f)) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") allvals) (apply sqlite3:for-each-row (lambda (id) (set! res id)) db | > > | 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 | (contour (or contour-in "")) ;; empty string to force no hierarcy and be backwards compatible. (comma (if (> (length keys) 0) "," "")) (andstr (if (> (length keys) 0) " AND " "")) (valslots (keys->valslots keys)) ;; ?,?,? ... (allvals (append (list runname state status user contour) (map cadr keyvals))) (qryvals (append (list runname) (map cadr keyvals))) (key=?str (string-intersperse (map (lambda (k)(conc k "=?")) keys) " AND "))) (debug:print 0 *default-log-port* "Got here 0.") (debug:print 3 *default-log-port* "keys: " keys " allvals: " allvals " keyvals: " keyvals " key=?str is " key=?str) (debug:print 2 *default-log-port* "NOTE: using target " (string-intersperse (map cadr keyvals) "/") " for this run") (if (and runname (null? (filter (lambda (x)(not x)) keyvals))) ;; there must be a better way to "apply and" (db:with-db dbstruct #f #f (lambda (dbdat db) (debug:print 0 *default-log-port* "Got here 1.") (let ((res #f)) (apply sqlite3:execute db (conc "INSERT OR IGNORE INTO runs (runname,state,status,owner,event_time,contour" comma keystr ") VALUES (?,?,?,?,strftime('%s','now'),?" comma valslots ");") allvals) (apply sqlite3:for-each-row (lambda (id) (set! res id)) db |
︙ | ︙ |
Modified dbfile.scm from [330fd055bc] to [3ddda7a78f].
︙ | ︙ | |||
16 17 18 19 20 21 22 | ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== (declare (unit dbfile)) ;; (declare (uses debugprint)) | | | > | | | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | ;; You should have received a copy of the GNU General Public License ;; along with Megatest. If not, see <http://www.gnu.org/licenses/>. ;;====================================================================== (declare (unit dbfile)) ;; (declare (uses debugprint)) (declare (uses commonmod)) (module dbfile * (import scheme chicken data-structures extras matchable) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18 srfi-1 srfi-69 stack files ports commonmod ) ;; (import debugprint) ;;====================================================================== ;; R E C O R D S ;;====================================================================== |
︙ | ︙ | |||
75 76 77 78 79 80 81 | ) ;; goal is to converge on one struct for an area but for now it is too confusing ;; need to keep dbhandles and cached statements together (defstruct dbr:dbdat (dbfile #f) (dbh #f) (stmt-cache (make-hash-table)) | | > > > > > > > > > > > > > > > > > | 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | ) ;; goal is to converge on one struct for an area but for now it is too confusing ;; need to keep dbhandles and cached statements together (defstruct dbr:dbdat (dbfile #f) (dbh #f) (stmt-cache (make-hash-table)) (read-only #f) (birth-sec (current-seconds))) (define *dbstruct-dbs* #f) (define *db-access-mutex* (make-mutex)) (define *no-sync-db* #f) (define *db-sync-in-progress* #f) (define *db-with-db-mutex* (make-mutex)) (define *max-api-process-requests* 0) (define *api-process-request-count* 0) (define *db-write-access* #t) (define *db-last-sync* 0) ;; last time the sync to megatest.db happened (define *db-multi-sync-mutex* (make-mutex)) ;; protect access to *db-sync-in-progress*, *db-last-sync* (define (db:generic-error-printout exn . message) (print-call-chain (current-error-port)) (apply dbfile:print-err message) (dbfile:print-err ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) (define (dbfile:run-id->key run-id) (or run-id 'main)) (define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3)) (if (<= try-num 0) #f |
︙ | ︙ | |||
220 221 222 223 224 225 226 | #f (begin (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1)) (stack-pop! (dbr:subdb-dbstack subdb)))))) ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) | | > > > > > > > | | | 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 | #f (begin (set! *dbfile:num-handles-in-use* (+ *dbfile:num-handles-in-use* 1)) (stack-pop! (dbr:subdb-dbstack subdb)))))) ;; return a previously opened db handle to the stack of available handles (define (dbfile:add-dbdat dbstruct run-id dbdat) (let* ((subdb (dbfile:get-subdb dbstruct run-id)) (age (- (current-seconds)(dbr:dbdat-birth-sec dbdat)))) (if (> age 300) ;; just testing - discard and close after 30 sec (begin ;; (map sqlite3:finalize! (hash-table-values (dbr:dbdat-stmt-cache dbdat))) ;; (sqlite3:finalize! (dbr:dbdat-dbh dbdat)) (dbfile:print-err "INFO: Discarded dbdat over 30 sec old ("age"s)")) (begin (set! *dbfile:num-handles-in-use* (- *dbfile:num-handles-in-use* 1)) (stack-push! (dbr:subdb-dbstack subdb) dbdat))))) ;; set up a subdb ;; (define (dbfile:init-subdb dbstruct run-id init-proc) (let* ((dbname (dbfile:run-id->dbname run-id)) (areapath (dbr:dbstruct-areapath dbstruct)) (tmppath (dbr:dbstruct-tmppath dbstruct)) |
︙ | ︙ | |||
474 475 476 477 478 479 480 | (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.") (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) (dbfile:cautious-open-database fname init-proc (- tries-left 1))) | > > > > | < | | | | | | | | | | | | | | | | | | | | | | | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 | (dbfile:print-err "INFO: dbfile:cautious-open-database: journal file " busy-file " exists, trying again in few seconds.") (thread-sleep! 1) (if (eq? tries-left 2) (begin (dbfile:print-err "INFO: forcing journal rollup "busy-file) (dbfile:brute-force-salvage-db fname))) (dbfile:cautious-open-database fname init-proc (- tries-left 1))) (let* ((result (condition-case (dbfile:with-simple-file-lock (conc fname ".lock") (lambda () (let* ((db-exists (file-exists? fname)) (db (sqlite3:open-database fname))) (if (and init-proc (not db-exists)) (init-proc db)) db))) (exn (io-error) (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.") (retry)) (exn (corrupt) (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.") (retry)) (exn (busy) (dbfile:print-err exn "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.") (retry)) (exn (permission)(dbfile:print-err exn "ERROR: database " fname " has some permissions problem.") (retry)) (exn () (dbfile:print-err exn "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)) (retry))))) #;(if (file-write-access? fname) (dbfile:simple-file-release-lock lock-file)) result)))) (define (dbfile:brute-force-salvage-db fname) (let* ((backupfname (conc fname"-"(current-process-id)".bak")) (cmd (conc "cp "fname" "backupfname";mv "fname" "(conc fname ".delme;") "cp "backupfname" "fname))) (dbfile:print-err "WARNING: attempting recovery of file "fname" by running commands:\n" " "cmd) (system cmd))) #;(define (dbfile:cautious-open-database-orig fname init-proc #!optional (tries-left 50)) (let* ((lock-file (conc fname".lock")) (delay-time (* (- 51 tries-left) 1.1)) (retry (lambda () (thread-sleep! delay-time) (if (> tries-left 0) (dbfile:cautious-open-database fname init-proc (- tries-left 1)))))) (assert (>= tries-left 0) (conc "FATAL: too many attempts in dbfile:cautious-open-database of "fname", giving up.")) |
︙ | ︙ | |||
606 607 608 609 610 611 612 | ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction db (lambda () | > > > > > > > > > > > > > > > | > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > | > > > > > > > > > > > | > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 | ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction db (lambda () (condition-case (let* ((curr-val (db:no-sync-get/default db keyname #f))) (if curr-val `(#f . ,curr-val) ;; (sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)) (let ((lock-time (current-seconds))) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)))) (exn (io-error) (dbfile:print-err "ERROR: i/o error with no-sync db. Check permissions, disk space etc. and try again.")) (exn (corrupt) (dbfile:print-err "ERROR: database no-sync db is corrupt. Repair it to proceed.")) (exn (busy) (dbfile:print-err "ERROR: database no-sync db is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(dbfile:print-err "ERROR: database no-sync db has some permissions problem.")) (exn () ;; (status done) ;; I don't know how to detect status done but no data! (dbfile:print-err "ERROR: Unknown error with database no-sync db message: exn="(condition->list exn)", \n" ((condition-property-accessor 'exn 'message) exn)) `(#f . ,(current-seconds))))))) (define (db:no-sync-get-lock-timeout db keyname timeout) (let* ((lockdat (db:no-sync-get-lock db keyname))) (match lockdat ((#f . lock-time) (if (> (- (current-seconds) (if (string? lock-time)(string->number lock-time)lock-time)) timeout) (let ((lock-time (current-seconds))) ;; (debug:print-info 2 *default-log-port* "db:no-sync-get-lock keyname=" keyname ", lock-time=" lock-time ", exn=" exn) (sqlite3:execute db "INSERT OR REPLACE INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) lockdat)) (else lockdat)))) ;; NOTE: This will steal the lock after timeout of waiting. ;; (define (db:with-no-sync-lock db keyname timeout proc) (let* ((lockdat (db:no-sync-get-lock-timeout db keyname)) (gotlock (car lockdat)) (locktime (cdr lockdat))) (if gotlock (let ((res (proc))) (db:no-sync-del! db keyname) res)))) ;;====================================================================== ;; sync back functions pulled from db.scm ;;====================================================================== ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f ;; (define (db:lock-and-delta-sync no-sync-db dbstruct from-db-file runid keys dbinit) (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") (let* ((lock-file (conc from-db-file ".lock"))) (if (common:simple-file-lock lock-file) (begin (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) (set! *db-sync-in-progress* #t) (db:sync-touched dbstruct runid keys dbinit) (set! *db-sync-in-progress* #f) (delete-file* lock-file) #t) (begin (dbfile:print-err "INFO: could not get lock for " from-db-file ", sync likely in progress.") #f )))) ;; ;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f ;; ;; ;; (define (db:lock-and-delta-sync-orig no-sync-db dbstruct from-db-file runid keys dbinit) ;; (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.") ;; ;; (dbfile:print-err *default-log-port* "db:lock-and-delta-sync") ;; (let* ((lockdat (db:no-sync-get-lock-timeout no-sync-db from-db-file 60)) ;; (gotlock (car lockdat)) ;; (locktime (cdr lockdat))) ;; ;; (debug:print-info 3 *default-log-port* "db:lock-and-delta-sync: got lock?") ;; ;; (if gotlock ;; (begin ;; (dbfile:print-err "INFO: db:lock-and-delta-sync copying db "runid" at "(current-seconds)) ;; (set! *db-sync-in-progress* #t) ;; (db:sync-touched dbstruct runid keys dbinit) ;; (set! *db-sync-in-progress* #f) ;; (db:no-sync-del! no-sync-db from-db-file) ;; #t) ;; (begin ;; (dbfile:print-err "ERROR: could not get lock for " from-db-file " from no-sync-db") ;; #f ;; )))) ;; sync run from tmp disk to nfs disk if touched ;; ;; call with dbinit=db:initialize-main-db ;; (define (db:sync-touched dbstruct run-id keys #!key dbinit (force-sync #f)) (dbfile:print-err "db:sync-touched Syncing: " (conc (if run-id run-id "main") ".db")) (let* (;; the subdb is needed to access the mtdbdat (subdb (or (dbfile:get-subdb dbstruct run-id) (dbfile:init-subdb dbstruct run-id dbinit))) (tmpdbfile (dbr:subdb-tmpdbfile subdb)) (mtdb (dbr:subdb-mtdbdat subdb)) (tmpdb (dbfile:open-db dbstruct run-id dbinit)) ;; sqlite3-db tmpdbfile #f)) (start-t (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) ))) (mutex-unlock! *db-multi-sync-mutex*) (db:sync-tables (db:sync-all-tables-list dbstruct keys) update_info tmpdb mtdb)) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-sync* start-t) (set! *db-last-access* start-t) (mutex-unlock! *db-multi-sync-mutex*) (dbfile:add-dbdat dbstruct run-id tmpdb) #t)) ;; just tests, test_steps and test_data tables (define db:sync-tests-only (list ;; (list "strs" ;; '("id" #f) ;; '("str" #f)) (list "tests" '("id" #f) '("run_id" #f) '("testname" #f) '("host" #f) '("cpuload" #f) '("diskfree" #f) '("uname" #f) '("rundir" #f) '("shortdir" #f) '("item_path" #f) '("state" #f) '("status" #f) '("attemptnum" #f) '("final_logf" #f) '("logdat" #f) '("run_duration" #f) '("comment" #f) '("event_time" #f) '("fail_count" #f) '("pass_count" #f) '("archived" #f) '("last_update" #f)) (list "test_steps" '("id" #f) '("test_id" #f) '("stepname" #f) '("state" #f) '("status" #f) '("event_time" #f) '("comment" #f) '("logfile" #f) '("last_update" #f)) (list "test_data" '("id" #f) '("test_id" #f) '("category" #f) '("variable" #f) '("value" #f) '("expected" #f) '("tol" #f) '("units" #f) '("comment" #f) '("status" #f) '("type" #f) '("last_update" #f)))) ;; needs db to get keys, this is for syncing all tables ;; (define (db:sync-main-list dbstruct keys) (let ((keys keys)) ;; (db:get-keys dbstruct))) (list (list "keys" '("id" #f) '("fieldname" #f) '("fieldtype" #f)) (list "metadat" '("var" #f) '("val" #f)) (append (list "runs" '("id" #f)) (map (lambda (k)(list k #f)) (append keys (list "runname" "state" "status" "owner" "event_time" "comment" "fail_count" "pass_count" "contour" "last_update")))) (list "archive_disks" '("id" #f) '("archive_area_name" #f) '("disk_path" #f) '("last_df" #f) '("last_df_time" #f) '("creation_time" #f)) (list "archive_blocks" '("id" #f) '("archive_disk_id" #f) '("disk_path" #f) '("last_du" #f) '("last_du_time" #f) '("creation_time" #f)) (list "test_meta" '("id" #f) '("testname" #f) '("owner" #f) '("description" #f) '("reviewed" #f) '("iterated" #f) '("avg_runtime" #f) '("avg_disk" #f) '("tags" #f) '("jobgroup" #f))))) (define (db:sync-all-tables-list dbstruct keys) (append (db:sync-main-list dbstruct keys) db:sync-tests-only)) ;; tbls is ( ("tablename" ( "field1" [#f|proc1] ) ( "field2" [#f|proc2] ) .... ) ) ;; db's are dbdat's ;; ;; if last-update specified ("field-name" . time-in-seconds) ;; then sync only records where field-name >= time-in-seconds ;; IFF field-name exists ;; (define (db:sync-tables tbls last-update fromdb todb . slave-dbs) (handle-exceptions exn (begin (dbfile:print-err "EXCEPTION: database probably overloaded or unreadable in db:sync-tables.") (print-call-chain (current-error-port)) (dbfile:print-err " message: " ((condition-property-accessor 'exn 'message) exn)) (dbfile:print-err "exn=" (condition->list exn)) (dbfile:print-err " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (dbfile:print-err " src db: " (dbr:dbdat-dbfile fromdb)) (for-each (lambda (dbdat) (let ((dbpath (dbr:dbdat-dbfile dbdat))) (dbfile:print-err " dbpath: " dbpath) (if #t ;; (not (db:repair-db dbdat)) (begin (dbfile:print-err "Failed to rebuild (repair is turned off) " dbpath ", exiting now.") (exit))))) (cons todb slave-dbs)) 0) ;; this is the work to be done") (cond ((not fromdb) (dbfile:print-err "WARNING: db:sync-tables called with fromdb missing") -1) ((not todb) (dbfile:print-err "WARNING: db:sync-tables called with todb missing") -2) ((not (sqlite3:database? (dbr:dbdat-dbh fromdb))) (dbfile:print-err "db:sync-tables called with fromdb not a database " fromdb) -3) ((not (sqlite3:database? (dbr:dbdat-dbh todb))) (dbfile:print-err "db:sync-tables called with todb not a database " todb) -4) ((not (file-write-access? (dbr:dbdat-dbfile todb))) (dbfile:print-err "db:sync-tables called with todb not a read-only database " todb) -5) ((not (null? (let ((readonly-slave-dbs (filter (lambda (dbdat) (not (file-write-access? (dbr:dbdat-dbfile todb)))) slave-dbs))) (for-each (lambda (bad-dbdat) (dbfile:print-err "db:sync-tables called with todb not a read-only database " bad-dbdat)) readonly-slave-dbs) readonly-slave-dbs))) -6) (else (dbfile:print-err "db:sync-tables: args are good") (let ((stmts (make-hash-table)) ;; table-field => stmt (all-stmts '()) ;; ( ( stmt1 value1 ) ( stml2 value2 )) (numrecs (make-hash-table)) (start-time (current-milliseconds)) (tot-count 0)) (for-each ;; table (lambda (tabledat) (let* ((tablename (car tabledat)) (fields (cdr tabledat)) (has-last-update (member "last_update" fields)) (use-last-update (cond ((and has-last-update (member "last_update" fields)) #t) ;; if given a number, just use it for all fields ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table ((and (pair? last-update) (member (car last-update) ;; last-update field name (map car fields))) #t) ((and last-update (not (pair? last-update)) (not (number? last-update))) (dbfile:print-err "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields #f) (else #f))) (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for (if (number? last-update) last-update (cdr last-update)) #f)) (last-update-field (if use-last-update (if (number? last-update) "last_update" (car last-update)) #f)) (num-fields (length fields)) (field->num (make-hash-table)) (num->field (apply vector (map car fields))) ;; BBHERE (full-sel (conc "SELECT " (string-intersperse (map car fields) ",") " FROM " tablename (if use-last-update ;; apply last-update criteria (conc " WHERE " last-update-field " >= " last-update-value) "") ";")) (full-ins (conc "INSERT OR REPLACE INTO " tablename " ( " (string-intersperse (map car fields) ",") " ) " " VALUES ( " (string-intersperse (make-list num-fields "?") ",") " );")) (fromdat '()) (fromdats '()) (totrecords 0) (batch-len 100) ;; (string->number (or (configf:lookup *configdat* "sync" "batchsize") "100"))) (todat (make-hash-table)) (count 0) (field-names (map car fields)) (delay-handicap 0) ;; (string->number (or (configf:lookup *configdat* "sync" "delay-handicap") "0"))) ) ;; set up the field->num table (for-each (lambda (field) (hash-table-set! field->num field count) (set! count (+ count 1))) fields) ;; read the source table ;; store a list of all rows in the table in fromdat, up to batch-len. ;; Then add fromdat to the fromdats list, clear fromdat and repeat. (sqlite3:for-each-row (lambda (a . b) (set! fromdat (cons (apply vector a b) fromdat)) (if (> (length fromdat) batch-len) (begin (set! fromdats (cons fromdat fromdats)) (set! fromdat '()) (set! totrecords (+ totrecords 1))) ) ) (dbr:dbdat-dbh fromdb) full-sel) ;; Count less than batch-len as a record (if (> (length fromdat) 0) (set! totrecords (+ totrecords 1))) ;; tack on remaining records in fromdat (if (not (null? fromdat)) (set! fromdats (cons fromdat fromdats))) (if (common:low-noise-print 120 "sync-records") (dbfile:print-err "found " totrecords " records to sync")) (sqlite3:for-each-row (lambda (a . b) (hash-table-set! todat a (apply vector a b))) (dbr:dbdat-dbh todb) full-sel) (when (and delay-handicap (> delay-handicap 0)) (dbfile:print-err "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured") (thread-sleep! delay-handicap) (dbfile:print-err "synthetic sync delay of "delay-handicap" seconds completed") ) ;; first pass implementation, just insert all changed rows (for-each (lambda (targdb) (let* ((db (dbr:dbdat-dbh targdb)) (drp-trigger (if (member "last_update" field-names) (db:drop-trigger db tablename) #f)) (has-last-update (member "last_update" field-names)) (is-trigger-dropped (if has-last-update (db:is-trigger-dropped db tablename) #f)) (stmth (sqlite3:prepare db full-ins)) (changed-rows 0)) ;; (db:delay-if-busy targdb) ;; NO WAITING (if (and has-last-update (common:low-noise-print 120 "is-trigger-dropped")) (dbfile:print-err "is-trigger-dropped: " is-trigger-dropped)) (for-each (lambda (fromdat-lst) (sqlite3:with-transaction db (lambda () (for-each ;; (lambda (fromrow) (let* ((a (vector-ref fromrow 0)) (curr (hash-table-ref/default todat a #f)) (same #t)) (let loop ((i 0)) (if (or (not curr) (not (equal? (vector-ref fromrow i)(vector-ref curr i)))) (set! same #f)) (if (and same (< i (- num-fields 1))) (loop (+ i 1)))) (if (not same) (begin (apply sqlite3:execute stmth (vector->list fromrow)) (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0))) (set! changed-rows (+ changed-rows 1)) ) ) )) fromdat-lst)))) fromdats) (if (> changed-rows 0) (dbfile:print-err "table " tablename " changed rows: " changed-rows) ) (sqlite3:finalize! stmth) (if (member "last_update" field-names) (db:create-trigger db tablename)))) (append (list todb) slave-dbs) ) ) ) tbls) (let* ((runtime (- (current-milliseconds) start-time)) (should-print (or ;; (debug:debug-mode 12) (common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate. (if should-print (dbfile:print-err "INFO: db sync, total run time " runtime " ms")) (for-each (lambda (dat) (let ((tblname (car dat)) (count (cdr dat))) (set! tot-count (+ tot-count count)) (if (> count 0) (if should-print (dbfile:print-err "FIXME: tblname: " tblname", count: "count" "))))) ;; (format #f " ~10a ~5a" tblname count)))))) (sort (hash-table->alist numrecs)(lambda (a b)(> (cdr a)(cdr b)))))) tot-count))))) ;;====================================================================== ;; trigger setup/takedown ;;====================================================================== (define db:trigger-list (list (list "update_runs_trigger" "CREATE TRIGGER IF NOT EXISTS update_runs_trigger AFTER UPDATE ON runs FOR EACH ROW BEGIN UPDATE runs SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_run_stats_trigger" "CREATE TRIGGER IF NOT EXISTS update_run_stats_trigger AFTER UPDATE ON run_stats FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_tests_trigger" "CREATE TRIGGER IF NOT EXISTS update_tests_trigger AFTER UPDATE ON tests FOR EACH ROW BEGIN UPDATE tests SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_teststeps_trigger" "CREATE TRIGGER IF NOT EXISTS update_teststeps_trigger AFTER UPDATE ON test_steps FOR EACH ROW BEGIN UPDATE test_steps SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ) (list "update_test_data_trigger" "CREATE TRIGGER IF NOT EXISTS update_test_data_trigger AFTER UPDATE ON test_data FOR EACH ROW BEGIN UPDATE test_data SET last_update=(strftime('%s','now')) WHERE id=old.id; END;" ))) ;; ;; ADD run-id SUPPORT ;; (define (db:create-all-triggers dbstruct) (db:with-db dbstruct #f #f (lambda (dbdat db) (db:create-triggers db)))) (define (db:create-triggers db) (for-each (lambda (key) (sqlite3:execute db (cadr key))) db:trigger-list)) (define (db:drop-all-triggers dbstruct) (db:with-db dbstruct #f #f (lambda (dbdat db) (db:drop-triggers db)))) (define (db:is-trigger-dropped db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger"))) (res #f)) (sqlite3:for-each-row (lambda (name) (if (equal? name trigger-name) (set! res #t))) db "SELECT name FROM sqlite_master WHERE type = 'trigger' ;") res)) (define (db:drop-triggers db) (for-each (lambda (key) (sqlite3:execute db (conc "drop trigger if exists " (car key)))) db:trigger-list)) (define (db:drop-trigger db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger")))) (for-each (lambda (key) (if (equal? (car key) trigger-name) (sqlite3:execute db (conc "drop trigger if exists " trigger-name)))) db:trigger-list))) (define (db:create-trigger db tbl-name) (let* ((trigger-name (if (equal? tbl-name "test_steps") "update_teststeps_trigger" (conc "update_" tbl-name "_trigger")))) (for-each (lambda (key) (if (equal? (car key) trigger-name) (sqlite3:execute db (cadr key)))) db:trigger-list))) ;;====================================================================== ;; db access stuff ;;====================================================================== ;; call with dbinit=db:initialize-main-db ;; (define (db:open-db dbstruct run-id dbinit) (let* ((dbdat (dbfile:open-db dbstruct run-id dbinit))) (set! *db-write-access* (not (dbr:dbdat-read-only dbdat))) dbdat)) (define dbfile:db-init-proc (make-parameter #f)) ;; (db:with-db dbstruct run-id sqlite3:exec "select blah fgrom blaz;") ;; r/w is a flag to indicate if the db is modified by this query #t = yes, #f = no ;; (define (db:with-db dbstruct run-id r/w proc . params) (let* ((have-struct (dbr:dbstruct? dbstruct)) (dbdat (if have-struct ;; this stuff just allows us to call with a db handle directly (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id) #f)) (db (if have-struct ;; this stuff just allows us to call with a db handle directly (dbr:dbdat-dbh dbdat) dbstruct)) (fname (if dbdat (dbr:dbdat-dbfile dbdat) "nofilenameavailable")) #;(subdb (if have-struct (dbfile:get-subdb dbstruct run-id) #f)) (use-mutex (> *api-process-request-count* 25))) ;; was 25 (if (and use-mutex (common:low-noise-print 120 "over-50-parallel-api-requests")) (dbfile:print-err *api-process-request-count* " parallel api requests being processed in process " (current-process-id) ", throttling access")) (if (common:low-noise-print 600 (conc "parallel-api-requests" *max-api-process-requests*)) (dbfile:print-err "Parallel api request count: " *api-process-request-count* " max parallel requests: " *max-api-process-requests*)) (condition-case (begin (if use-mutex (mutex-lock! *db-with-db-mutex*)) (let ((res (apply proc dbdat db params))) (if use-mutex (mutex-unlock! *db-with-db-mutex*)) ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w)) (if dbdat (dbfile:add-dbdat dbstruct run-id dbdat)) res)) (exn (io-error) (db:generic-error-printout exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")) (exn (corrupt) (db:generic-error-printout exn "ERROR: database " fname " is corrupt. Repair it to proceed.")) (exn (busy) (db:generic-error-printout exn "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back.")) (exn (permission)(db:generic-error-printout exn "ERROR: database " fname " has some permissions problem.")) (exn () (db:generic-error-printout exn "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn)))))) ;;====================================================================== ;; file utils ;;====================================================================== ;;====================================================================== ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 |
︙ | ︙ | |||
704 705 706 707 708 709 710 | (define (dbfile:simple-file-release-lock fname) (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) | > > > > > > > | | 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 | (define (dbfile:simple-file-release-lock fname) (handle-exceptions exn #f ;; I don't really care why this failed (at least for now) (delete-file* fname))) (define (dbfile:with-simple-file-lock fname proc #!key (expire-time 300)) (let ((gotlock (dbfile:simple-file-lock-and-wait fname expire-time: expire-time))) (if gotlock (let ((res (proc))) (dbfile:simple-file-release-lock fname) res) (assert #t "FATAL: simple file lock never got a lock.")))) ) |
Modified http-transport.scm from [4ca66092ff] to [9e9f2a4e8b].
︙ | ︙ | |||
34 35 36 37 38 39 40 41 42 43 44 45 | (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) ;; (declare (uses daemon)) (declare (uses portlogger)) (declare (uses rmt)) (declare (uses dbfile)) (include "common_records.scm") (include "db_records.scm") (include "js-path.scm") | > | | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 | (declare (uses tests)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. (declare (uses server)) ;; (declare (uses daemon)) (declare (uses portlogger)) (declare (uses rmt)) (declare (uses dbfile)) (declare (uses commonmod)) (include "common_records.scm") (include "db_records.scm") (include "js-path.scm") (import dbfile commonmod) (require-library stml) (define (http-transport:make-server-url hostport) (if (not hostport) #f (conc "http://" (car hostport) ":" (cadr hostport)))) |
︙ | ︙ | |||
282 283 284 285 286 287 288 | (if (debug:debug-mode 1) (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) | > > | | | | | | | | | | | | | | 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 | (if (debug:debug-mode 1) (debug:print-info 0 *default-log-port* "couldn't talk to server, trying again ...") (begin (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".") (debug:print 0 *default-log-port* " message: " msg ", exn=" exn) (debug:print 0 *default-log-port* " cmd: " cmd " params: " params " key:" (or server-id "thekey")) (debug:print 0 *default-log-port* " call-chain: " call-chain))) (set! *runremote* #f) (set! runremote #f) ;; (if runremote ;; (remote-conndat-set! runremote #f)) ;; Killing associated server to allow clean retry.") ;; (tasks:kill-server-run-id run-id) ;; better to kill the server in the logic that called this routine? (mutex-unlock! *http-mutex*) ;; (signal (make-composite-condition ;; (make-property-condition 'commfail 'message "failed to connect to server"))) ;; "communications failed" (db:obj->string #f)) (with-input-from-request ;; was dat fullurl (list (cons 'key (or server-id "thekey")) (cons 'cmd cmd) (cons 'params sparams)) read-string)) transport: 'http) 0)) ;; added this speculatively ;; Shouldn't this be a call to the managed call-all-connections stuff above? (close-all-connections!) ;; BUG? WHY IS THIS HERE? Are we failing to reuse connections? (mutex-unlock! *http-mutex*) )) (time-out (lambda () (thread-sleep! 45) (debug:print 0 *default-log-port* "WARNING: send-receive took more than 45 seconds!!") #f)) (th1 (make-thread send-recieve "with-input-from-request")) |
︙ | ︙ | |||
550 551 552 553 554 555 556 | (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) | > | | > > > | 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 | (current-seconds))) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (let ((curr-time (current-seconds))) (handle-exceptions exn (debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn) (if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter (not *server-overloaded*)) (change-file-times server-log-file curr-time curr-time) (if (common:low-noise-print 120 "start new server") (server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers ))))) (loop 0 server-state bad-sync-count (current-milliseconds))) (else (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (http-transport:server-shutdown port))))))) (define (http-transport:server-shutdown port) (begin |
︙ | ︙ |
Modified megatest.scm from [c7ae0fc5df] to [dc12e408b1].
︙ | ︙ | |||
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses dbmod)) (declare (uses dbmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs)) ;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) (import dbmod dbfile) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") | > > > > | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 | (declare (uses tdb)) (declare (uses mt)) (declare (uses api)) (declare (uses tasks)) ;; only used for debugging. (declare (uses env)) (declare (uses diff-report)) (declare (uses db)) (declare (uses dbmod)) (declare (uses dbmod.import)) (declare (uses commonmod)) (declare (uses commonmod.import)) (declare (uses dbfile)) (declare (uses dbfile.import)) ;; (declare (uses debugprint)) ;; (declare (uses debugprint.import)) ;; (declare (uses mtargs)) ;; (declare (uses mtargs.import)) ;; (declare (uses ftail)) ;; (import ftail) (import dbmod commonmod dbfile) (define *db* #f) ;; this is only for the repl, do not use in general!!!! (include "common_records.scm") (include "key_records.scm") (include "db_records.scm") |
︙ | ︙ | |||
73 74 75 76 77 78 79 80 81 82 83 84 85 86 | ;; (use sparse-vectors) (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) (load debugcontrolf))) | > > | 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | ;; (use sparse-vectors) (require-library mutils) (define *usage-log-file* #f) ;; put path to file for logging usage in this var in the ~/.megatestrc file (define *usage-use-seconds* #t) ;; for Epoc seconds in usage logging change this to #t in ~/.megatestrc file (dbfile:db-init-proc db:initialize-main-db) ;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file ;; (let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc"))) (if (common:file-exists? debugcontrolf) (load debugcontrolf))) |
︙ | ︙ |
Modified rmt.scm from [d97a5a0482] to [f2baad3376].
︙ | ︙ | |||
19 20 21 22 23 24 25 26 27 28 | ;;====================================================================== (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") ;; (declare (uses rmtmod)) | > | | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | ;;====================================================================== (use format typed-records) ;; RADT => purpose of json format?? (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (declare (uses dbfile)) (include "common_records.scm") ;; (declare (uses rmtmod)) (import dbfile) ;; rmtmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following ;; |
︙ | ︙ | |||
61 62 63 64 65 66 67 | ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) | | | 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) |
︙ | ︙ | |||
117 118 119 120 121 122 123 124 125 126 127 128 129 130 | ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) | > > > > > > > | 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ((> (- (current-seconds)(remote-connect-time runremote)) 180) ;; reconnect to server every 180 seconds (debug:print 0 *default-log-port* "Forcing reconnect to server(s) due to 180 second timeout.") (set! *runremote* #f) ;; BUG: close-connections should go here? (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: 1 area-dat: area-dat)) ;;DOT EXIT; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; give up if more than 150 attempts ((> attemptnum 150) (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") (exit 1)) |
︙ | ︙ | |||
373 374 375 376 377 378 379 | (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) | | | | | | | | 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 | (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) (dbstructs-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstructs-local (vector (symbol->string cmd) params)))) ;; (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. ;; exn ;; This is an attempt to detect that situation and recover gracefully ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) ;; (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy (if (and (vector? v) (> (vector-length v) 1)) (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record (vector #t '()))) ;; ) ;; we could also check that the returned types are valid (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) |
︙ | ︙ | |||
410 411 412 413 414 415 416 | (mutex-lock! *db-multi-sync-mutex*) / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) | | | | | | | | 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 | (mutex-lock! *db-multi-sync-mutex*) / (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res ;; (handle-exceptions ;; exn ;; (begin ;; (print "transport failed. exn=" exn) ;; #f) (http-transport:client-api-send-receive run-id connection-info cmd params))) ;; ) (if (and res (vector-ref res 0)) (vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!! #f))) ;;====================================================================== ;; ;; A C T U A L A P I C A L L S |
︙ | ︙ |
Modified server.scm from [6d65c175e8] to [bb020a2020].
︙ | ︙ | |||
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable utils) (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. ;; (declare (uses synchash)) (declare (uses http-transport)) ;;(declare (uses rpc-transport)) (declare (uses launch)) ;; (declare (uses daemon)) (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) (if (not hostport) #f | > > > > | 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 | (use srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest directory-utils posix-extras matchable utils) (use spiffy uri-common intarweb http-client spiffy-request-vars) (declare (unit server)) (declare (uses commonmod)) (declare (uses common)) (declare (uses db)) (declare (uses tasks)) ;; tasks are where stuff is maintained about what is running. ;; (declare (uses synchash)) (declare (uses http-transport)) ;;(declare (uses rpc-transport)) (declare (uses launch)) ;; (declare (uses daemon)) (import commonmod) (include "common_records.scm") (include "db_records.scm") (define (server:make-server-url hostport) (if (not hostport) #f |
︙ | ︙ |
Modified tasks.scm from [e54ee89bd7] to [fd2a2e05e7].
︙ | ︙ | |||
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) (declare (unit tasks)) (declare (uses db)) (declare (uses rmt)) (declare (uses common)) (declare (uses pgdb)) ;; (import pgdb) ;; pgdb is a module (include "task_records.scm") (include "db_records.scm") ;;====================================================================== ;; Tasks db | > > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | ;; strftime('%m/%d/%Y %H:%M:%S','now','localtime') (use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format) (import (prefix sqlite3 sqlite3:)) (declare (unit tasks)) (declare (uses dbfile)) (declare (uses db)) (declare (uses rmt)) (declare (uses common)) (declare (uses pgdb)) (import dbfile) ;; (import pgdb) ;; pgdb is a module (include "task_records.scm") (include "db_records.scm") ;;====================================================================== ;; Tasks db |
︙ | ︙ |