Changes In Branch v1.65-wip Through [386b7b1848] Excluding Merge-Ins
This is equivalent to a diff from 8a70b57bea to 386b7b1848
2019-10-21
| ||
14:00 | Added defense against directories in the logs dir. check-in: 3eaa18cb5b user: mrwellan tags: v1.65 | |
2019-10-01
| ||
22:54 | Broke it with mutex deadlock check-in: 71a141e42e user: matt tags: v1.65-wip | |
22:35 | Removed more globals from rmt:send-receive-orig check-in: 386b7b1848 user: matt tags: v1.65-wip | |
22:24 | Removed globals from rmt:send-receive-orig check-in: 2d1ae5e508 user: matt tags: v1.65-wip | |
21:38 | Merged in v1.65 check-in: a114850abb user: matt tags: v1.65-wip | |
10:40 | Fix for Makefile for mtexec check-in: 8a70b57bea user: mrwellan tags: v1.65 | |
09:52 | Merge fix for periods in env vars from trunk check-in: 65e88e7bdc user: mrwellan tags: v1.65 | |
Modified common.scm from [77eb320f92] to [91c6910f87].
︙ | ︙ | |||
277 278 279 280 281 282 283 | (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) | | > > | 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (server:expiration-timeout)) (force-server #f) (ro-mode #f) (ro-mode-checked #f) ;; flag that indicates we have checked for ro-mode (ulex:conn #f) ;; ulex db conn is not exactly a db connector, more like a network connector ) ;; launching and hosts (defstruct host (reachable #f) (last-update 0) (last-used 0) (last-cpuload 1)) |
︙ | ︙ |
Modified db.scm from [560d632862] to [5cbdd1ef19].
︙ | ︙ | |||
302 303 304 305 306 307 308 | ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) ;(fmt (file-modification-time tmpdbfname)) (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) | > > > > > > | | | | 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 | ;(mtdbmodtime (if mtdbexists (common:lazy-sqlite-db-modification-time mtdbpath) #f)) ; moving this before db:open-megatest-db is called. if wal mode is on -WAL and -shm file get created with causing the tmpdbmodtime timestamp always greater than mtdbmodtime ;(tmpdbmodtime (if dbfexists (common:lazy-sqlite-db-modification-time tmpdbfname) #f)) ;if wal mode is on -WAL and -shm file get created when db:open-megatest-db is called. modtimedelta will always be < 10 so db in tmp not get synced ;(tmpdbmodtime (if dbfexists (db:get-last-update-time (car tmpdb)) #f)) ;(fmt (file-modification-time tmpdbfname)) (modtimedelta (and mtdbmodtime tmpdbmodtime (- mtdbmodtime tmpdbmodtime)))) (handle-exceptions exn (let ((call-chain (get-call-chain)) (msg ((condition-property-accessor 'exn 'message) exn))) (debug:print 0 *default-log-port* "ERROR: attempted to drop triggers on MTRA/megatest.db but failed. Error is " msg) (set! write-access #f)) ;; if we failed to drop the triggers then we probably don't have write access (when write-access (sqlite3:execute (car mtdb) "drop trigger if exists update_tests_trigger") (sqlite3:execute (car mtdb) "drop trigger if exists update_runs_trigger"))) ;(print "mtdbmodtime " mtdbmodtime " tmpdbmodtime " tmpdbmodtime " mtdbpath " mtdbpath " " (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "db:open-db>> mtdbpath="mtdbpath" mtdbexists="mtdbexists" and write-access="write-access) (if (and dbexists (not write-access)) (begin (set! *db-write-access* #f) (dbr:dbstruct-read-only-set! dbstruct #t))) |
︙ | ︙ |
Modified rmt.scm from [5054b48a41] to [97df11d244].
︙ | ︙ | |||
23 24 25 26 27 28 29 30 31 32 33 34 35 36 | (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") (declare (uses rmtmod)) (import rmtmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following ;; | > | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | (declare (unit rmt)) (declare (uses api)) (declare (uses http-transport)) (include "common_records.scm") (declare (uses rmtmod)) (import rmtmod) ;; ;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!! ;; ;; generate entries for ~/.megatestrc with the following ;; |
︙ | ︙ | |||
52 53 54 55 56 57 58 59 60 | cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; | > > > > > > > > > > > > > > > > > > > > > > > > > | | < < < < < < | | < < | < < < < < < < < < < < < < < | < < < | | | < < < | < < < | | | < < < | | | < < < | | | < < < | | | < < < | | | | | | | < < < | | | | | | < < < | | | < < < < | < | 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; this entry point can decide based on cmd whether to dispatch to old api calls via remote or via ulex ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) (let* ((areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas (runremote (or area-dat *runremote*))) ;; ensure we have a record for our connection for given area (if (not runremote) ;; can remove this one. should never get here. (begin (set! *runremote* (make-remote)) (set! runremote *runremote*))) ;; new runremote will come from this on next iteration (if (member cmd '(blah)) (begin (mutex-lock! *send-receive-mutex*) (if (not *runremote*)(set! *runremote* (make-remote))) (let ((ulex:conn (remote-ulex:conn *runremote*))) (if (not ulex:conn)(remote-ulex:conn-set! *runremote* (rmtmod:setup-ulex *toppath*))) (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat))) (rmt:send-receive-orig *default-log-port* runremote *rmt-mutex* areapath *db-multi-sync-mutex* cmd rid params attemptnum: attemptnum area-dat: area-dat)))) ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; ;; add multi-sync-mutex ;; (define (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) ;; do all the prep locked under the rmt-mutex (mutex-lock! rmt-mutex) ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; 3. do the query, if on homehost use local access ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value (readonly-mode (rmtmod:calc-ro-mode runremote toppath))) ;; ensure we have a homehost record (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost))) ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) (cond ;; give up if more than 15 attempts ((> attemptnum 15) (debug:print 0 log-port "ERROR: 15 tries to start/connect to server. Giving up.") (exit 1)) ;; readonly mode, read request- handle it - case 2 ((and readonly-mode (member cmd api:read-only-queries)) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 2") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params) ) ;; readonly mode, write request. Do nothing, return #f (readonly-mode (extras-readonly-mode rmt-mutex log-port cmd params)) ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) (remote-server-timeout runremote)))) (debug:print-info 0 log-port "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") (http-transport:close-connections area-dat: runremote) (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! rmt-mutex) (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 5") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params)) ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:ping (remote-server-url runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. (set! *runremote* (make-remote)) (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 6") (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 4.1") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params)) ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required (cdr (remote-hh-dat runremote)) ;; have homehost (not (remote-server-url runremote)) ;; no connection yet (not (member cmd api:read-only-queries))) ;; not a read-only query (debug:print-info 12 log-port "rmt:send-receive, case 8") (let ((server-url (server:check-if-running toppath))) ;; (server:read-dotserver->url toppath))) ;; (server:check-if-running toppath))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed (if (common:force-server?) (server:start-and-wait toppath) (server:kind-run toppath)))) (remote-force-server-set! runremote (common:force-server?)) (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 8.1") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd 0 params)) ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one (not (remote-conndat runremote))) (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote)))) ;; and no connection (debug:print-info 12 log-port "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! rmt-mutex) (if (not (server:check-if-running toppath)) ;; who knows, maybe one has started up? (server:start-and-wait toppath)) (remote-conndat-set! runremote (rmt:get-connection-info toppath)) ;; calls client:setup which calls client:setup-http (rmt:send-receive-orig log-port runremote rmt-mutex toppath multi-sync-mutex cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost (mutex-unlock! rmt-mutex) (debug:print-info 12 log-port "rmt:send-receive, case 10") (rmt:open-qry-close-locally log-port multi-sync-mutex cmd (if rid rid 0) params)) ;; not on homehost, do server query (else (extras-case-11 log-port runremote cmd params attemptnum rid))))) ;; bunch of small functions factored out of send-receive to make debug easier ;; (define (extras-case-11 *default-log-port* runremote cmd params attemptnum rid) ;; (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9") |
︙ | ︙ | |||
329 330 331 332 333 334 335 | (if (> tot 10) (cons newmax-cmd currmax) (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) | | | | | | | | | | | 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 | (if (> tot 10) (cons newmax-cmd currmax) (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) (define (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params #!key (remretries 5)) (let* ((qry-is-write (not (member cmd api:read-only-queries))) (db-file-path (db:dbfile-path)) ;; 0)) (dbstruct-local (db:setup #t)) ;; make-dbr:dbstruct path: dbdir local: #t))) (read-only (not (file-write-access? db-file-path))) (start (current-milliseconds)) (resdat (if (not (and read-only qry-is-write)) (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))) (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong.. exn ;; This is an attempt to detect that situation and recover gracefully (begin (debug:print0 log-port "ERROR: bad data from server " v " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy (if (and (vector? v) (> (vector-length v) 1)) (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record (vector #t '())))) ;; we could also check that the returned types are valid (vector #t '()))) (success (vector-ref resdat 0)) (res (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) (debug:print 0 log-port "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) (if (> remretries 0) (begin (debug:print-error 0 log-port "local query failed. Trying again.") (thread-sleep! (/ (random 5000) 1000)) ;; some random delay (rmt:open-qry-close-locally log-port multi-sync-mutex cmd run-id params remretries: (- remretries 1))) (begin (debug:print-error 0 log-port "too many retries in rmt:open-qry-close-locally, giving up") #f)) (begin ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! multi-sync-mutex) ;; (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! multi-sync-mutex))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions exn #f |
︙ | ︙ |
Modified rmtmod.scm from [8f5d01960e] to [0228767cb5].
1 | ;;====================================================================== | | | 1 2 3 4 5 6 7 8 9 | ;;====================================================================== ;; Copyright 2019, Matthew Welland. ;; ;; This file is part of Megatest. ;; ;; Megatest is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. |
︙ | ︙ | |||
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | (module rmtmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) ;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time. (define (rmt:send-receive . params) #f) (define (http-transport:close-connections . params) #f) ;; from remote defstruct in common.scm (define (remote-conndat-set! . params) #f) (define (remote-server-url-set! . params) #f) (define (remote-ro-mode . params) #f) (define (remote-ro-mode-set! . params) #f) (define (remote-ro-mode-checked-set! . params) #f) (define (remote-ro-mode-checked . params) #f) (define (debug:print . params) #f) (define (debug:print-info . params) #f) (define (set-functions send-receive rsus close-connections rcs dbgp dbgpinfo ro-mode ro-mode-set ro-mode-checked-set ro-mode-checked ) | > | | 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | (module rmtmod * (import scheme chicken data-structures extras) (import (prefix sqlite3 sqlite3:) posix typed-records srfi-18) (import commonmod) (use (prefix ulex ulex:)) ;; Hack to make these functions visible to the refactored code, goal is to eliminate these over time. (define (rmt:send-receive . params) #f) (define (http-transport:close-connections . params) #f) ;; from remote defstruct in common.scm (define (remote-conndat-set! . params) #f) (define (remote-server-url-set! . params) #f) (define (remote-ro-mode . params) #f) (define (remote-ro-mode-set! . params) #f) (define (remote-ro-mode-checked-set! . params) #f) (define (remote-ro-mode-checked . params) #f) (define (debug:print . params) #f) (define (debug:print-info . params) #f) (define (set-functions send-receive rsus close-connections rcs dbgp dbgpinfo ro-mode ro-mode-set ro-mode-checked-set ro-mode-checked ) (set! rmt:send-receive send-receive) (set! remote-server-url-set! rsus) (set! http-transport:close-connections close-connections) (set! remote-conndat-set! rcs) (set! debug:print dbgp) (set! debug:print-info dbgpinfo) (set! remote-ro-mode ro-mode) (set! remote-ro-mode-set! ro-mode-set) |
︙ | ︙ | |||
111 112 113 114 115 116 117 118 | (mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res | > > > | > > > > > > > > > > > > | 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | (mutex-lock! *rmt-mutex*) (http-transport:close-connections area-dat: runremote) (set! *runremote* #f) ;; force starting over (mutex-unlock! *rmt-mutex*) (thread-sleep! wait-delay) (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1))) res)) ;; All good, return res ;;====================================================================== ;; ulex and steps stuff ;;====================================================================== (define (rmtmod:setup-ulex toppath) (ulex:make-area dbdir: (conc toppath "/ulexdb") pktsdir: (conc toppath "/pkts") )) (define (rmtmod:send-receive-ulex ulex:conn cmd rid params attemptnum area-dat) #f) ) |