Changes In Branch v1.65-side2 Through [0e1320be35] Excluding Merge-Ins
This is equivalent to a diff from 2769e4b7c9 to 0e1320be35
2021-03-01
| ||
17:42 | Manually patched in the new view check-in: f5206150ee user: mrwellan tags: v1.6569-new-view | |
2021-01-26
| ||
14:00 | Fix for the > crash. Maybe... Leaf check-in: 5a05fc04ff user: matt tags: v1.6569-gt-crash-fix | |
2021-01-25
| ||
12:03 | rebased lazy-queue rollup check-in: 07ab120544 user: matt tags: v1.65-lazyqueue-items-rollup | |
2021-01-15
| ||
22:46 | begin diet check-in: badd71f3b3 user: matt tags: v1.6569-diet | |
21:34 | eval-string-in-environment if was disabled, re-enabled check-in: 9564772564 user: matt tags: v1.6569-reenable-eval-if | |
2021-01-08
| ||
11:42 | enable custom value for max delay between archive time and test last update time Leaf check-in: 86a3d1148e user: pjhatwal tags: v1.6569-refactor | |
2020-11-25
| ||
12:00 | Fixed issues in server gating code Leaf check-in: 063273e8cb user: mrwellan tags: v1.6569-server-gate-fix | |
2020-11-24
| ||
22:27 | Added support for resetting run - allows to reload tests-paths to add tests to a run part way though. Just run megatest -clean-cache -runname $MT_RUNNAME Leaf check-in: 213021e02d user: mrwellan tags: v1.6596-reload-tests-paths | |
2020-10-13
| ||
16:46 | Changed version from 69 to 76. No other changes. Will compile with chicken 13 check-in: 87ca35010f user: mmgraham tags: v1.65, v1.6576 | |
2020-10-12
| ||
16:58 | Merged minor change to v1.65 check-in: 60a665385a user: mrwellan tags: v1.65-side2 | |
16:49 | Reduced message from failed to info. Reverted a delay which seems to help pass full stack ext-tests. Leaf check-in: 9e35b1252c user: mrwellan tags: v1.65-minor-patch | |
13:06 | Added in the query-rest stuff. check-in: 0e1320be35 user: mrwellan tags: v1.65-side2 | |
10:19 | For running local do not use ssh to run pstree check-in: 55f3c8af7f user: mrwellan tags: v1.65-side2 | |
10:18 | Safe vector access in rmt. check-in: 58bb6d997a user: mrwellan tags: v1.65-side2 | |
2020-10-11
| ||
22:46 | Patched forward adjutant code. check-in: f936717bfa user: matt tags: v1.65-adjutant-again | |
2020-10-05
| ||
22:49 | Do not exit on failure to create directory - race conditons on NFS cause false fail scenarios - just keep going and cross your fingers... (cherrypicked from v1.6572) check-in: 05b253a452 user: matt tags: v1.65-sidework | |
22:46 | run duration testdat check-in: 4a0b43f3c6 user: matt tags: v1.65-test-rundat2 | |
2020-09-28
| ||
23:41 | Moving call throttle attempt to v1.6564 ==5.71/0.8/WARN/1201== Leaf check-in: 919bc699d8 user: mrwellan tags: v1.65-rmt-call-throttle | |
2020-09-21
| ||
15:36 | merged in 1.65-test-rundat branch ==/FAIL/orion,mars/== check-in: cfd25d66e9 user: mmgraham tags: v1.6571, v1.65-failed-testdat | |
07:00 | Added get-testsuite-name all over launch:setup and still not set when needed! This did NOT work. Closed-Leaf check-in: 2efe8ad422 user: mrwellan tags: v1.65-get-testsuitename | |
2020-09-19
| ||
04:21 | Start moving test_rundat to no-sync db. ==/20/2/WARN/1203/mars/== check-in: abfabdb839 user: matt tags: v1.65-test-rundat | |
2020-09-18
| ||
17:30 | added check for file existence before file delete ==/14/1.9/WARN/orion,mars/== NOTE: This is the last v1.65 before the split off. I.e code from before this point IS in the far future v1.65 branch. Code from this point to that branch might NOT be in the branch. check-in: 2769e4b7c9 user: mmgraham tags: v1.65, v1.6569 | |
12:27 | cherry picked 2 fixes, changed version to 1.6569 ==/7.2/2.0/PASS/1201/mars/== check-in: d145d0eb02 user: mmgraham tags: v1.65 | |
Modified api.scm from [4fa67bb6bd] to [cc4c2bfc8f].
︙ | ︙ | |||
152 153 154 155 156 157 158 | ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 20) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") (set! *server-overloaded* #t) (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else | | | | > > | 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 | ((not (vector? dat)) ;; it is an error to not receive a vector (vector #f (vector #f "remote must be called with a vector"))) ((> *api-process-request-count* 20) ;; 20) (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an overloaded message.") (set! *server-overloaded* #t) (vector #f (vector #f 'overloaded))) ;; the inner vector is what gets returned. nope, don't know why. please refactor! (else (let* ((cmd-in (common:safe-vector-ref dat 0 'nocmd)) (cmd (if (symbol? cmd-in) cmd-in (string->symbol cmd-in))) (params (common:safe-vector-ref dat 1 '())) (start-t (current-milliseconds)) (readonly-mode (dbr:dbstruct-read-only dbstruct)) (readonly-command (member cmd api:read-only-queries)) (writecmd-in-readonly-mode (and readonly-mode (not readonly-command))) (foo (begin #;(common:telemetry-log (conc "api-in:"(->string cmd)) payload: `((params . ,params))) #t)) (res (if writecmd-in-readonly-mode (conc "attempt to run write command "cmd" on a read-only database") (case cmd ;;=============================================== ;; READ/WRITE QUERIES ;;=============================================== ((nocmd) '(#f "All broken!")) ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) |
︙ | ︙ | |||
357 358 359 360 361 362 363 | ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (begin | | | | | | 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 | ;; save all stats (let ((delta-t (- (current-milliseconds) start-t))) (hash-table-set! *db-api-call-time* cmd (cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))) (if writecmd-in-readonly-mode (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #t))) (vector #f res)) (begin #;(common:telemetry-log (conc "api-out:"(->string cmd)) payload: `((params . ,params) (ok-res . #f))) (vector #t res)))))))) ;; http-server send-response ;; api:process-request ;; db:* ;; ;; NB// Runs on the server as part of the server loop ;; (define (api:process-request dbstruct $) ;; the $ is the request vars proc (set! *api-process-request-count* (+ *api-process-request-count* 1)) (let* ((cmd ($ 'cmd)) (paramsj ($ 'params)) (params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?) (resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result ) (success (common:safe-vector-ref resdat 0 #f)) (res (common:safe-vector-ref resdat 1 #f))) ;; (vector flag payload), get the payload, ignore the flag (why?) (if (not success) (debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params)) (if (> *api-process-request-count* *max-api-process-requests*) (set! *max-api-process-requests* *api-process-request-count*)) (set! *api-process-request-count* (- *api-process-request-count* 1)) ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds ;; (rmt:dat->json-str |
︙ | ︙ |
Modified common.scm from [33c7316880] to [2732dee33e].
︙ | ︙ | |||
486 487 488 489 490 491 492 | ;; copy <file>.hrs.gz <file>.days.gz (when (>= (age-wks daysfile) 1) (copy daysfile wksfile) (copy hrsfile daysfile)) #t) #f)) | > > > > > > > > > | | 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 | ;; copy <file>.hrs.gz <file>.days.gz (when (>= (age-wks daysfile) 1) (copy daysfile wksfile) (copy hrsfile daysfile)) #t) #f)) (define (common:safe-vector-ref vec indx default) (if (vector? vec) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "remote data issue: exn=" exn) default) (vector-ref vec indx)) default)) ;; Rotate logs, logic: ;; if > 500k and older than 1 week: ;; remove previous compressed log and compress this log ;; WARNING: This proc operates assuming that it is in the directory above the ;; logs directory you wish to log-rotate. ;; |
︙ | ︙ |
Modified launch.scm from [d0067277fa] to [d79c56de3e].
︙ | ︙ | |||
769 770 771 772 773 774 775 | (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) | > > | | 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 | (item-path (vector-ref running-test 11))) (debug:print 0 *default-log-port* "test " test-name "/" item-path " not completed") (if (not (null? tal)) (loop (car tal) (cdr tal))))))))))) (define (launch:is-test-alive host pid) (if (and host pid (not (equal? host "n/a"))) (let* ((is-local (equal? host (get-host-name))) (ssh-cmd (if is-local " " (conc "ssh " host " "))) (cmd (conc ssh-cmd "pstree -A " pid)) (output (with-input-from-pipe cmd read-lines))) (debug:print 2 *default-log-port* "Running " cmd " received " output) (if (eq? (length output) 0) #f #t)) #t)) |
︙ | ︙ |
Modified rmt.scm from [39d97c528a] to [23bba59d7b].
︙ | ︙ | |||
52 53 54 55 56 57 58 59 60 61 62 63 64 65 | cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) | > > > > > > > > > > > > > > > > > > > > > | | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | cinfo (if (server:check-if-running areapath) (client:setup areapath) #f)))) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define *rmt-query-last-call-time* 0) (define *rmt-query-last-rest-time* 0) ;; last time there was at least a 1/2 second rest - giving other processes access to the db ;; NOTE: This query rest algorythm will not adapt to long query times. REDESIGN NEEDED. TODO. FIXME. ;; (define (rmt:query-rest) (let* ((now (current-milliseconds))) (cond ((> (- now *rmt-query-last-call-time*) 500) ;; it's been a while since last query - no need to rest (set! *rmt-query-last-rest-time* now) (set! *rmt-query-last-call-time* now)) ((> (- now *rmt-query-last-rest-time*) 5000) ;; no natural rests have happened (debug:print 0 *default-log-port* "query rest needed. blocking for 1/2 second.") (thread-sleep! 0.5) ;; force a rest of a half second (set! *rmt-query-last-rest-time* now) (set! *rmt-query-last-call-time* now)) (else ;; sufficient rests have occurred, just record the last query time (set! *rmt-query-last-call-time* now))))) ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #;(common:telemetry-log (conc "rmt:"(->string cmd)) payload: `((rid . ,rid) (params . ,params))) (if (not (equal? (configf:lookup *configdat* "setup" "query-rest") "no")) (rmt:query-rest)) (if (> attemptnum 2) (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) (cond ((> attemptnum 2) (thread-sleep! 0.05)) ((> attemptnum 10) (thread-sleep! 0.5)) ((> attemptnum 20) (thread-sleep! 1))) |
︙ | ︙ | |||
367 368 369 370 371 372 373 | (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy (if (and (vector? v) (> (vector-length v) 1)) (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record (vector #t '())))) ;; we could also check that the returned types are valid (vector #t '()))) | | | | | 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 | (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy (if (and (vector? v) (> (vector-length v) 1)) (let ((newvec (vector (vector-ref v 0)(vector-ref v 1)))) newvec) ;; by copying the vector while inside the error handler we should force the detection of a corrupted record (vector #t '())))) ;; we could also check that the returned types are valid (vector #t '()))) (success (common:safe-vector-ref resdat 0 #f)) ;; (vector-ref resdat 0)) (res (common:safe-vector-ref resdat 1 #f)) ;; (vector-ref resdat 1)) (duration (- (current-milliseconds) start))) (if (and read-only qry-is-write) (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd)) (if (not success) (if (> remretries 0) (begin (debug:print-error 0 *default-log-port* "local query failed. Trying again.") (thread-sleep! (/ (random 5000) 1000)) ;; some random delay (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1))) (begin (debug:print-error 0 *default-log-port* "too many retries in rmt:open-qry-close-locally, giving up") #f)) (begin ;; (rmt:update-db-stats run-id cmd params duration) ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it (if qry-is-write (let ((start-time (current-seconds))) (mutex-lock! *db-multi-sync-mutex*) (set! *db-last-access* start-time) ;; THIS IS PROBABLY USELESS? (we are on a client) (mutex-unlock! *db-multi-sync-mutex*))))) res)) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((run-id (if run-id run-id 0)) (res (handle-exceptions exn |
︙ | ︙ |