Overview
Context
Changes
Modified client.scm
from [a61d4e6d81]
to [c1867a27a6].
︙ | | |
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
|
204
205
206
207
208
209
210
211
212
213
214
215
216
217
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|
(let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
(debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
(if (< num-available 2)
(server:try-running run-id))
(thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
(client:setup run-id remaining-tries: (- remaining-tries 1)))))))))
;; (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
;; (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME
;; (let* ((iface (http-transport:server-dat-get-iface host-info))
;; (port (http-transport:server-dat-get-port host-info))
;; (start-res (case *transport-type*
;; ((http)(http-transport:client-connect iface port))
;; ((nmsg)(nmsg-transport:client-connect iface port)) ;; (http-transport:server-dat-get-socket host-info))
;; (else #f)))
;; (ping-res (case *transport-type*
;; ((http)(rmt:login-no-auto-client-setup start-res run-id))
;; ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
;; (if logininfo
;; (vector-ref (vector-ref logininfo 1) 1)
;; #f)))
;; (else #f))))
;; (if ping-res ;; sucessful login?
;; (begin
;; (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
;; start-res) ;; return the server info
;; ;; have host info but no ping. shutdown the current connection and try again
;; (begin ;; login failed
;; (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)
;; (case *transport-type*
;; ((http)(http-transport:close-connections run-id)))
;; (hash-table-delete! *runremote* run-id)
;; (if (< remaining-tries 8)
;; (thread-sleep! 5)
;; (thread-sleep! 1))
;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))
;; ;; YUK: rename server-dat here
;;
;; keep this as a function to ease future
(define (client:start run-id server-info)
(http-transport:client-connect (tasks:hostinfo-get-interface server-info)
(tasks:hostinfo-get-port server-info)))
;; client:signal-handler
(define (client:signal-handler signum)
|
︙ | | |
Modified dashboard.scm
from [6d6a8350b9]
to [2960f85268].
︙ | | |
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
|
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
|
-
+
+
-
+
|
(let* ((modtime (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*))
(monitor-modtime (if (file-exists? *monitor-db-path*)
(file-modification-time *monitor-db-path*)
-1))
(run-update-time (current-seconds))
(recalc (dashboard:recalc modtime *please-update-buttons* *last-db-update-time*)))
(if (and (eq? *current-tab-number* 0)
(> monitor-modtime *last-monitor-update-time*))
(or (> monitor-modtime *last-monitor-update-time*)
(> (- run-update-time *last-monitor-update-time*) 5))) ;; update every 1/2 minute just in case
(begin
(set! *last-monitor-update-time* monitor-modtime)
(set! *last-monitor-update-time* run-update-time) ;; monitor-modtime)
(if dashboard:update-servers-table (dashboard:update-servers-table))))
(if recalc
(begin
(case *current-tab-number*
((0)
(if dashboard:update-summary-tab (dashboard:update-summary-tab)))
((1) ;; The runs table is active
|
︙ | | |
Modified rmt.scm
from [c2992f6eb2]
to [537ba5245e].
︙ | | |
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
+
+
+
+
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
|
;; (require-library trace)
;; (import trace)
;; (trace
;; rmt:send-receive
;; api:execute-requests
;; )
;; generate entries for ~/.megatestrc with the following
;;
;; grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u
;;======================================================================
;; S U P P O R T F U N C T I O N S
;;======================================================================
;; NOT USED
;; NOT USED?
;;
(define (rmt:call-transport run-id connection-info cmd jparams)
(case (server:get-transport)
((rpc) ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))
((http) (http-transport:client-api-send-receive run-id connection-info cmd jparams))
((fs) ( fs-transport:client-api-send-receive run-id connection-info cmd jparams))
((zmq) (zmq-transport:client-api-send-receive run-id connection-info cmd jparams))
(else ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))))
;; (define (rmt:call-transport run-id connection-info cmd jparams)
;; (case (server:get-transport)
;; ((rpc) ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))
;; ((http) (http-transport:client-api-send-receive run-id connection-info cmd jparams))
;; ((fs) ( fs-transport:client-api-send-receive run-id connection-info cmd jparams))
;; ((zmq) (zmq-transport:client-api-send-receive run-id connection-info cmd jparams))
;; (else ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))))
;;
(define (rmt:write-frequency-over-limit? cmd run-id)
(and (not (member cmd api:read-only-queries))
(let* ((tmprec (hash-table-ref/default *write-frequency* run-id #f))
(record (if tmprec tmprec
(let ((v (vector (current-seconds) 0)))
|
︙ | | |
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
|
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
|
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(if (> tot 10)
(cons newmax-cmd currmax)
(cons 'none 0))
(loop (car tal)(cdr tal) newmax-cmd currmax)))))))
(mutex-unlock! *db-stats-mutex*)
res))
(define (rmt:open-qry-close-locally cmd run-id params)
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
(let* ((dbstruct-local (if *dbstruct-db*
*dbstruct-db*
(let* ((dbdir (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
(db (make-dbr:dbstruct path: dbdir local: #t)))
(set! *dbstruct-db* db)
db)))
(db-file-path (db:dbfile-path 0)))
;; (read-only (not (file-read-access? db-file-path)))
(let* ((start (current-milliseconds))
(resdat (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))
(res (vector-ref resdat 1))
(duration (- (current-milliseconds) start)))
(rmt:update-db-stats run-id cmd params duration)
;; mark this run as dirty if this was a write
(if (not (member cmd api:read-only-queries))
(let ((start-time (current-seconds)))
(mutex-lock! *db-multi-sync-mutex*)
;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))
;; just set it every time. Is a write more expensive than a read and does it matter?
(hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
(mutex-unlock! *db-multi-sync-mutex*)))
res)))
(db-file-path (db:dbfile-path 0))
;; (read-only (not (file-read-access? db-file-path)))
(start (current-milliseconds))
(resdat (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))
(success (vector-ref resdat 0))
(res (vector-ref resdat 1))
(duration (- (current-milliseconds) start)))
(if (not success)
(if (> remretries 0)
(begin
(debug:print 0 "ERROR: local query failed. Trying again.")
(thread-sleep! (/ (random 5000) 1000)) ;; some random delay
(rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
(begin
(debug:print 0 "ERROR: too many retries in rmt:open-qry-close-locally, giving up")
#f))
(begin
(rmt:update-db-stats run-id cmd params duration)
;; mark this run as dirty if this was a write
(if (not (member cmd api:read-only-queries))
(let ((start-time (current-seconds)))
(mutex-lock! *db-multi-sync-mutex*)
;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))
;; just set it every time. Is a write more expensive than a read and does it matter?
(hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
(mutex-unlock! *db-multi-sync-mutex*)))
res))))
(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
(let* ((run-id (if run-id run-id 0))
;; (jparams (db:obj->string params)) ;; (rmt:dat->json-str params))
(res (handle-exceptions
exn
#f
|
︙ | | |
Modified runs.scm
from [866c85daf6]
to [3bd91bccb4].
︙ | | |
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
-
-
-
+
+
+
|
(if (> (- currtime lasttime) waitval)
(begin
(hash-table-set! *runs:denoise* key currtime)
#t)
#f)))
(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
;;(thread-sleep! (cond
;; ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
;; (else 0)))
(thread-sleep! (cond
((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
(else 0)))
(let* ((num-running (rmt:get-count-tests-running run-id))
(num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
(job-group-limit (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
(if (string? jobg-count)
(string->number jobg-count)
jobg-count))))
(if (> (+ num-running num-running-in-jobgroup) 0)
|
︙ | | |
Modified tasks.scm
from [edd9ff6647]
to [274e4ea2dc].
︙ | | |
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
|
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
(lambda (id)
(set! res id))
mdb ;; NEEDS dbprep ADDED
"SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id)
res))
(define (tasks:need-server run-id)
(let ((forced (configf:lookup *configdat* "server" "required"))
(maxqry (cdr (rmt:get-max-query-average run-id)))
(threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
(cond
(forced
(if (common:low-noise-print 60 run-id "server required is set")
(debug:print-info 0 "Server required is set, starting server for run-id " run-id "."))
#t)
((> maxqry threshold)
(if (common:low-noise-print 60 run-id "Max query time execeeded")
(debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id "."))
#t)
(else
#f))))
(configf:lookup *configdat* "server" "required"))
;; (maxqry (cdr (rmt:get-max-query-average run-id)))
;; (threshold (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
;; (cond
;; (forced
;; (if (common:low-noise-print 60 run-id "server required is set")
;; (debug:print-info 0 "Server required is set, starting server for run-id " run-id "."))
;; #t)
;; ((> maxqry threshold)
;; (if (common:low-noise-print 60 run-id "Max query time execeeded")
;; (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id "."))
;; #t)
;; (else
;; #f))))
;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries)
;; ensure a server is running for this run
(let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))
(delay-time 0))
|
︙ | | |