Changes In Branch v1.65-diet2
Through [f6629c6cb4]
Excluding Merge-Ins
This is equivalent to a diff from
275adb0d10
to f6629c6cb4
Modified api.scm
from [4fa67bb6bd]
to [280d15e219].
︙ | | |
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
-
+
|
cmd-in
(string->symbol cmd-in)))
(params (vector-ref dat 1))
(start-t (current-milliseconds))
(readonly-mode (dbr:dbstruct-read-only dbstruct))
(readonly-command (member cmd api:read-only-queries))
(writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
(foo (begin
#;(foo (begin
(common:telemetry-log (conc "api-in:"(->string cmd))
payload: `((params . ,params)))
#t))
(res
(if writecmd-in-readonly-mode
(conc "attempt to run write command "cmd" on a read-only database")
|
︙ | | |
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
|
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
|
-
+
-
+
|
;; save all stats
(let ((delta-t (- (current-milliseconds)
start-t)))
(hash-table-set! *db-api-call-time* cmd
(cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
(if writecmd-in-readonly-mode
(begin
(common:telemetry-log (conc "api-out:"(->string cmd))
#;(common:telemetry-log (conc "api-out:"(->string cmd))
payload: `((params . ,params)
(ok-res . #t)))
(vector #f res))
(begin
(common:telemetry-log (conc "api-out:"(->string cmd))
#;(common:telemetry-log (conc "api-out:"(->string cmd))
payload: `((params . ,params)
(ok-res . #f)))
(vector #t res))))))))
;; http-server send-response
;; api:process-request
;; db:*
|
︙ | | |
Modified common.scm
from [a82c407907]
to [5791123b30].
︙ | | |
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
|
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
|
-
-
+
+
-
+
-
+
|
(begin
(debug:print 0 *default-log-port* "joining threads failed. exn=" exn)
#t) ;; just ignore it, it might have died in the meantime so joining it will throw an exception
(thread-join! thread))
)))
(hash-table-keys *common:thread-punchlist*)))
(define *common:telemetry-log-state* 'startup)
(define *common:telemetry-log-socket* #f)
#;(define *common:telemetry-log-state* 'startup)
#;(define *common:telemetry-log-socket* #f)
(define (common:telemetry-log-open)
#;(define (common:telemetry-log-open)
(if (eq? *common:telemetry-log-state* 'startup)
(let* ((serverhost (configf:lookup *configdat* "telemetry" "host"))
(serverport (configf:lookup-number *configdat* "telemetry" "port"))
(user (or (get-environment-variable "USER") "unknown"))
(host (or (get-environment-variable "HOST") "unknown")))
(set! *common:telemetry-log-state*
(handle-exceptions
exn
(begin
(debug:print-info 0 *default-log-port* "common-telemetry-log open udp port failure")
'broken)
(if (and serverhost serverport user host)
(let* ((s (udp-open-socket)))
;;(udp-bind! s #f 0)
(udp-connect! s serverhost serverport)
(set! *common:telemetry-log-socket* s)
'open)
'not-needed))))))
(define (common:telemetry-log event #!key (payload '()))
#;(define (common:telemetry-log event #!key (payload '()))
(if (eq? *common:telemetry-log-state* 'startup)
(common:telemetry-log-open))
(if (eq? 'open *common:telemetry-log-state*)
(handle-exceptions
exn
(begin
|
︙ | | |
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
|
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
|
-
+
|
(base64:base64-encode
(z3:encode-buffer
(with-output-to-string (lambda () (pp payload))))))
(msg (conc user":"host":"start":"(current-process-id)":"(car (argv))":"
toppath":"payload-serialized)))
(udp-send *common:telemetry-log-socket* msg))))))
(define (common:telemetry-log-close)
#;(define (common:telemetry-log-close)
(when (or (member *common:telemetry-log-state* '(broken-or-no-server-preclose open)) *common:telemetry-log-socket*)
(handle-exceptions
exn
(begin
(define *common:telemetry-log-state* 'closed-fail)
(debug:print-info 0 *default-log-port* "common-telemetry-log closure failure")
)
(begin
(define *common:telemetry-log-state* 'closed)
(udp-close-socket *common:telemetry-log-socket*)
(set! *common:telemetry-log-socket* #f)))))
|
Modified launch.scm
from [d0067277fa]
to [4cbc0e59d3].
︙ | | |
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
|
-
+
|
;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
(tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
(let loop ((minutes (calc-minutes))
(cpu-load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
(disk-free (get-df (current-directory)))
(last-sync (current-seconds)))
(common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - top of loop encountered at "(current-seconds)" with last-sync="last-sync))
(let* ((over-time (> (current-seconds) (+ last-sync update-period)))
(new-cpu-load (let* ((load (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
(delta (abs (- load cpu-load))))
(if (> delta 0.1) ;; don't bother updating with small changes
load
#f)))
(new-disk-free (let* ((df (if over-time ;; only get df every 30 seconds
|
︙ | | |
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
|
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
|
-
+
-
+
-
+
|
(do-sync (or new-cpu-load new-disk-free over-time))
(test-info (rmt:get-test-info-by-id run-id test-id))
(state (db:test-get-state test-info))
(status (db:test-get-status test-info))
(kill-reason "no kill reason specified")
(kill-job? #f))
(common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - decision time encountered at "(current-seconds)" with last-sync="last-sync" do-sync="do-sync" over-time="over-time" update-period="update-period))
(cond
((test-get-kill-request run-id test-id)
(set! kill-reason "KILLING TEST since received kill request (KILLREQ)")
(set! kill-job? #t))
((and runtlim (> (- (current-seconds) start-seconds) runtlim))
(set! kill-reason (conc "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" (- (current-seconds) start-seconds) " seconds, limit=" runtlim))
(set! kill-job? #t))
((equal? status "DEAD")
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
(rmt:set-state-status-and-roll-up-items run-id test-id 'foo "RUNNING" "n/a" "was marked dead; really still running.")
;;(set! kill-reason "KILLING TEST because it was marked as DEAD by launch:handle-zombie-tests (might indicate really overloaded server or else overzealous setup.deadtime)") ;; MARK RUNNING
(set! kill-job? #f)))
(debug:print 4 *default-log-port* "cpu: " new-cpu-load " disk: " new-disk-free " last-sync: " last-sync " do-sync: " do-sync)
(launch:handle-zombie-tests run-id)
(when do-sync
;;(with-output-to-file (conc (getenv "MT_TEST_RUN_DIR") "/last-loadinfo.log" #:append)
;; (lambda () (pp (list (current-seconds) new-cpu-load new-disk-free (calc-minutes)))))
(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
;; (common:telemetry-log "zombie" (conc "launch:monitor-job - dosync started at "(current-seconds)))
(tests:update-central-meta-info run-id test-id new-cpu-load new-disk-free (calc-minutes) #f #f)
(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
#;(common:telemetry-log "zombie" (conc "launch:monitor-job - dosync finished at "(current-seconds))))
(if kill-job?
(begin
(debug:print-info 0 *default-log-port* "proceeding to kill test: "kill-reason)
(mutex-lock! m)
;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
;; section and the runit section? Or add a loop that tries three times with a 1/4 second
|
︙ | | |
Modified mt.scm
from [e9055c2687]
to [283ae4be89].
︙ | | |
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
-
+
|
(if last-time
(< (current-seconds)(+ last-time 5))
#f))))
(if useres
(let ((result (vector-ref res 1)))
(debug:print 4 *default-log-port* "Using lazy value res: " result)
result)
(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode: mode itemmaps: itemmaps)))
(let ((newres (rmt:get-prereqs-not-met run-id waitons ref-item-path mode itemmaps)))
(hash-table-set! *pre-reqs-met-cache* key (vector (current-seconds) newres))
newres))))
(define (mt:get-run-stats dbstruct run-id)
;; Get run stats from local access, move this ... but where?
(db:get-run-stats dbstruct run-id))
|
︙ | | |
Modified rmt.scm
from [39d97c528a]
to [46b655b2eb].
︙ | | |
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
|
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
|
+
-
+
|
(define (rmt:test-get-paths-matching-keynames-target-new keynames target res testpatt statepatt statuspatt runname)
(let ((run-ids (rmt:get-run-ids-matching-target keynames target res runname testpatt statepatt statuspatt)))
(apply append
(map (lambda (run-id)
(rmt:send-receive 'test-get-paths-matching-keynames-target-new run-id (list run-id keynames target res testpatt statepatt statuspatt runname)))
run-ids))))
;; NOTE: rmt functions can NEVER have key params as they might be called as local
(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path #!key (mode '(normal))(itemmaps #f))
(define (rmt:get-prereqs-not-met run-id waitons ref-test-name ref-item-path mode itemmaps) ;; #!key (mode '(normal))(itemmaps #f))
(rmt:send-receive 'get-prereqs-not-met run-id (list run-id waitons ref-test-name ref-item-path mode itemmaps)))
(define (rmt:get-count-tests-running-for-run-id run-id fastmode)
(rmt:send-receive 'get-count-tests-running-for-run-id run-id (list run-id fastmode)))
(define (rmt:get-not-completed-cnt run-id)
(rmt:send-receive 'get-not-completed-cnt run-id (list run-id)))
|
︙ | | |
Modified runs.scm
from [7d76065ab9]
to [4de72ed2f2].
︙ | | |
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
|
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
|
-
+
-
+
|
;; => review of a previously seen test is higher priority of never visited test
;; reg - list of previously visited tests
;; tal - list of never visited tests
;; prefer next hed to be from reg than tal.
(define runs:nothing-left-in-queue-count 0)
(define (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path #!optional (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
(define (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path #!key (mode '(normal))(itemmaps #f)) ;; mode: testmode itemmaps: itemmaps)
(if (and (runs:testdat-prereqs-not-met testdat)
(< (- (current-seconds) (runs:testdat-last-update testdat)) 10)) ;; only refresh for this test if it has been at least 10 seconds
(runs:testdat-prereqs-not-met testdat)
(let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: mode itemmaps: itemmaps)))
(let* ((res (let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode itemmaps)))
(if (list? res)
res
(begin
(debug:print 0 *default-log-port*
"ERROR: rmt:get-prereqs-not-met returned non-list!\n"
" res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" mode " itemmaps=" itemmaps)
'())))))
|
︙ | | |
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
|
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
|
-
-
-
-
-
-
-
-
-
|
;; (tal (cdr sorted-test-names))
;; (reg '()) ;; registered, put these at the head of tal
;; (reruns '()))
(define (runs:expand-items hed tal reg reruns regfull newtal jobgroup max-concurrent-jobs run-id waitons item-path testmode test-record
can-run-more items runname tconfig reglen test-registry test-records itemmaps testdat)
(let* ((loop-list (list hed tal reg reruns))
(prereqs-not-met (runs:lazy-get-prereqs-not-met testdat run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
#;(let ((res (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)))
(if (list? res)
res
(begin
(debug:print 0 *default-log-port*
"ERROR: rmt:get-prereqs-not-met returned non-list!\n"
" res=" res " run-id=" run-id " waitons=" waitons " hed=" hed " item-path=" item-path " testmode=" testmode " itemmaps=" itemmaps)
'())))
(have-itemized (not (null? (lset-intersection eq? testmode '(itemmatch itemwait)))))
;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
(fails (runs:calc-fails prereqs-not-met))
(prereq-fails (runs:calc-prereq-fail prereqs-not-met))
(non-completed (runs:calc-not-completed prereqs-not-met))
(runnables (runs:calc-runnable prereqs-not-met))
(unexpanded-prereqs
(filter (lambda (testname)
(let* ((test-rec (hash-table-ref test-records testname))
|
︙ | | |
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
|
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
|
-
-
|
(run-limits-info (runs:dat-can-run-more-tests runsdat))
;; (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)) ;; look at the test jobgroup and tot jobs running
(have-resources (car run-limits-info))
(num-running (list-ref run-limits-info 1))
(num-running-in-jobgroup(list-ref run-limits-info 2))
(max-concurrent-jobs (list-ref run-limits-info 3))
(job-group-limit (list-ref run-limits-info 4))
;; (prereqs-not-met (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps))
;; (prereqs-not-met (mt:lazy-get-prereqs-not-met run-id waitons item-path mode: testmode itemmap: itemmap))
(fails (if (list? prereqs-not-met) ;; TODO: rename fails to failed-prereqs
(runs:calc-fails prereqs-not-met)
(begin
(debug:print-error 0 *default-log-port* "prereqs-not-met is not a list! " prereqs-not-met)
'())))
(non-completed (filter (lambda (x) ;; remove hed from not completed list, duh, of course it is not completed!
(not (equal? x hed)))
|
︙ | | |
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
|
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
|
-
|
registry-mutex: registry-mutex
flags: flags
keyvals: keyvals
run-info: run-info
;; newtal: newtal
all-tests-registry: all-tests-registry
;; itemmaps: itemmaps
;; prereqs-not-met: (rmt:get-prereqs-not-met run-id waitons hed item-path mode: testmode itemmaps: itemmaps)
;; can-run-more-tests: (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs) ;; look at the test jobgroup and tot jobs running
)))
;; Initialize the test-registery hash with tests that already have a record
;; convert state to symbol and use that as the hash value
(for-each (lambda (trec)
(let ((id (db:test-get-id trec))
|
︙ | | |
Modified tests.scm
from [0094b671e6]
to [6adeda2896].
︙ | | |
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
-
+
|
;;======================================================================
;;======================================================================
;; Tests
;;======================================================================
(declare (unit tests))
(declare (uses lock-queue))
;;(declare (uses lock-queue))
(declare (uses db))
(declare (uses tdb))
(declare (uses common))
;; (declare (uses dcommon)) ;; needed for the steps processing
(declare (uses items))
(declare (uses runconfig))
;; (declare (uses sdb))
|
︙ | | |