Changes In Branch private
Through [42e2fcd1c8]
Excluding Merge-Ins
This is equivalent to a diff from
0044c7f04a
to 42e2fcd1c8
Modified api.scm
from [f1d46841da]
to [b8376b33fc].
︙ | | |
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
-
+
-
-
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
|
;; - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;; - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
(handle-exceptions
exn
(let ((call-chain (get-call-chain))
(let ((call-chain (get-call-chain)))
)
(debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
(print-call-chain (current-error-port))
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
(cond
((not (vector? dat)) ;; it is an error to not receive a vector
(vector #f #f "remote must be called with a vector") )
(vector #f (vector #f "remote must be called with a vector")))
((> *api-process-request-count* 20) ;; 20)
'overloaded) ;; the inner vector is what gets returned. nope, don't know why. please refactor!
(else
(let* ((cmd-in (vector-ref dat 0))
(cmd (if (symbol? cmd-in)
cmd-in
(string->symbol cmd-in)))
(params (vector-ref dat 1))
(start-t (current-milliseconds))
(readonly-mode (dbr:dbstruct-read-only dbstruct))
(readonly-command (member cmd api:read-only-queries))
(let* ((cmd-in (vector-ref dat 0))
(cmd (if (symbol? cmd-in)
cmd-in
(string->symbol cmd-in)))
(params (vector-ref dat 1))
(start-t (current-milliseconds))
(readonly-mode (dbr:dbstruct-read-only dbstruct))
(readonly-command (member cmd api:read-only-queries))
(writecmd-in-readonly-mode (and readonly-mode (not readonly-command)))
(res
(if writecmd-in-readonly-mode
(conc "attempt to run write command "cmd" on a read-only database")
(case cmd
;;===============================================
;; READ/WRITE QUERIES
|
︙ | | |
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
|
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
|
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+
+
+
+
|
((ping) (current-process-id))
((get-changed-record-ids) (apply db:get-changed-record-ids dbstruct params))
;; TESTMETA
((testmeta-get-record) (apply db:testmeta-get-record dbstruct params))
;; TASKS
((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))))))
(if (not writecmd-in-readonly-mode)
(let ((delta-t (- (current-milliseconds)
start-t)))
(hash-table-set! *db-api-call-time* cmd
(cons delta-t (hash-table-ref/default *db-api-call-time* cmd '())))
(vector #t res))
(vector #f res)))))))
((find-task-queue-records) (apply tasks:find-task-queue-records dbstruct params))
(else
(debug:print 0 *default-log-port* "ERROR: bad api call " cmd)
(conc "ERROR: BAD api call " cmd))))))
;; save all stats
(let ((delta-t (- (current-milliseconds)
start-t)))
(hash-table-set! *db-api-call-time* cmd
(cons delta-t (hash-table-ref/default *db-api-call-time* cmd '()))))
(if writecmd-in-readonly-mode
(vector #f res)
(vector #t res)))))))
;; http-server send-response
;; api:process-request
;; db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
(set! *api-process-request-count* (+ *api-process-request-count* 1))
(let* ((cmd ($ 'cmd))
(paramsj ($ 'params))
(params (db:string->obj paramsj transport: 'http)) ;; (rmt:json-str->dat paramsj))
(resdat (api:execute-requests dbstruct (vector cmd params))) ;; #( flag result )
(res (vector-ref resdat 1)))
(params (db:string->obj paramsj transport: 'http)) ;; incoming data from the POST (or is it a GET?)
(resdat (api:execute-requests dbstruct (vector cmd params))) ;; process the request, resdat = #( flag result )
(success (vector-ref resdat 0))
(res (vector-ref resdat 1))) ;; (vector flag payload), get the payload, ignore the flag (why?)
(if (not success)
(debug:print 0 *default-log-port* "ERROR: success flag is #f for " cmd " with params " params))
(if (> *api-process-request-count* *max-api-process-requests*)
(set! *max-api-process-requests* *api-process-request-count*))
(set! *api-process-request-count* (- *api-process-request-count* 1))
;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
;; (rmt:dat->json-str
;; (if (or (string? res)
;; (list? res)
;; (number? res)
;; (boolean? res))
;; res
;; (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
(db:obj->string res transport: 'http)))
|
Modified client.scm
from [19d242e4dd]
to [950fa4a4a2].
︙ | | |
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
-
+
|
(begin
(debug:print-error 0 *default-log-port* "failed to start or connect to server")
(exit 1))
;;
;; Alternatively here, we can get the list of candidate servers and work our way
;; through them searching for a good one.
;;
(let* ((server-dat (server:get-first-best areapath))
(let* ((server-dat (server:get-rand-best areapath)) ;; (server:get-first-best areapath))
(runremote (or area-dat *runremote*)))
(if (not server-dat) ;; no server found
(client:setup-http areapath remaining-tries: (- remaining-tries 1))
(let ((host (cadr server-dat))
(port (caddr server-dat)))
(debug:print-info 4 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
(if (and (not area-dat)
|
︙ | | |
106
107
108
109
110
111
112
113
114
115
116
117
118
|
106
107
108
109
110
111
112
113
114
115
116
117
118
|
-
+
+
-
|
(case *transport-type*
((http)(http-transport:close-connections)))
(remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id)
(thread-sleep! 1)
(client:setup-http areapath remaining-tries: (- remaining-tries 1))
)))
(begin ;; no server registered
(server:kind-run areapath)
;; (server:kind-run areapath)
(server:start-and-wait areapath)
(debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries)
(thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms.
(server:start-and-wait areapath)
(client:setup-http areapath remaining-tries: (- remaining-tries 1)))))))))
|
Modified common.scm
from [0b58394952]
to [4b658465e8].
︙ | | |
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
|
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
-
+
+
+
+
|
(defstruct remote
(hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag )
(server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f))
(last-server-check 0) ;; last time we checked to see if the server was alive
(conndat #f)
(transport *transport-type*)
(server-timeout (or (server:get-timeout) 100))) ;; default to 100 seconds
(server-timeout (or (server:get-timeout) 100)) ;; default to 100 seconds
(force-server #f)
(ro-mode #f)
(ro-mode-checked #f)) ;; flag that indicates we have checked for ro-mode
;; launching and hosts
(defstruct host
(reachable #f)
(last-update 0)
(last-used 0)
(last-cpuload 1))
|
︙ | | |
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
|
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
|
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
|
(read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))
;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
(handle-exceptions
exn
#f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail.
(if (file-exists? fname)
(if (> (- (current-seconds)(file-modification-time fname)) expire-time)
(begin
(delete-file* fname)
(common:simple-file-lock fname expire-time: expire-time))
#f)
(let ((key-string (conc (get-host-name) "-" (current-process-id))))
(with-output-to-file fname
(lambda ()
(print key-string)))
(thread-sleep! 0.25)
(if (file-exists? fname)
(with-input-from-file fname
(lambda ()
(equal? key-string (read-line))))
#f))))
(if (file-exists? fname)
(if (> (- (current-seconds)(file-modification-time fname)) expire-time)
(begin
(delete-file* fname)
(common:simple-file-lock fname expire-time: expire-time))
#f)
(let ((key-string (conc (get-host-name) "-" (current-process-id))))
(with-output-to-file fname
(lambda ()
(print key-string)))
(thread-sleep! 0.25)
(if (file-exists? fname)
(with-input-from-file fname
(lambda ()
(equal? key-string (read-line))))
#f)))))
(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
(let ((end-time (+ expire-time (current-seconds))))
(let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
(if got-lock
#t
(if (> end-time (current-seconds))
(begin
(thread-sleep! 3)
(loop (common:simple-file-lock fname expire-time: expire-time)))
#f)))))
(define (common:simple-file-release-lock fname)
(handle-exceptions
exn
#f ;; I don't really care why this failed (at least for now)
(delete-file* fname))
(delete-file* fname)))
;;======================================================================
;; S T A T E S A N D S T A T U S E S
;;======================================================================
(define *common:std-states*
'((0 "ARCHIVED")
|
︙ | | |
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
|
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
|
-
+
|
(if res (cadr res)(if (null? default) #f (car default)))))
(define (common:get-testsuite-name)
(or (configf:lookup *configdat* "setup" "area-name") ;; megatest is a flexible tool, testsuite is too limiting a description.
(configf:lookup *configdat* "setup" "testsuite" )
(if (string? *toppath* )
(pathname-file *toppath*)
(pathname-file (current-directory)))))
#f))) ;; (pathname-file (current-directory)))))
(define common:get-area-name common:get-testsuite-name)
(define (common:get-db-tmp-area)
(if *db-cache-path*
*db-cache-path*
(if *toppath*
|
︙ | | |
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
|
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
|
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; (tags-testpatt
;; (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt)
;; tags-testpatt)
((and (equal? args-testpatt "%") rtestpatt)
(debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
rtestpatt)
(else args-testpatt))))
(define (common:false-on-exception thunk #!key (message #f))
(handle-exceptions exn
(begin
(if message
(debug:print-info 0 *default-log-port* message))
#f) (thunk) ))
(define (common:file-exists? path-string)
;; this avoids stack dumps in the case where
;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
(common:false-on-exception (lambda () (file-exists? path-string))
message: (conc "Unable to access path: " path-string)
))
(define (common:directory-exists? path-string)
;;;; TODO: catch permission denied exceptions and emit appropriate warnings, eg: system error while trying to access file: "/nfs/pdx/disks/icf_env_disk001/bjbarcla/gwa/issues/mtdev/randy-slow/reproduce/q...
(common:false-on-exception (lambda () (directory-exists? path-string))
message: (conc "Unable to access path: " path-string)
))
(define (common:get-linktree)
(or (getenv "MT_LINKTREE")
(or (and *configdat*
(configf:lookup *configdat* "setup" "linktree"))
(if *toppath*
(conc *toppath* "/lt")
(if (file-exists? "megatest.config") ;; we are in the toppath (new area, mtutils compatible)
|
︙ | | |
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
|
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
|
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; am I on the homehost?
;;
(define (common:on-homehost?)
(let ((hh (common:get-homehost)))
(if hh
(cdr hh)
#f)))
;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
(not (or (args:get-arg "-no-cache")
(and *configdat*
(equal? (configf:lookup *configdat* "setup" "use-cache") "no")))))
;; force use of server?
;;
(define (common:force-server?)
(let* ((force-setting (configf:lookup *configdat* "server" "force"))
(force-type (if force-setting (string->symbol force-setting) #f))
(force-result (case force-type
((#f) #f)
((always) #t)
((test) (if (args:get-arg "-execute") ;; we are in a test
#t
#f))
(else
(debug:print 0 *default-log-port* "ERROR: Bad server force setting " force-setting ", forcing server.")
#t)))) ;; default to requiring server
(if force-result
(begin
(debug:print-info 0 *default-log-port* "forcing use of server, force setting is \"" force-setting "\".")
#t)
#f)))
;; do we honor the caches of the config files?
;;
(define (common:use-cache?)
(not (or (args:get-arg "-no-cache")
(and *configdat*
(equal? (configf:lookup *configdat* "setup" "use-cache") "no")))))
|
︙ | | |
Modified dashboard.scm
from [3869e6c0c0]
to [15a7467abe].
︙ | | |
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
|
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
|
-
+
|
;; Force creation of the db in case it isn't already there.
;; (tasks:open-db)
(define (dashboard:get-youngest-run-db-mod-time dbdir)
(handle-exceptions
exn
(begin
(debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir)
(debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir)
(current-seconds)) ;; something went wrong - just print an error and return current-seconds
(common:max (map (lambda (filen)
(file-modification-time filen))
(glob (conc dbdir "/*.db*"))))))
(define (dashboard:monitor-changed? commondat tabdat)
(let* ((run-update-time (current-seconds))
|
︙ | | |
Modified db.scm
from [f0fec9ca36]
to [8260cfea21].
︙ | | |
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
|
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
|
-
+
+
+
-
+
+
-
+
+
|
(define (db:print-current-query-stats)
;; generate stats from *db-api-call-time*
(let ((ordered-keys (sort (hash-table-keys *db-api-call-time*)
(lambda (a b)
(let ((sum-a (common:sum (hash-table-ref *db-api-call-time* a)))
(sum-b (common:sum (hash-table-ref *db-api-call-time* b))))
(> sum-a sum-b))))))
(> sum-a sum-b)))))
(total 0))
(for-each
(lambda (cmd-key)
(let* ((dat (hash-table-ref *db-api-call-time* cmd-key))
(num (length dat))
(avg (if (> (length dat) 0)
(avg (if (> num 0)
(/ (common:sum dat)(length dat)))))
(set! total (+ total num))
(debug:print-info 0 *default-log-port* cmd-key "\tavg: " avg " max: " (common:max dat) " min: " (common:min-max < dat) " num: " (length dat))))
ordered-keys)))
ordered-keys)
(debug:print-info 0 *default-log-port* "TOTAL: " total " api calls since start.")))
(define (db:get-all-run-ids dbstruct)
(db:with-db
dbstruct
#f
#f
(lambda (db)
|
︙ | | |
Modified http-transport.scm
from [d55287325f]
to [a7745a1b3a].
︙ | | |
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
|
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
|
-
-
+
+
|
(th2 (make-thread time-out "time out")))
(thread-start! th1)
(thread-start! th2)
(thread-join! th1)
(thread-terminate! th2)
(debug:print-info 11 *default-log-port* "got res=" res)
(if (vector? res)
(if (vector-ref res 0)
res
(if (vector-ref res 0) ;; this is the first flag or the second flag?
res ;; this is the *inner* vector? seriously? why?
(if (debug:debug-mode 11)
(begin ;; note: this code also called in nmsg-transport - consider consolidating it
(debug:print-error 11 *default-log-port* "error occured at server, info=" (vector-ref res 2))
(debug:print 11 *default-log-port* " client call chain:")
(print-call-chain (current-error-port))
(debug:print 11 *default-log-port* " server call chain:")
(pp (vector-ref res 1) (current-error-port))
|
︙ | | |
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
|
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
|
-
+
+
+
+
|
(set! last-access *db-last-access*)
(mutex-unlock! *heartbeat-mutex*)
(if (common:low-noise-print 120 (conc "server running on " iface ":" port))
(begin
(debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds))
(flush-output *default-log-port*)))
(if (common:low-noise-print 60 "dbstats")
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))
(adjusted-timeout (if (> hrs-since-start 1)
(- server-timeout (inexact->exact (round (* hrs-since-start 60)))) ;; subtract 60 seconds per hour
server-timeout)))
(if (common:low-noise-print 120 "server timeout")
(debug:print-info 0 *default-log-port* "Adjusted server timeout: " adjusted-timeout))
(cond
|
︙ | | |
Modified launch.scm
from [7f8f1157fc]
to [df0549f95d].
︙ | | |
419
420
421
422
423
424
425
426
427
428
429
430
431
432
|
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
|
+
|
;; (serverinf (assoc/default 'serverinf cmdinfo))
;; (port (assoc/default 'port cmdinfo))
(serverurl (assoc/default 'serverurl cmdinfo))
(homehost (assoc/default 'homehost cmdinfo))
(run-id (assoc/default 'run-id cmdinfo))
(test-id (assoc/default 'test-id cmdinfo))
(target (assoc/default 'target cmdinfo))
(areaname (assoc/default 'areaname cmdinfo))
(itemdat (assoc/default 'itemdat cmdinfo))
(env-ovrd (assoc/default 'env-ovrd cmdinfo))
(set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar
(runname (assoc/default 'runname cmdinfo))
(megatest (assoc/default 'megatest cmdinfo))
(runtlim (assoc/default 'runtlim cmdinfo))
(contour (assoc/default 'contour cmdinfo))
|
︙ | | |
443
444
445
446
447
448
449
450
451
452
453
454
455
456
|
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
|
+
+
+
+
+
+
+
|
(file-execute-access? fulln))
fulln
runscript))))) ;; assume it is on the path
) ;; (rollup-status 0)
(if contour (setenv "MT_CONTOUR" contour))
;; immediated set some key variables from CMDINFO data, yes, these will be set again below ...
;;
(setenv "MT_TESTSUITENAME" areaname)
(setenv "MT_RUN_AREA_HOME" top-path)
(set! *toppath* top-path)
(setenv "MT_TEST_RUN_DIR" work-area)
;; On NFS it can be slow and unreliable to get needed startup information.
;; i. Check if we are on the homehost, if so, proceed
;; ii. Check if host and port passed in via CMDINFO are valid and if
;; possible use them.
(let ((bestadrs (server:get-best-guess-address (get-host-name)))
(needcare #f))
(if (equal? homehost bestadrs) ;; we are likely on the homehost
|
︙ | | |
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
|
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
+
-
+
+
+
+
+
|
(start-res (http-transport:client-connect host port))
(ping-res (rmt:login-no-auto-client-setup start-res)))
(if (and start-res
ping-res)
(let ((url (http-transport:server-dat-make-url start-res)))
(remote-conndat-set! *runremote* start-res)
(remote-server-url-set! *runremote* url)
(if (server:ping url)
(debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data."))
(debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.")
(begin
(debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url)
(remote-conndat-set! *runremote* #f)
(remote-server-url-set! *runremote* #f))))
(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
))
(begin
(debug:print-info 0 *default-log-port* (if host-port
(conc "received invalid host-port information " host-port)
"no host-port information received"))
;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
|
︙ | | |
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
|
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
|
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
-
-
+
|
(mutex-unlock! *launch-setup-mutex*)
*toppath*)
(let ((res (launch:setup-body force: force areapath: areapath)))
(mutex-unlock! *launch-setup-mutex*)
res)))
(define (launch:setup-body #!key (force #f) (areapath #f))
(if (and (eq? *configstatus* 'fulldata) *toppath*) ;; no need to reprocess
*toppath* ;; return toppath
(let* ((use-cache (common:use-cache?))
(toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
(runname (common:args-get-runname))
(target (common:args-get-target exit-if-bad: #t))
(linktree (common:get-linktree))
(contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
(sections (if target (list "default" target) #f)) ;; for runconfigs
(mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
(rundir (if (and runname target linktree)(conc linktree (if contour (conc "/" contour) "") "/" target "/" runname) #f))
(mtcachef (and rundir (conc rundir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
(rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))
(cancreate (and rundir (file-exists? rundir)(file-write-access? rundir))))
;; (cxt (hash-table-ref/default *contexts* toppath #f)))
(let* ((use-cache (common:use-cache?))
(toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
(runname (common:args-get-runname))
(target (common:args-get-target exit-if-bad: #t))
(linktree (common:get-linktree))
(contour #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
(sections (if target (list "default" target) #f)) ;; for runconfigs
(mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config
(rundir (if (and runname target linktree)(conc linktree "/" target "/" runname) #f))
(mtcachef (and rundir (conc rundir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash)))
(rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash)))
(cancreate (and rundir (common:file-exists? rundir)(file-write-access? rundir))))
;; (cxt (hash-table-ref/default *contexts* toppath #f)))
;; create our cxt for this area if it doesn't already exist
;; (if (not cxt)(hash-table-set! *contexts* toppath (make-cxt)))
;; (print "runname: " runname " target: " target " mtcachef: " mtcachef " rccachef: " rccachef)
(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
(cond
;; create our cxt for this area if it doesn't already exist
;; (if (not cxt)(hash-table-set! *contexts* toppath (make-cxt)))
;; (print "runname: " runname " target: " target " mtcachef: " mtcachef " rccachef: " rccachef)
(set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource
(cond
;; data was read and cached and available in *configstatus*, toppath has already been set
((eq? *configstatus* 'fulldata)
*toppath*)
;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
((and mtcachef (file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache)
(set! *configdat* (configf:read-alist mtcachef))
(set! *runconfigdat* (configf:read-alist rccachef))
(set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
(set! *configstatus* 'fulldata)
(set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
*toppath*)
;; we have all the info needed to fully process runconfigs and megatest.config
(mtcachef
(let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect
mtconfig
environ-patt: "env-override"
given-toppath: toppath
pathenvvar: "MT_RUN_AREA_HOME"))
(first-rundat (let ((toppath (if toppath
toppath
(car first-pass))))
(read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
(conc (if (string? toppath)
toppath
(get-environment-variable "MT_RUN_AREA_HOME"))
"/runconfigs.config")
*runconfigdat* #t
sections: sections))))
(set! *runconfigdat* first-rundat)
(if first-pass ;;
(begin
(set! *configdat* (car first-pass))
(set! *configinfo* first-pass)
(set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
(set! toppath *toppath*)
(if (not *toppath*)
(begin
(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
(exit 1)))
(setenv "MT_RUN_AREA_HOME" *toppath*)
;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
(let* ((keys (rmt:get-keys))
(key-vals (keys:target->keyval keys target))
(linktree (common:get-linktree))
;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME
((and mtcachef (common:file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache)
(set! *configdat* (configf:read-alist mtcachef))
(set! *runconfigdat* (configf:read-alist rccachef))
(set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME")))
(set! *configstatus* 'fulldata)
(set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME"))
*toppath*)
;; we have all the info needed to fully process runconfigs and megatest.config
(mtcachef
(let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect
mtconfig
environ-patt: "env-override"
given-toppath: toppath
pathenvvar: "MT_RUN_AREA_HOME"))
(first-rundat (let ((toppath (if toppath
toppath
(car first-pass))))
(read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now.
(conc (if (string? toppath)
toppath
(get-environment-variable "MT_RUN_AREA_HOME"))
"/runconfigs.config")
*runconfigdat* #t
sections: sections))))
(set! *runconfigdat* first-rundat)
(if first-pass ;;
(begin
(set! *configdat* (car first-pass))
(set! *configinfo* first-pass)
(set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it
(set! toppath *toppath*)
(if (not *toppath*)
(begin
(debug:print-error 0 *default-log-port* "you are not in a megatest area!")
(exit 1)))
(setenv "MT_RUN_AREA_HOME" *toppath*)
;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
(let* ((keys (rmt:get-keys))
(key-vals (keys:target->keyval keys target))
(linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
; (or (getenv "MT_LINKTREE")
; (if *configdat*
; (configf:lookup *configdat* "setup" "linktree")
; (conc *toppath* "/lt"))))
(second-pass (find-and-read-config
mtconfig
environ-patt: "env-override"
given-toppath: toppath
pathenvvar: "MT_RUN_AREA_HOME"))
(runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
(for-each (lambda (kt)
(setenv (car kt) (cadr kt)))
key-vals)
(read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
sections: sections))))
(if cancreate (configf:write-alist runconfigdat rccachef))
(set! *runconfigdat* runconfigdat)
(if cancreate (configf:write-alist *configdat* mtcachef))
(if cancreate (set! *configstatus* 'fulldata))))
;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
(set! *configdat* (make-hash-table))
)))
;; else read what you can and set the flag accordingly
(else
(let* ((cfgdat (find-and-read-config
(or (args:get-arg "-config") "megatest.config")
environ-patt: "env-override"
given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
pathenvvar: "MT_RUN_AREA_HOME")))
(if cfgdat
(let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
(rdat (read-config (conc toppath ;; convert this to use runconfig:read!
"/runconfigs.config") *runconfigdat* #t sections: sections)))
(set! *configinfo* cfgdat)
(set! *configdat* (car cfgdat))
(set! *runconfigdat* rdat)
(set! *toppath* toppath)
(set! *configstatus* 'partial))
(begin
(debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
(exit 2))))))
;; additional house keeping
(let* ((linktree (common:get-linktree)))
(if linktree
(begin
(if (not (file-exists? linktree))
(begin
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(exit 1))
(create-directory linktree #t))))
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
(let ((tlink (conc *toppath* "/lt")))
(if (not (file-exists? tlink))
(create-symbolic-link linktree tlink)))))
(begin
(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
)))
(if (and *toppath*
(directory-exists? *toppath*))
(begin
(setenv "MT_RUN_AREA_HOME" *toppath*)
(setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
(begin
(debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
(second-pass (find-and-read-config
mtconfig
environ-patt: "env-override"
given-toppath: toppath
pathenvvar: "MT_RUN_AREA_HOME"))
(runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
(for-each (lambda (kt)
(setenv (car kt) (cadr kt)))
key-vals)
(read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ...
sections: sections))))
(if cancreate (configf:write-alist runconfigdat rccachef))
(set! *runconfigdat* runconfigdat)
(if cancreate (configf:write-alist *configdat* mtcachef))
(if cancreate (set! *configstatus* 'fulldata))))
;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table
(set! *configdat* (make-hash-table))
))) ;; end of mtcachef
;; else read what you can and set the flag accordingly
(else ;;; else clause for cond
(let* ((cfgdat (find-and-read-config
(or (args:get-arg "-config") "megatest.config")
environ-patt: "env-override"
given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
pathenvvar: "MT_RUN_AREA_HOME")))
(if cfgdat
(let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat)))
(rdat (read-config (conc toppath ;; convert this to use runconfig:read!
"/runconfigs.config") *runconfigdat* #t sections: sections)))
(set! *configinfo* cfgdat)
(set! *configdat* (car cfgdat))
(set! *runconfigdat* rdat)
(set! *toppath* toppath)
(set! *configstatus* 'partial))
(begin
(debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
(exit 2)))))) ;;; cond ends here
;; additional house keeping
(let* ((linktree (common:get-linktree)))
(if linktree
(begin
(if (not (common:file-exists? linktree))
(begin
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(exit 1))
(create-directory linktree #t))))
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)))
(let ((tlink (conc *toppath* "/lt")))
(if (not (file-exists? tlink))
(create-symbolic-link linktree tlink)))))
(begin ;; no link tree spec?
(debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config")
)))
(if (and *toppath*
(directory-exists? *toppath*))
(begin
(setenv "MT_RUN_AREA_HOME" *toppath*)
(setenv "MT_TESTSUITENAME" (common:get-testsuite-name)))
(begin
(debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
;;(exit 1)
(set! *toppath* #f) ;; force it to be false so we return #f
(set! *toppath* #f))) ;; force it to be false so we return #f
#f
))
;; if have -append-config then read and append here
(let ((cfname (args:get-arg "-append-config")))
(if (and cfname
(file-read-access? cfname))
(read-config cfname *configdat* #t))) ;; values are added to the hash, no need to do anything special.
;; if have -append-config then read and append here
(let ((cfname (args:get-arg "-append-config")))
(if (and cfname
(file-read-access? cfname))
(read-config cfname *configdat* #t)))))) ;; values are added to the hash, no need to do anything special.
*toppath*))
(define (get-best-disk confdat testconfig)
(let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
(hash-table-ref/default confdat "disks" #f)))
(minspace (let ((m (configf:lookup confdat "setup" "minspace")))
(string->number (or m "10000")))))
(if disks
(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
|
︙ | | |
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
|
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
|
-
+
-
-
+
+
|
(lnktarget (conc lnkpath "/" item-path)))
;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
;; rundir shortdir
(rmt:general-call 'test-set-rundir-shortdir run-id lnkpathf test-path testname item-path run-id)
(debug:print 2 *default-log-port* "INFO:\n lnkbase=" lnkbase "\n lnkpath=" lnkpath "\n toptest-path=" toptest-path "\n test-path=" test-path)
(if (not (file-exists? linktree))
(if (not (common:file-exists? linktree))
(begin
(debug:print 0 *default-log-port* "WARNING: linktree did not exist! Creating it now at " linktree)
(create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
;; create the directory for the tests dir links, this is needed no matter what...
(if (and (not (directory-exists? lnkbase))
(not (file-exists? lnkbase)))
(if (and (not (common:directory-exists? lnkbase))
(not (common:file-exists? lnkbase)))
(handle-exceptions
exn
(begin
(debug:print-error 0 *default-log-port* "Problem creating linktree base at " lnkbase)
(print-error-message exn (current-error-port)))
(create-directory lnkbase #t)))
|
︙ | | |
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
|
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
|
-
+
+
|
;; (list 'serverinf *server-info*)
(list 'homehost (let* ((hhdat (common:get-homehost)))
(if hhdat
(car hhdat)
#f)))
(list 'serverurl (if *runremote*
(remote-server-url *runremote*)
#f)) ;;
#f)) ;;
(list 'areaname (common:get-testsuite-name))
(list 'toppath *toppath*)
(list 'work-area work-area)
(list 'test-name test-name)
(list 'runscript runscript)
(list 'run-id run-id )
(list 'test-id test-id )
;; (list 'item-path item-path )
|
︙ | | |
Modified rmt.scm
from [5167aff935]
to [f051a84a44].
︙ | | |
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
|
;; do all the prep locked under the rmt-mutex
(mutex-lock! *rmt-mutex*)
;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote
;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds.
;; 3. do the query, if on homehost use local access
;;
(let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
(areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
(dbfile (conc *toppath* "/megatest.db"))
(readonly-mode (not (file-write-access? dbfile))) ;; TODO: use dbstruct or runremote to figure this out in future
(runremote (or area-dat *runremote*)))
(let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value
(areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas
(runremote (or area-dat
*runremote*))
(readonly-mode (if (and runremote
(remote-ro-mode-checked runremote))
(remote-ro-mode runremote)
(let* ((dbfile (conc *toppath* "/megatest.db"))
(ro-mode (not (file-write-access? dbfile)))) ;; TODO: use dbstruct or runremote to figure this out in future
(if runremote
(begin
(remote-ro-mode-set! runremote ro-mode)
(remote-ro-mode-checked-set! runremote #t)
ro-mode)
ro-mode)))))
;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile)
(cond
;; give up if more than 15 attempts
((> attemptnum 15)
(debug:print 0 *default-log-port* "ERROR: 15 tries to start/connect to server. Giving up.")
(exit 1))
|
︙ | | |
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
|
-
-
+
+
+
-
+
+
-
+
-
+
+
-
+
+
+
-
+
+
+
+
-
-
+
+
+
-
+
+
+
-
+
|
(let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts
(< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
(remote-conndat-set! runremote #f)
(mutex-unlock! *rmt-mutex*)
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;; ensure we have a record for our connection for given area
((not runremote)
(set! *runremote* (make-remote))
((not runremote) ;; can remove this one. should never get here.
(set! *runremote* (make-remote)) ;; new runremote will come from this on next iteration
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 1")
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;; ensure we have a homehost record
((not (pair? (remote-hh-dat runremote))) ;; not on homehost
(thread-sleep! 0.1) ;; since we shouldn't get here, delay a little
(remote-hh-dat-set! runremote (common:get-homehost))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 2")
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;; on homehost and this is a read
((and (not (remote-force-server runremote)) ;; honor forced use of server
((and (cdr (remote-hh-dat runremote)) ;; on homehost
(cdr (remote-hh-dat runremote)) ;; on homehost
(member cmd api:read-only-queries)) ;; this is a read
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 3")
(rmt:open-qry-close-locally cmd 0 params))
;; on homehost and this is a write, we already have a server, but server has died
((and (cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote) ;; have a server
(not (server:check-if-running *toppath*))) ;; server has died.
(set! *runremote* (make-remote))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1")
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;; on homehost and this is a write, we already have a server
((and (not (remote-force-server runremote)) ;; honor forced use of server
((and (cdr (remote-hh-dat runremote)) ;; on homehost
(cdr (remote-hh-dat runremote)) ;; on homehost
(not (member cmd api:read-only-queries)) ;; this is a write
(remote-server-url runremote)) ;; have a server
(remote-server-url runremote)) ;; have a server
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 4")
(rmt:open-qry-close-locally cmd 0 params))
;; on homehost, no server contact made and this is a write, passively start a server
((and (not (remote-force-server runremote)) ;; honor forced use of server
((and (cdr (remote-hh-dat runremote)) ; new
(cdr (remote-hh-dat runremote)) ;; new
(not (remote-server-url runremote))
(not (member cmd api:read-only-queries)))
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5")
(let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
(if server-url
(remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed
(if (common:force-server?)
(server:start-and-wait *toppath*)
(server:kind-run *toppath*)))
(server:kind-run *toppath*))))
(remote-force-server-set! runremote (common:force-server?))
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1")
(rmt:open-qry-close-locally cmd 0 params))
((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one
(not (remote-conndat runremote)))
((and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
(not (remote-conndat runremote))) ;; and no connection
(and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost
(not (remote-conndat runremote)))) ;; and no connection
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote))
(mutex-unlock! *rmt-mutex*)
(if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up?
(server:start-and-wait *toppath*)
(server:start-and-wait *toppath*))
(remote-force-server-set! runremote (common:force-server?))
(remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http
(rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as
;; all set up if get this far, dispatch the query
((and (not (remote-force-server runremote))
((cdr (remote-hh-dat runremote)) ;; we are on homehost
(cdr (remote-hh-dat runremote))) ;; we are on homehost
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 7")
(rmt:open-qry-close-locally cmd (if rid rid 0) params))
;; not on homehost, do server query
(else
(mutex-unlock! *rmt-mutex*)
|
︙ | | |
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
|
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
|
-
+
-
-
-
-
-
-
-
+
+
+
+
+
+
-
-
+
+
+
-
-
-
+
+
+
|
(else
(debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " not supported")
(exit))))
(success (if (vector? dat) (vector-ref dat 0) #f))
(res (if (vector? dat) (vector-ref dat 1) #f)))
(if (vector? conninfo)(http-transport:server-dat-update-last-access conninfo)) ;; refresh access time
;; (mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = "runremote)
(debug:print-info 13 *default-log-port* "rmt:send-receive, case 9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
(if success
(case (remote-transport runremote)
((http)
(mutex-unlock! *rmt-mutex*)
res)
(else
(debug:print 0 *default-log-port* "ERROR: transport " (remote-transport runremote) " is unknown")
(mutex-unlock! *rmt-mutex*)
(if success ;; success only tells us that the transport was successful, have to examine the data to see if there was a detected issue at the other end
(if (and (symbol? res)
(eq? res 'overloaded))
(let ((wait-delay (+ attemptnum (* attemptnum 10))))
(debug:print 0 *default-log-port* "WARNING: server is overloaded. Delaying " wait-delay " seconds and trying call again.")
(mutex-unlock! *rmt-mutex*)
(exit 1)))
(thread-sleep! wait-delay)
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
res) ;; All good, return res
(begin
(debug:print 0 *default-log-port* "WARNING: communication failed. Trying again, try num: " attemptnum)
(remote-conndat-set! runremote #f)
(remote-server-url-set! runremote #f)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
(mutex-unlock! *rmt-mutex*)
(server:start-and-wait *toppath*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 9.1")
(if (not (server:check-if-running *toppath*))
(server:start-and-wait *toppath*))
(rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))))))))
;; (define (rmt:update-db-stats run-id rawcmd params duration)
;; (mutex-lock! *db-stats-mutex*)
;; (handle-exceptions
;; exn
;; (begin
|
︙ | | |
Modified server.scm
from [a31b48f74d]
to [2e218ee65f].
︙ | | |
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
|
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
-
+
-
+
-
-
+
+
|
(thread-join! log-rotate)
(pop-directory)))
;; given a path to a server log return: host port startseconds
;;
(define (server:logf-get-start-info logf)
(let ((rx (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+)"))) ;; SERVER STARTED: host:port AT timesecs
(handle-exceptions
exn
(list #f #f #f) ;; no idea what went wrong, call it a bad server
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match rx inl)))
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(list #f #f #f))
(let ((dat (cdr mlst)))
(list (car dat) ;; host
(string->number (cadr dat)) ;; port
(string->number (caddr dat))))))
(list #f #f #f)))))))
(with-input-from-file
logf
(lambda ()
(let loop ((inl (read-line))
(lnum 0))
(if (not (eof-object? inl))
(let ((mlst (string-match rx inl)))
(if (not mlst)
(if (< lnum 500) ;; give up if more than 500 lines of server log read
(loop (read-line)(+ lnum 1))
(list #f #f #f))
(let ((dat (cdr mlst)))
(list (car dat) ;; host
(string->number (cadr dat)) ;; port
(string->number (caddr dat))))))
(list #f #f #f))))))))
;; get a list of servers with all relevant data
;; ( mod-time host port start-time pid )
;;
(define (server:get-list areapath #!key (limit #f))
(let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$"))
(day-seconds (* 24 60 60)))
;; if the directory exists continue to get the list
;; otherwise attempt to create the logs dir and then
;; continue
(if (if (directory-exists? (conc areapath "/logs"))
#t
'()
(if (file-write-access? areapath)
(begin
(condition-case
(create-directory (conc areapath "/logs") #t)
(exn (i/o file)(debug:print 0 *default-log-port* "ERROR: Cannot create directory at " (conc areapath "/logs")))
(exn ()(debug:print 0 *default-log-port* "ERROR: Unknown error attemtping to get server list.")))
(directory-exists? (conc areapath "/logs")))
#f))
'()))
(let* ((server-logs (glob (conc areapath "/logs/server-*.log")))
(num-serv-logs (length server-logs)))
(if (null? server-logs)
'()
(let loop ((hed (car server-logs))
(tal (cdr server-logs))
(res '()))
(let* ((mod-time (handle-exceptions
exn
exn
0
(file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
(current-seconds) ;; 0
(file-modification-time hed))) ;; default to *very* old so log gets ignored if deleted
(down-time (- (current-seconds) mod-time))
(serv-dat (if (or (< num-serv-logs 10)
(< down-time day-seconds))
(server:logf-get-start-info hed)
'())) ;; don't waste time processing server files not touched in the past day if there are more than ten servers to look at
(serv-rec (cons mod-time serv-dat))
(fmatch (string-match fname-rx hed))
|
︙ | | |
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
|
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
|
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
|
;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off
;; and servers should stick around for about two hours or so.
;;
(define (server:get-best srvlst)
(let ((now (current-seconds)))
(sort
(filter (lambda (rec)
(if (and (list? rec)
(> (length rec) 2))
(let ((start-time (list-ref rec 3))
(mod-time (list-ref rec 0)))
;; (print "start-time: " start-time " mod-time: " mod-time)
(and start-time mod-time
(> (- now start-time) 0) ;; been running at least 0 seconds
(< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
(< (- now start-time)
(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
180)
(random 360))) ;; under one hour running time +/- 180
)))
(let ((start-time (list-ref rec 3))
(mod-time (list-ref rec 0)))
;; (print "start-time: " start-time " mod-time: " mod-time)
(and start-time mod-time
(> (- now start-time) 0) ;; been running at least 0 seconds
(< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds
(< (- now start-time)
(+ (- (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))
180)
(random 360))) ;; under one hour running time +/- 180
))
#f))
srvlst)
(lambda (a b)
(< (list-ref a 3)
(list-ref b 3))))))
(define (server:get-first-best areapath)
(let ((srvrs (server:get-best (server:get-list areapath))))
(if (and srvrs
(not (null? srvrs)))
(car srvrs)
#f)))
(define (server:get-rand-best areapath)
(let ((srvrs (server:get-best (server:get-list areapath))))
(if (and (list? srvrs)
(not (null? srvrs)))
(let* ((len (length srvrs))
(idx (random len)))
(list-ref srvrs idx))
#f)))
(define (server:record->url servr)
(match-let (((mod-time host port start-time pid)
servr))
(if (and host port)
(conc host ":" port)
#f)))
(define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value.
(if *my-client-signature* *my-client-signature*
(let ((sig (server:mk-signature)))
(set! *my-client-signature* sig)
*my-client-signature*)))
;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
(let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
(call-num (car last-run-dat))
(when-run (cadr last-run-dat))
(run-delay (+ (case call-num
((0) 0)
((1) 20)
((2) 300)
(else 600))
(random 5)))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
(if (> (- (current-seconds) when-run) run-delay)
(server:run areapath))
(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))
(let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
(call-num (car last-run-dat))
(when-run (cadr last-run-dat))
(run-delay (+ (case call-num
((0) 0)
((1) 20)
((2) 300)
(else 600))
(random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously
(lock-file (conc areapath "/logs/server-start.lock")))
(if (> (- (current-seconds) when-run) run-delay)
(begin
(common:simple-file-lock-and-wait lock-file expire-time: 15)
(server:run areapath)
(thread-sleep! 5) ;; don't release the lock for at least a few seconds
(common:simple-file-release-lock lock-file)))
(hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))))
(define (server:start-and-wait areapath #!key (timeout 60))
(let ((give-up-time (+ (current-seconds) timeout)))
(let loop ((server-url (server:check-if-running areapath)))
(if (or server-url
(> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
server-url
(let ((num-ok (length (server:get-best (server:get-list areapath)))))
(if (< num-ok 1) ;; if there are no decent candidates for servers then try starting a new one
(server:kind-run areapath))
(thread-sleep! 5)
(loop (server:check-if-running areapath)))))))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.
(define (server:dotserver-age-seconds areapath)
(let ((server-file (conc areapath "/.server")))
(begin
(handle-exceptions
exn
#f
(- (current-seconds)
(file-modification-time server-file))))))
;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
(define (server:check-if-running areapath)
(let* ((servers (server:get-best (server:get-list areapath))))
(if (null? servers)
(define (server:check-if-running areapath #!key (numservers "2"))
(let* ((ns (string->number
(or (configf:lookup *configdat* "server" "numservers") numservers)))
(servers (server:get-best (server:get-list areapath))))
;; (print "servers: " servers " ns: " ns)
(if (or (and servers
(null? servers))
(not servers)
(and (list? servers)
(< (length servers) (random ns)))) ;; somewhere between 0 and numservers
#f
(let loop ((hed (car servers))
(tal (cdr servers)))
(let ((res (server:check-server hed)))
(if res
res
(if (null? tal)
|
︙ | | |
Modified tests.scm
from [bccd138868]
to [897e386321].
︙ | | |
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
|
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
|
+
+
+
-
+
|
(tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename))
(common:simple-file-release-lock lockf)
(change-directory orig-dir)
;; NB// tests:test-set-toplog! is remote internal...
(tests:test-set-toplog! run-id test-name outputfilename))
;; didn't get the lock, check to see if current update started later than this
;; update, if so we can exit without doing any work
(if (> my-start-time (handle-exceptions
exn
0
(if (> my-start-time (file-modification-time lockf))
(file-modification-time lockf)))
;; we started since current re-gen in flight, delay a little and try again
(begin
(debug:print-info 1 *default-log-port* "Waiting to update " outputfilename ", another test currently updating it")
(thread-sleep! (+ 5 (random 5))) ;; delay between 5 and 10 seconds
(loop (common:simple-file-lock lockf))))))))))
(define (tests:generate-html-summary-for-iterated-test run-id test-id test-name outputfilename)
|
︙ | | |