Overview
Context
Changes
Modified configf.scm
from [346c0caf52]
to [0cf569e087].
︙ | | |
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
|
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
|
-
-
+
+
+
+
+
|
(with-output-to-file fname ;; first write out the file
(lambda ()
(pp dat)))
(if (file-exists? fname) ;; now verify it is readable
(if (configf:read-alist fname)
#t ;; data is good.
(begin
(delete-file fname)
(debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
(handle-exceptions
exn
#f
(debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
(delete-file fname))
#f))
#f)))
;; convert hierarchial list to ini format
;;
(define (configf:config->ini data)
(map
|
︙ | | |
Modified launch.scm
from [cc10125ef0]
to [f8bf4a3053].
︙ | | |
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
|
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
|
-
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
|
(> (length host-port) 1))
(let* ((host (car host-port))
(port (cadr host-port))
(start-res (http-transport:client-connect host port))
(ping-res (rmt:login-no-auto-client-setup start-res)))
(if (and start-res
ping-res)
(let ((url (http-transport:server-dat-make-url start-res)))
;; (begin ;; let ((url (http-transport:server-dat-make-url start-res)))
(begin
(remote-conndat-set! *runremote* start-res)
(remote-server-url-set! *runremote* url)
(if (server:ping url)
(debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.")
(begin
(debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url)
(remote-conndat-set! *runremote* #f)
(remote-server-url-set! *runremote* #f))))
;; (remote-server-url-set! *runremote* url)
;; (if (server:ping url)
(debug:print-info 0 *default-log-port* "connected to " host ":" port " using CMDINFO data."))
;; (begin
;; (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url)
;; (remote-conndat-set! *runremote* #f)
;; (remote-server-url-set! *runremote* #f))))
(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
))
(begin
(debug:print-info 0 *default-log-port* (if host-port
(conc "received invalid host-port information " host-port)
"no host-port information received"))
;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
|
︙ | | |
Modified megatest.scm
from [164cc6d2b1]
to [84dec1a162].
︙ | | |
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
-
-
+
|
-setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
overwritten by values set in config files.
-server -|hostname : start the server (reduces contention on megatest.db), use
- to automatically figure out hostname
-transport http|rpc : use http or rpc for transport (default is http)
-log logfile : send stdout and stderr to logfile
-list-servers : list the servers
-stop-server id : stop server specified by id (see output of -list-servers), use
0 to kill all
-kill-servers : kill all servers
-repl : start a repl (useful for extending megatest)
-load file.scm : load and run file.scm
-mark-incompletes : find and mark incomplete tests
-ping run-id|host:port : ping server, exit with 0 if found
-debug N|N,M,O... : enable debug 0-N or N and M and O ...
-config fname : override the megatest.config file with fname
-append-config fname : append fname to the megatest.config file
|
︙ | | |
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
|
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
|
-
-
|
":expected"
":tol"
":units"
;; misc
"-start-dir"
"-contour"
"-server"
"-stop-server"
"-transport"
"-kill-server"
"-port"
"-extract-ods"
"-pathmod"
"-env2file"
"-envcap"
"-envdelta"
"-setvars"
|
︙ | | |
317
318
319
320
321
322
323
324
325
326
327
328
329
330
|
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
|
+
|
"-cache-db"
"-use-db-cache"
;; misc
"-repl"
"-lock"
"-unlock"
"-list-servers"
"-kill-servers"
"-run-wait" ;; wait on a run to complete (i.e. no RUNNING)
"-local" ;; run some commands using local db access
"-generate-html"
;; misc queries
"-list-disks"
"-list-targets"
|
︙ | | |
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
|
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
|
-
+
-
+
|
(eq? pid-val 0))
(begin
(printf "Sending signal/term to ~A\n" pid)
(process-signal pid signal/term))))))
(process:children #f))
(original-exit exit-code)))))
;; for some switches alway print the command to stderr
;; for some switches always print the command to stderr
;;
(if (args:any? "-run" "-runall" "-list-runs" "-remove-runs" "-set-state-status")
(if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status")
(debug:print 0 *default-log-port* (string-intersperse (argv) " ")))
;;======================================================================
;; Misc setup stuff
;;======================================================================
(debug:setup)
|
︙ | | |
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
|
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
|
-
-
+
-
+
-
+
-
-
-
-
-
-
-
-
+
+
+
-
-
-
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
+
+
+
-
-
-
+
+
-
-
-
-
+
-
-
-
-
-
+
+
+
+
-
-
-
+
+
+
+
+
+
-
+
+
|
(if (args:get-arg "-server")
(let ((tl (launch:setup))
(transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
(server:launch 0 transport-type)
(set! *didsomething* #t)))
(if (or (args:get-arg "-list-servers")
(args:get-arg "-stop-server")
(args:get-arg "-kill-server"))
(args:get-arg "-kill-servers"))
(let ((tl (launch:setup)))
(if tl
(if tl ;; all roads from here exit
(let* ((servers (server:get-list *toppath*))
(fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n")
(fmtstr "~8a~22a~20a~20a~8a\n"))
(servers-to-kill '())
(kill-switch (if (args:get-arg "-kill-server") "-9" ""))
(killinfo (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") ))
(khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f))
(sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)))
(format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport")
(format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========")
(for-each
(format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State")
(format #t fmtstr "===" "==============" "=========" "========" "=====")
(for-each ;; ( mod-time host port start-time pid )
(lambda (server)
(let* ((id (vector-ref server 0))
(pid (vector-ref server 1))
(hostname (vector-ref server 2))
(let* ((mtm (any->number (car server)))
(mod (if mtm (- (current-seconds) mtm) "unk"))
(age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
(url (conc (cadr server) ":" (caddr server)))
(interface (vector-ref server 3))
(pullport (vector-ref server 4))
(pid (list-ref server 4))
(pubport (vector-ref server 5))
(start-time (vector-ref server 6))
(priority (vector-ref server 7))
(state (vector-ref server 8))
(mt-ver (vector-ref server 9))
(last-update (vector-ref server 10))
(transport (vector-ref server 11))
(killed #f)
(status (< last-update 20)))
(alv (if (number? mod)(< mod 10) #f)))
(format #t
fmtstr
;; (zmq-sockets (if status (server:client-connect hostname port) #f)))
;; no need to login as status of #t indicates we are connecting to correct
;; server
pid
url
;; (if (equal? state "dead")
;; (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day.
;; (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete))
;; (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds
(seconds->hr-min-sec age)
;; (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid)))
(format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update
(if status "alive" "dead") transport)
(if (or (equal? id sid)
(equal? sid 0)) ;; kill all/any
(seconds->hr-min-sec mod)
(if alv "alive" "dead"))
(if (and alv
(args:get-arg "-kill-servers"))
(begin
(debug:print-info 0 *default-log-port* "Attempting to kill "kill-switch" server with pid " pid)
(tasks:kill-server hostname pid kill-switch: kill-switch)))))
servers)
(debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)
(server:kill server)))))
(sort servers (lambda (a b)
(let ((ma (or (any->number (car a)) 9e9))
(mb (or (any->number (car b)) 9e9)))
(> ma mb)))))
(debug:print-info 1 *default-log-port* "Done with listservers")
(set! *didsomething* #t)
(exit)) ;; must do, would have to add checks to many/all calls below
(exit))
(exit))))
;; must do, would have to add checks to many/all calls below
;;======================================================================
;; Weird special calls that need to run *after* the server has started?
;;======================================================================
(if (args:get-arg "-list-targets")
(if (launch:setup)
|
︙ | | |
Modified portlogger.scm
from [e604a481b0]
to [b8f7cf5181].
︙ | | |
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
+
-
+
-
|
fail_count INTEGER DEFAULT 0,
update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
db))
(define (portlogger:open-run-close proc . params)
(let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db"))
(avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
;;(handle-exceptions
;; exn
;; (begin
;; ;; (release-dot-lock fname)
;; (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
;; (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
;; (debug:print 0 *default-log-port* "exn=" (condition->list exn))
;; (if (file-exists? fname)
(handle-exceptions
exn
(begin
;; (release-dot-lock fname)
(debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 *default-log-port* "exn=" (condition->list exn))
(if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
;; (begin
;; (debug:print 0 *default-log-port* "Removing portlogger database file " fname)
;; (delete-file fname))) ;; just get rid of the portlogger file
;; (print-call-chain (current-error-port)))
(print-call-chain (current-error-port)))
(let* (;; (lock (obtain-dot-lock fname 2 9 10))
(db (portlogger:open-db fname))
(res (apply proc db params)))
(sqlite3:finalize! db)
;; (release-dot-lock fname)
res)))
res))))
;; )
;; (fold-row PROC INIT DATABASE SQL . PARAMETERS)
(define (portlogger:take-port db portnum)
(let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
(qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
(qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
(res (sqlite3:with-transaction
|
︙ | | |
Modified rmt.scm
from [f051a84a44]
to [01e080d921].
︙ | | |
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
|
-
+
-
+
|
(debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params)
#f
)
;; reset the connection if it has been unused too long
((and runremote
(remote-conndat runremote)
(let ((expire-time (+ (- start-time (remote-server-timeout runremote))(random 30)))) ;; add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts
(let ((expire-time (+ (- start-time (remote-server-timeout runremote))))) ;; NOTE: REMOVED the 30 second noise. If adding it back be sure to offset!! add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts
(< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time)))
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 8")
(remote-conndat-set! runremote #f)
(remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection.
(mutex-unlock! *rmt-mutex*)
(rmt:send-receive cmd rid params attemptnum: attemptnum))
;; ensure we have a record for our connection for given area
((not runremote) ;; can remove this one. should never get here.
(set! *runremote* (make-remote)) ;; new runremote will come from this on next iteration
(mutex-unlock! *rmt-mutex*)
(debug:print-info 12 *default-log-port* "rmt:send-receive, case 1")
|
︙ | | |
Modified server.scm
from [34ba33b083]
to [a878389459].
︙ | | |
417
418
419
420
421
422
423
424
425
426
|
417
418
419
420
421
422
423
424
425
426
|
-
+
|
(define (server:get-timeout)
(let ((tmo (configf:lookup *configdat* "server" "timeout")))
(if (and (string? tmo)
(string->number tmo))
(* 60 60 (string->number tmo))
;; (* 3 24 60 60) ;; default to three days
;;(* 60 60 1) ;; default to one hour
(* 60 60 0.25) ;; default to 0.25 hours
(* 60 5) ;; default to five minutes
)))
|