50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
(loop (file-exists? fullpath)
(- count 1)))
(begin
(if remove (system (conc "rm -rf " fullpath)))
#f)))
#t))))))
;; Build the task db directory path: the value looked up in *configdat*
;; under "setup" / "linktree", with "/.db" appended.
(define (tasks:get-task-db-path)
  (conc (configf:lookup *configdat* "setup" "linktree")
        "/.db"))
;; Open the task db (monitor.db under the task db path), choosing in order:
;;   If *toppath* is a string AND is writable
;;      ==> open the monitor.db file
;;   Else if the task db path is readable
;;      ==> open the monitor.db file
;;   Otherwise
;;      ==> open in-mem version
;;
(define (tasks:open-db #!key (numretries 4))
(if *task-db*
*task-db*
(handle-exceptions
exn
(if (> numretries 0)
(begin
(print-call-chain (current-error-port))
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 " exn=" (condition->list exn))
(thread-sleep! 1)
(tasks:open-db numretries (- numretries 1)))
(begin
(print-call-chain (current-error-port))
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 " exn=" (condition->list exn))))
(let* ((dbpath (tasks:get-task-db-path))
(dbfile (conc dbpath "/monitor.db"))
(avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
(exists (file-exists? dbpath))
(write-access (file-write-access? dbpath))
(mdb (cond ;; what the hek is *toppath* doing here?
((and (string? *toppath*)(file-write-access? *toppath*))
(sqlite3:open-database dbfile))
((file-read-access? dbpath) (sqlite3:open-database dbfile))
(else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
(handler (make-busy-timeout 36000)))
(if (and exists
(not write-access))
(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
|
|
|
|
|
>
|
|
|
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
(loop (file-exists? fullpath)
(- count 1)))
(begin
(if remove (system (conc "rm -rf " fullpath)))
#f)))
#t))))))
;; Build the task db directory path for an area: the value looked up under
;; "setup" / "linktree" in the area's configdat, with "/.db" appended.
(define (tasks:get-task-db-path area-dat)
  (conc (configf:lookup (megatest:area-configdat area-dat) "setup" "linktree")
        "/.db"))
;; Open the task db (monitor.db under the task db path), choosing in order:
;;   If the area path is a string AND is writable
;;      ==> open the monitor.db file
;;   Else if the task db path is readable
;;      ==> open the monitor.db file
;;   Otherwise
;;      ==> open in-mem version
;;
(define (tasks:open-db area-dat #!key (numretries 4))
(if *task-db*
*task-db*
(handle-exceptions
exn
(if (> numretries 0)
(begin
(print-call-chain (current-error-port))
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 " exn=" (condition->list exn))
(thread-sleep! 1)
(tasks:open-db area-dat numretries: (- numretries 1)))
(begin
(print-call-chain (current-error-port))
(debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
(debug:print 0 " exn=" (condition->list exn))))
(let* ((toppath (megatest:area-path area-dat))
(dbpath (tasks:get-task-db-path area-dat))
(dbfile (conc dbpath "/monitor.db"))
(avail (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
(exists (file-exists? dbpath))
(write-access (file-write-access? dbpath))
(mdb (cond ;; what the hek is *toppath* doing here?
((and (string? toppath)(file-write-access? toppath))
(sqlite3:open-database dbfile))
((file-read-access? dbpath) (sqlite3:open-database dbfile))
(else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath))
(handler (make-busy-timeout 36000)))
(if (and exists
(not write-access))
(set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
|
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
|
hostname TEXT,
cmdline TEXT,
login_time TIMESTAMP,
logout_time TIMESTAMP DEFAULT -1,
CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
;))
(set! *task-db* (cons mdb dbpath))
*task-db*))))
;;======================================================================
;; Server and client management
;;======================================================================
;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
|
|
|
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
|
hostname TEXT,
cmdline TEXT,
login_time TIMESTAMP,
logout_time TIMESTAMP DEFAULT -1,
CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
;))
(set! *task-db* (cons mdb dbpath)) ;; Move into area-dat !!!!
*task-db*))))
;;======================================================================
;; Server and client management
;;======================================================================
;; make-vector-record tasks hostinfo id interface port pubport transport pid hostname
|
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
|
(setenv "TARGETHOST_LOGF" "server-kills.log")
(system (conc "nbfake kill " pid))
(unsetenv "TARGETHOST_LOGF")
(unsetenv "TARGETHOST"))
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id #!key (tag "default"))
(let* ((tdbdat (tasks:open-db))
(sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
(if sdat
(let ((hostname (vector-ref sdat 6))
(pid (vector-ref sdat 5))
(server-id (vector-ref sdat 0)))
(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed")
(debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
|
|
|
|
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
|
(setenv "TARGETHOST_LOGF" "server-kills.log")
(system (conc "nbfake kill " pid))
(unsetenv "TARGETHOST_LOGF")
(unsetenv "TARGETHOST"))
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id area-dat #!key (tag "default"))
(let* ((tdbdat (tasks:open-db area-dat))
(sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
(if sdat
(let ((hostname (vector-ref sdat 6))
(pid (vector-ref sdat 5))
(server-id (vector-ref sdat 0)))
(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed")
(debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
|
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
|
target run-name state-patt action-patt test-patt)
res)) ;; )
;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;;
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name)
(let ((records (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests"))
(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
(if (null? records)
(debug:print 0 "No run launching processes found for " target " / " run-name)
(debug:print 0 "Found " (length records) " run(s) to kill."))
(for-each
(lambda (record)
(let* ((param-key (list-ref record 8))
|
|
|
|
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
|
target run-name state-patt action-patt test-patt)
res)) ;; )
;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;;
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name area-dat)
(let ((records (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests" area-dat))
(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
(if (null? records)
(debug:print 0 "No run launching processes found for " target " / " run-name)
(debug:print 0 "Found " (length records) " run(s) to kill."))
(for-each
(lambda (record)
(let* ((param-key (list-ref record 8))
|