377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
|
;; (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id "."))
;; #t)
;; (else
;; #f))))
;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries)
;; ensure a server is running for this run
(let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))
(delay-time 0))
(if (and (not server-dat)
(< delay-time delay-max-tries))
(begin
(if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id)
(debug:print 0 "Try starting server for run-id " run-id))
(thread-sleep! (/ (random 2000) 1000))
(server:kind-run run-id)
(thread-sleep! (min delay-time 1))
(loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1))))))
(define (tasks:get-all-servers mdb)
(let ((res '()))
(sqlite3:for-each-row
(lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
;; 0 1 2 3 4 5 6 7 8 9 10 11 12
(set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
|
|
|
|
|
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
|
;; (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id "."))
;; #t)
;; (else
;; #f))))
;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries area-dat)
;; ensure a server is running for this run
(let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id))
(delay-time 0))
(if (and (not server-dat)
(< delay-time delay-max-tries))
(begin
(if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id)
(debug:print 0 "Try starting server for run-id " run-id))
(thread-sleep! (/ (random 2000) 1000))
(server:kind-run run-id)
(thread-sleep! (min delay-time 1))
(loop (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id)(+ delay-time 1))))))
(define (tasks:get-all-servers mdb)
(let ((res '()))
(sqlite3:for-each-row
(lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
;; 0 1 2 3 4 5 6 7 8 9 10 11 12
(set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
|
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
|
(unsetenv "TARGETHOST_LOGF")
(unsetenv "TARGETHOST"))
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id area-dat #!key (tag "default"))
(let* ((tdbdat (tasks:open-db area-dat))
(sdat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
(if sdat
(let ((hostname (vector-ref sdat 6))
(pid (vector-ref sdat 5))
(server-id (vector-ref sdat 0)))
(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "killed")
(debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
(tasks:kill-server hostname pid)
(tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) )
(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))
;; (sqlite3:finalize! tdb)
))
;;======================================================================
;; M O N I T O R S
;;======================================================================
|
|
|
|
|
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
|
(unsetenv "TARGETHOST_LOGF")
(unsetenv "TARGETHOST"))
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id area-dat #!key (tag "default"))
(let* ((tdbdat (tasks:open-db area-dat))
(sdat (tasks:get-server (db:delay-if-busy tdbdat area-dat) run-id)))
(if sdat
(let ((hostname (vector-ref sdat 6))
(pid (vector-ref sdat 5))
(server-id (vector-ref sdat 0)))
(tasks:server-set-state! (db:delay-if-busy tdbdat area-dat) server-id "killed")
(debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
(tasks:kill-server hostname pid)
(tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) server-id tag) )
(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))
;; (sqlite3:finalize! tdb)
))
;;======================================================================
;; M O N I T O R S
;;======================================================================
|
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
|
exn
'()
(sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
param-key state-patt action-patt test-patt)))))
(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
;; (handle-exceptions
;; exn
;; '()
;; (sqlite3:first-row
(let ((db (db:delay-if-busy (db:get-db dbstruct #f)))
(res '()))
(sqlite3:for-each-row
(lambda (a . b)
(set! res (cons (cons a b) res)))
db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue
WHERE
target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
|
|
|
|
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
|
exn
'()
(sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
param-key state-patt action-patt test-patt)))))
(define (tasks:find-task-queue-records dbstruct area-dat target run-name test-patt state-patt action-patt)
;; (handle-exceptions
;; exn
;; '()
;; (sqlite3:first-row
(let ((db (db:delay-if-busy (db:get-db dbstruct #f) area-dat))
(res '()))
(sqlite3:for-each-row
(lambda (a . b)
(set! res (cons (cons a b) res)))
db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue
WHERE
target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
|