Overview
Comment: | Added safety net for server start failing, put server kills into log file server-kills.log |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
cf0372bf78ef6b214410ccd48a00be32 |
User & Date: | mrwellan on 2014-11-06 08:52:45 |
Other Links: | branch diff | manifest | tags |
Context
2014-11-06
| ||
20:12 | Use run specific db access times to determine servers to start. check-in: 029c9c9936 user: matt tags: v1.60, v1.6005_ww45.2a | |
08:52 | Added safety net for server start failing, put server kills into log file server-kills.log check-in: cf0372bf78 user: mrwellan tags: v1.60 | |
07:33 | Portlogger uses CREATE TABLE IF NOT EXISTS, so no need to test for file - just create the table every time. Added kill server when http-client has issue. Fixed typos in tasks:kill-server check-in: 158a434c54 user: mrwellan tags: v1.60 | |
Changes
Modified http-transport.scm from [231b0ef76f] to [6319f7b685].
︙ | ︙ | |||
353 354 355 356 357 358 359 | ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) | | > > > > > > | | | | 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 | ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) (debug:print-info 0 "Waiting for server alive signature") (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (and sdat (not changed) (> (- (current-seconds) start-time) 2)) sdat (begin (debug:print-info 0 "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (let ((tdb (tasks:open-db))) (debug:print 0 "ERROR: transport appears to have died, exiting server " server-id " for run " run-id) (tasks:server-delete-record tdb server-id "failed to start, never received server alive signature") (sqlite3:finalize! tdb) (exit)) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db)) (server-timeout (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) |
︙ | ︙ |
Modified tasks.scm from [40e15eef78] to [b57f2cb57e].
︙ | ︙ | |||
685 686 687 688 689 690 691 692 693 | (process-signal pid signal/int) (thread-sleep! 5) (if (process:alive? pid) (process-signal pid signal/kill))))) ;; (call-with-environment-variables (let ((old-targethost (getenv "TARGETHOST"))) (setenv "TARGETHOST" hostname) (system (conc "nbfake kill " pid)) (if old-targethost (setenv "TARGETHOST" old-targethost)) | > > | | 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 | (process-signal pid signal/int) (thread-sleep! 5) (if (process:alive? pid) (process-signal pid signal/kill))))) ;; (call-with-environment-variables (let ((old-targethost (getenv "TARGETHOST"))) (setenv "TARGETHOST" hostname) (setenv "TARGETHOST_LOGF" "server-kills.log") (system (conc "nbfake kill " pid)) (if old-targethost (setenv "TARGETHOST" old-targethost)) (unsetenv "TARGETHOST") (unsetenv "TARGETHOST_LOGF")))) (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db")))) records))) ;;====================================================================== ;; The routines to process tasks ;;====================================================================== |
︙ | ︙ |