Overview
Comment: | Fixed wrong settings control for server load gate. Fixed logic error on detecting defunct server starts |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 | v1.6427 |
Files: | files | file ages | folders |
SHA1: |
6bffff11684d284de8b2ea4a71fdaa93 |
User & Date: | mrwellan on 2017-07-26 16:34:53 |
Other Links: | branch diff | manifest | tags |
Context
2017-07-26
| ||
17:31 | Fixed missing begin check-in: ec2dd2547c user: mrwellan tags: v1.64, v1.6427 | |
16:34 | Fixed wrong settings control for server load gate. Fixed logic error on detecting defunct server starts check-in: 6bffff1168 user: mrwellan tags: v1.64, v1.6427 | |
2017-07-25
| ||
14:43 | Fixed emergency loaded to not put out noise on file not exists check-in: 7d1e789cb3 user: mrwellan tags: v1.64 | |
Changes
Modified http-transport.scm from [09510faceb] to [3650524f6a].
︙ | ︙ | |||
516 517 518 519 520 521 522 | (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) (if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") | | | | | | > | | | | | | | | | | | | | | | | | | 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 | (system (conc "sleep 1;mv -f " full-serv-fname " " new-serv-fname)) (debug:print 0 *default-log-port* "INFO: cannot move " full-serv-fname " to " new-serv-fname)) (exit))))) (if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (cleanup-proc "NOT starting server, there is either a recently started server or a server in process of starting") (exit))) ;; lets not even bother to start if there are already three or more server files ready to go (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (cleanup-proc (conc "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up")) (exit))) (let* ((th2 (make-thread (lambda () (debug:print-info 0 *default-log-port* "Server run thread started") (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") )) "Server run")) (th3 (make-thread (lambda () (debug:print-info 0 *default-log-port* "Server monitor thread started") (http-transport:keep-running) "Keep running")))) (thread-start! th2) (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit)))) ;; (define (http-transport:server-signal-handler signum) ;; (signal-mask! signum) ;; (handle-exceptions ;; exn ;; (debug:print 0 *default-log-port* " ... exiting ...") ;; (let ((th1 (make-thread (lambda () |
︙ | ︙ |
Modified server.scm from [d21d7ab2e0] to [0b4350005b].
︙ | ︙ | |||
111 112 113 114 115 116 117 | (cmdln (conc (common:get-megatest-exe) " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") " -daemonize " "") ;; " -log " logfile " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) | | | 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | (cmdln (conc (common:get-megatest-exe) " -server " (or target-host "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") " -daemonize " "") ;; " -log " logfile " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &"))))) (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) (load-limit (configf:lookup-number *configdat* "jobtools" "maxhomehostload" default: 3.0))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") (thread-start! log-rotate) ;; host.domain.tld match host? (if (and target-host |
︙ | ︙ | |||
507 508 509 510 511 512 513 | (common:low-noise-print 30 "sync-period")) (debug:print-info 0 *default-log-port* "Increased sync period due to load: " sync-period)) ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync | > | > | | | | | | | | | | | | | | | | | | | | | | | | | 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 | (common:low-noise-print 30 "sync-period")) (debug:print-info 0 *default-log-port* "Increased sync period due to load: " sync-period)) ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! (sync-start (current-milliseconds))) (with-output-to-file start-file (lambda ()(print (current-process-id)))) ;; put lock here ;; (if (or (not max-sync-duration) ;; (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive (set! sync-duration (- (current-milliseconds) sync-start)) (if (> res 0) ;; some records were transferred, keep the db alive (begin (mutex-lock! *heartbeat-mutex*) (set! *db-last-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*) (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))))) ;; ;; TODO: factor this next routine out into a function ;; (with-input-from-pipe ;; this should not block other threads but need to verify this ;; (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) ;; (lambda () ;; (let loop ((inl (read-line)) ;; (res #f)) ;; (if (eof-object? inl) ;; (begin ;; (set! sync-duration (- (current-milliseconds) sync-start)) ;; (cond ;; ((not res) ;; (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\"")) ;; ((> res 0) ;; (mutex-lock! *heartbeat-mutex*) ;; (set! *db-last-access* (current-seconds)) ;; (mutex-unlock! *heartbeat-mutex*)))) ;; (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl))) ;; (if matches ;; (string->number (cadr matches)) ;; #f)))) ;; (loop (read-line) ;; (or num-synced res)))))))))) (if will-sync (begin (mutex-lock! *db-multi-sync-mutex*) (set! *db-sync-in-progress* #f) (set! *db-last-sync* start-time) (with-output-to-file end-file (lambda ()(print (current-process-id)))) |
︙ | ︙ |