Overview
Comment: | Better server and sync run-away protection using dot files in the users tmp db area. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 | v1.6424 |
Files: | files | file ages | folders |
SHA1: |
30074493838bdab3cf7e4bf1aa65d913 |
User & Date: | matt on 2017-07-11 22:43:26 |
Other Links: | branch diff | manifest | tags |
Context
2017-07-12
| ||
18:36 | server connection tag check-in: 6b1258c69a user: mrwellan tags: v1.64-server-connection-tagging | |
17:13 | increase default expiration time to 1400 hours from 1 hour. added server/time-to-die-seconds to allow future trimming check-in: e396b98a87 user: bjbarcla tags: v1.64 | |
2017-07-11
| ||
22:43 | Better server and sync run-away protection using dot files in the users tmp db area. check-in: 3007449383 user: matt tags: v1.64, v1.6424 | |
15:54 | Fixed rollup of DEAD. check-in: 44fee9274f user: mrwellan tags: v1.64 | |
Changes
cgisetup/cgi-bin/models became a symlink with target [39c07627cc].
cgisetup/cgi-bin/pages became a symlink with target [e2b5ed002d].
Modified db.scm from [d861a30e88] to [08b5e15dd9].
︙ | ︙ | |||
1899 1900 1901 1902 1903 1904 1905 | ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; | | > | | | | | | | | | | 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 | ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db-in keyname) (let ((db (db:no-sync-db db-in))) (sqlite3:with-transaction db (lambda () (handle-exceptions exn (let ((lock-time (current-seconds))) (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname))))))) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change |
︙ | ︙ |
Modified http-transport.scm from [1066bc8fad] to [91f92e466a].
︙ | ︙ | |||
55 56 57 58 59 60 61 | (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (portlogger:open-run-close portlogger:find-port)) | | > > > | 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (portlogger:open-run-close portlogger:find-port)) (link-tree-path (common:get-linktree)) (tmp-area (common:get-db-tmp-area)) (start-file (conc tmp-area "/.server-start"))) (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port) ;; set some parameters for the server (root-path (if link-tree-path link-tree-path (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! (handle-directory spiffy-directory-listing) (handle-exception (lambda (exn chain) (signal (make-composite-condition (make-property-condition |
︙ | ︙ | |||
102 103 104 105 106 107 108 109 110 111 112 113 114 115 | (send-response body: "hey there!\n" headers: '((content-type text/plain)))) ((equal? (uri-path (request-uri (current-request))) '(/ "hey")) (send-response body: "hey there!\n" headers: '((content-type text/plain)))) (else (continue)))))))) (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server ipaddrstr portnum) (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) | > | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | (send-response body: "hey there!\n" headers: '((content-type text/plain)))) ((equal? (uri-path (request-uri (current-request))) '(/ "hey")) (send-response body: "hey there!\n" headers: '((content-type text/plain)))) (else (continue)))))))) (with-output-to-file start-file (lambda ()(print (current-process-id)))) (http-transport:try-start-server ipaddrstr start-port))) ;; This is recursively run by http-transport:run until sucessful ;; (define (http-transport:try-start-server ipaddrstr portnum) (let ((config-hostname (configf:lookup *configdat* "server" "hostname")) (config-use-proxy (equal? (configf:lookup *configdat* "client" "use-http_proxy") "yes"))) |
︙ | ︙ | |||
346 347 348 349 350 351 352 | ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") | > > | | 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 | ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server") (let* ((tmp-area (common:get-db-tmp-area)) (started-file (conc tmp-area "/.server-started")) (server-start-time (current-seconds)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) (debug:print-info 0 *default-log-port* "Waiting for server alive signature") (mutex-lock! *heartbeat-mutex*) |
︙ | ︙ | |||
378 379 380 381 382 383 384 385 386 387 388 389 390 391 | sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (server-timeout (server:get-timeout)) (server-going #f) (server-log-file (args:get-arg "-log"))) ;; always set when we are a server (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) ;; Use this opportunity to sync the tmp db to megatest.db (if (not server-going) ;; *dbstruct-db* (begin | > > > | 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 | sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (server-timeout (server:get-timeout)) (server-going #f) (server-log-file (args:get-arg "-log"))) ;; always set when we are a server (with-output-to-file started-file (lambda ()(print (current-process-id)))) (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) ;; Use this opportunity to sync the tmp db to megatest.db (if (not server-going) ;; *dbstruct-db* (begin |
︙ | ︙ | |||
490 491 492 493 494 495 496 497 498 499 500 501 502 503 | (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch) ;; lets not even bother to start if there are already three or more server files ready to go (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up") (exit)))) (let* ((th2 (make-thread (lambda () | > > > > > > > > > > > > > | 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch) ;; check that a server start is in progress, pause or exit if so (let* ((tmp-area (common:get-db-tmp-area)) (server-start (conc tmp-area "/.server-start")) (server-started (conc tmp-area "/.server-started")) (start-time (common:lazy-modification-time server-start)) (started-time (common:lazy-modification-time server-started)) (server-starting (< start-time started-time)) ;; if start-time is less than started-time then a server is still starting (start-time-old (> (- (current-seconds) start-time) 5))) (if (and (not start-time-old) ;; last server start try was less than five seconds ago (not server-starting)) (begin (debug:print-info 0 *default-log-port* "NOT starting server, there is either a recently started server or a server in process of starting") (exit)))) ;; lets not even bother to start if there are already three or more server files ready to go (let* ((num-alive (server:get-num-alive (server:get-list *toppath*)))) (if (> num-alive 3) (begin (debug:print 0 *default-log-port* "ERROR: Aborting server start because there are already " num-alive " possible servers either running or starting up") (exit)))) (let* ((th2 (make-thread (lambda () |
︙ | ︙ | |||
514 515 516 517 518 519 520 | (thread-start! th2) (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit))) | | | | | | | | | | | | | | | | | | | 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 | (thread-start! th2) (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit))) ;; (define (http-transport:server-signal-handler signum) ;; (signal-mask! signum) ;; (handle-exceptions ;; exn ;; (debug:print 0 *default-log-port* " ... exiting ...") ;; (let ((th1 (make-thread (lambda () ;; (thread-sleep! 1)) ;; "eat response")) ;; (th2 (make-thread (lambda () ;; (debug:print-error 0 *default-log-port* "Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.") ;; (thread-sleep! 3) ;; give the flush three seconds to do it's stuff ;; (debug:print 0 *default-log-port* " Done.") ;; (exit 4)) ;; "exit on ^C timer"))) ;; (thread-start! th2) ;; (thread-start! th1) ;; (thread-join! th2)))) ;;====================================================================== ;; web pages ;;====================================================================== (define (http-transport:main-page) (let ((linkpath (root-path))) |
︙ | ︙ |
Modified server.scm from [d90e3727cd] to [52a482f03f].
︙ | ︙ | |||
463 464 465 466 467 468 469 | (sync-duration 0) ;; run time of the sync in milliseconds (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (let* (;;(dbstruct (db:setup)) | | | > > > > > > > > > | > > > | > > > > > > | | 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 | (sync-duration 0) ;; run time of the sync in milliseconds (this-wd-num (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))) (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls (debug:print-info 2 *default-log-port* "Periodic sync thread started.") (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num) (if (and legacy-sync (not *time-to-exit*)) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:dbstruct-mtdb dbstruct)) (mtpath (db:dbdat-get-path mtdb)) (tmp-area (common:get-db-tmp-area)) (start-file (conc tmp-area "/.start-sync")) (end-file (conc tmp-area "/.end-sync"))) (debug:print-info 0 *default-log-port* "Server running, periodic sync started.") (let loop () ;; sync for filesystem local db writes ;; (mutex-lock! *db-multi-sync-mutex*) (let* ((need-sync (>= *db-last-access* *db-last-sync*)) ;; no sync since last write (sync-in-progress *db-sync-in-progress*) (should-sync (and (not *time-to-exit*) (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum (start-time (current-seconds)) (mt-mod-time (file-modification-time mtpath)) (last-sync-start (if (common:file-exists? start-file) (file-modification-time start-file) 0)) (last-sync-end (if (common:file-exists? end-file) (file-modification-time end-file) 10)) (recently-synced (and (< (- start-time mt-mod-time) 4) ;; not useful if sync didn't modify megatest.db! (< mt-mod-time last-sync-start))) (sync-done (<= last-sync-start last-sync-end)) (will-sync (and (or need-sync should-sync) sync-done (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync " sync-done=" sync-done) ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync (let ((sync-start (current-milliseconds))) (with-output-to-file start-file (lambda ()(print (current-process-id)))) ;; put lock here (if (< sync-duration 300) (let ((res (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive (set! sync-duration (- (current-milliseconds) sync-start)) (if (> res 0) ;; some records were transferred, keep the db alive (begin (mutex-lock! *heartbeat-mutex*) (set! *db-last-access* (current-seconds)) (mutex-unlock! *heartbeat-mutex*) (debug:print-info 0 *default-log-port* "sync called, " res " records transferred.")) (debug:print-info 2 *default-log-port* "sync called but zero records transferred"))) ;; TODO: factor this next routine out into a function (with-input-from-pipe ;; this should not block other threads but need to verify this (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*) (lambda () (let loop ((inl (read-line)) (res #f)) (if (eof-object? inl) (begin (set! sync-duration (- (current-milliseconds) sync-start)) (cond |
︙ | ︙ | |||
524 525 526 527 528 529 530 531 532 533 534 535 536 537 | (loop (read-line) (or num-synced res)))))))))) (if will-sync (begin (mutex-lock! *db-multi-sync-mutex*) (set! *db-sync-in-progress* #f) (set! *db-last-sync* start-time) (mutex-unlock! *db-multi-sync-mutex*))) (if (and debug-mode (> (- start-time last-time) 60)) (begin (set! last-time start-time) (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) | > > > > | 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 | (loop (read-line) (or num-synced res)))))))))) (if will-sync (begin (mutex-lock! *db-multi-sync-mutex*) (set! *db-sync-in-progress* #f) (set! *db-last-sync* start-time) (with-output-to-file end-file (lambda ()(print (current-process-id)))) ;; release lock here (mutex-unlock! *db-multi-sync-mutex*))) (if (and debug-mode (> (- start-time last-time) 60)) (begin (set! last-time start-time) (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*)))))) |
︙ | ︙ |