Overview
Comment: | locking of main.db nearly complete |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6584-ck5 |
Files: | files | file ages | folders |
SHA1: |
336e9917b16585d028b89c0c01debfdd |
User & Date: | matt on 2021-04-29 09:17:37 |
Other Links: | branch diff | manifest | tags |
Context
2021-04-29
| ||
21:58 | wip check-in: f3260cf6bc user: matt tags: v1.6584-ck5 | |
09:17 | locking of main.db nearly complete check-in: 336e9917b1 user: matt tags: v1.6584-ck5 | |
2021-04-28
| ||
23:27 | wip check-in: a758074358 user: matt tags: v1.6584-ck5 | |
Changes
Modified dbmod.scm from [9c5f03fda2] to [2abeb8436f].
︙ | ︙ | |||
226 227 228 229 230 231 232 | ;; Make the dbstruct, call for main db at least once ;; sync disk db to inmem ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup run-id) (assert *toppath* "FATAL: db:setup called before toppath is available.") | | > | > > | < < | | | | | | > | > > | > | > | > | 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | ;; Make the dbstruct, call for main db at least once ;; sync disk db to inmem ;; ;; called in http-transport and replicated in rmt.scm for *local* access. ;; (define (db:setup run-id) (assert *toppath* "FATAL: db:setup called before toppath is available.") (let* ((dbstruct (make-dbr:dbstruct)) (db-file (db:run-id->path *toppath* run-id))) (db:get-dbdat dbstruct *toppath* db-file) (set! *dbstruct-db* dbstruct) dbstruct)) ;;====================================================================== ;; setting/getting a lock on the db for only one server per db ;; ;; NOTE: ;; These operate directly on the disk file, NOT on the inmemory db ;; The lockname is the filename (can have many to one, run-id to fname ;;====================================================================== ;; called before db is open? ;; (define (db:get-iam-server-lock dbh dbfname) (sqlite3:with-transaction dbh (lambda () (let* ((locked (db:get-locker dbh dbfname))) (if (not locked) (db:take-lock dbh dbfname) #f))))) ;; (exn sqlite3) (define (db:get-locker dbh dbfname) (condition-case (sqlite3:first-row dbh "SELECT owner_pid,owner_host,event_time FROM locks WHERE lockname=%;" dbfname) (exn (sqlite3) #f))) ;; should never fail because it is run in a transaction with a test for the lock ;; (define (db:take-lock dbh dbfname) ;; (condition-case ;; (begin (sqlite3:execute dbh "INSERT INTO locks (lockname,owner_pid,owner_host) VALUES (?,?,?);" dbfname (current-process-id) (get-host-name)) ;; #t) ;; (exn (sqlite3) #f))) #t) (define (db:release-lock dbh dbfname) (sqlite3:execute dbh "DELETE FROM locks WHERE lockname=?;" dbfname)) ;;====================================================================== ;; SQLITE3 HELPERS ;;====================================================================== |
︙ | ︙ |
Modified http-transportmod.scm from [a5cc6ea588] to [51f05a712a].
︙ | ︙ | |||
94 95 96 97 98 99 100 | ;; ;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) ;; ;; Configurations for server (tcp-buffer-size 2048) (max-connections 2048) | | | 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | ;; ;; (use spiffy uri-common intarweb http-client spiffy-request-vars intarweb spiffy-directory-listing) ;; ;; Configurations for server (tcp-buffer-size 2048) (max-connections 2048) (defstruct servdat host port uuid) (define (http-transport:make-server-url hostport) (if (not hostport) #f |
︙ | ︙ | |||
224 225 226 227 228 229 230 | ;; get_next_port goes here (http-transport:try-start-server ipaddrstr (portlogger:open-run-close portlogger:find-port))) (begin (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry | | | 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | ;; get_next_port goes here (http-transport:try-start-server ipaddrstr (portlogger:open-run-close portlogger:find-port))) (begin (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (make-servdat host: ipaddrstr port: portnum)) (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL ;; (start-server bind-address: ipaddrstr port: portnum) (if config-hostname ;; this is a hint to bind directly (start-server port: portnum bind-address: (if (equal? config-hostname "-") ipaddrstr |
︙ | ︙ | |||
445 446 447 448 449 450 451 452 453 454 455 456 457 458 | (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) server-dat)) ;;====================================================================== ;; NEW SERVER METHOD ;;====================================================================== (define *srvpktspec* `((server (host . h) (port . p) (servkey . k) (pid . i) (ipaddr . a) | > > > > > > > > > | 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 | (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds) server-id))) server-dat)) ;;====================================================================== ;; NEW SERVER METHOD ;;====================================================================== ;; only use for main.db - need to re-write some of this :( ;; (define (get-lock-db sdat dbfile) (let* ((dbh (db:open-run-db dbfile db:initialize-db)) (res (db:get-iam-server-lock dbh dbfile))) (sqlite3:finalize! dbh) res)) (define *srvpktspec* `((server (host . h) (port . p) (servkey . k) (pid . i) (ipaddr . a) |
︙ | ︙ | |||
469 470 471 472 473 474 475 | pkts-dir pkt-dat pktspec: pkt-spec ptype: 'server))) (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) uuid)) | < < < < < | 478 479 480 481 482 483 484 485 486 487 488 489 490 491 | pkts-dir pkt-dat pktspec: pkt-spec ptype: 'server))) (debug:print 0 *default-log-port* "Server on "host":"port" registered in pkt "uuid) uuid)) (define (get-pkts-dir #!optional (apath #f)) (let* ((effective-toppath (or *toppath* apath))) (assert effective-toppath "ERROR: get-pkts-dir called without *toppath* set. Exiting.") (let* ((pdir (conc effective-toppath "/.meta/srvpkts"))) (if (file-exists? pdir) pdir |
︙ | ︙ | |||
610 611 612 613 614 615 616 | ;; create a server pkt in *toppath*/.meta/srvpkts ;; TODO: ;; 1. change sdat to stuct ;; 2. add uuid to struct ;; 3. update uuid in sdat here ;; | | | | | < | > > > > > > | | | | | | | | | | | | | | | 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 | ;; create a server pkt in *toppath*/.meta/srvpkts ;; TODO: ;; 1. change sdat to stuct ;; 2. add uuid to struct ;; 3. update uuid in sdat here ;; (servdat-uuid-set! sdat (register-server pkts-dir *srvpktspec* (get-host-name) (servdat-port sdat) server-key (servdat-host sdat) db-file)) ;; now read pkts and see if we are a contender (let* ((all-pkts (get-all-server-pkts pkts-dir *srvpktspec*)) (viables (get-viable-servers all-pkts db-file)) (best-srv (get-best-candidate viables db-file)) (best-srv-key (if best-srv (alist-ref 'servkey best-srv) #f))) (debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key) ;; am I the best-srv, compare server-keys to know (if (equal? best-srv-key server-key) (if (get-lock-db sdat db-file) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id) (debug:print 0 *default-log-port* "I'm the server!") (begin (debug:print 0 *default-log-port* "I'm not the server, exiting.") (bdat-time-to-exit-set! *bdat* #t) (exit))) (begin (debug:print 0 *default-log-port* "Keys do not match "best-srv-key", "server-key", exiting.") (bdat-time-to-exit-set! *bdat* #t))) sdat)) (begin (debug:print-info 0 *default-log-port* "Still waiting, last-sdat=" last-sdat) (sleep 4) (if (> (- (current-seconds) start-time) 120) ;; been waiting for two minutes (begin (debug:print-error 0 *default-log-port* "transport appears to have died, exiting server") #;(common:save-pkt `((action . died) (T . server) (pid . ,(current-process-id)) (ipaddr . ,(car sdat)) (port . ,(cadr sdat)) (msg . "Transport died?")) *configdat* #t) (exit)) (loop start-time (equal? sdat last-sdat) sdat))))))) (iface (servdat-host server-info)) (port (servdat-port server-info)) (last-access 0) (server-timeout (server:expiration-timeout)) (server-log-file (args:get-arg "-log"))) ;; always set when we are a server (let loop ((count 0) (server-state 'available) (bad-sync-count 0) (start-time (current-milliseconds))) ;; Use this opportunity to sync the tmp db to megatest.db (if (not *dbstruct-db* ) (let ((watchdog (bdat-watchdog *bdat*))) (debug:print 0 *default-log-port* "SERVER: dbprep") (db:setup dbname) ;; sets *dbstruct-db* as side effect (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine. (if watchdog (if (not (member (thread-state watchdog) '(ready running blocked sleeping dead))) (begin (debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")") (thread-start! watchdog))) (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it.")))) ;; when things go wrong we don't want to be doing the various queries too often ;; so we strive to run this stuff only every four seconds or so. (let* ((sync-time (- (current-milliseconds) start-time)) (rem-time (quotient (- 4000 sync-time) 1000))) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time))) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1) 'running bad-sync-count (current-milliseconds))) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (or (not (equal? (servdat-host sdat) iface)) (not (equal? (servdat-port sdat) port))) (let ((new-iface (servdat-host sdat)) (new-port (servdat-port sdat))) (debug:print-info 0 *default-log-port* "WARNING: interface changed, refreshing iface and port info") (set! iface new-iface) (set! port new-port) (if (not *server-id*) (set! *server-id* (server:mk-signature))) ;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) (flush-output *default-log-port*))) ;; Transfer *db-last-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) (set! last-access *db-last-access*) (mutex-unlock! *heartbeat-mutex*) (if (common:low-noise-print 120 (conc "server running on " iface ":" port)) (begin (if (not *server-id*) (set! *server-id* (server:mk-signature))) ;; (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv)) (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*) (flush-output *default-log-port*))) (if (common:low-noise-print 60 "dbstats") (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond ((and *server-run* |
︙ | ︙ |
Modified launchmod.scm from [6c6ba3f325] to [ba5b81a174].
︙ | ︙ | |||
461 462 463 464 465 466 467 | (launch:setup) ;; should be properly in the run area home now (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) | | | 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 | (launch:setup) ;; should be properly in the run area home now (set! tconfigreg (tests:get-all)) ;; mapping of testname => test source path (let ((sighand (lambda (signum) ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting (if (eq? signum signal/stop) (debug:print-error 0 *default-log-port* "attempt to STOP process. Exiting.")) (bdat-time-to-exit-set! *bdat* #t) (print "Received signal " signum ", cleaning up before exit (set this test to COMPLETED/ABORT) . Please wait...") (let ((th1 (make-thread (lambda () (print "set test to COMPLETED/ABORT begin.") (rmt:test-set-state-status run-id test-id "COMPLETED" "ABORT" "received kill signal") (print "set test to COMPLETED/ABORT complete.") (print "Killed by signal " signum ". Exiting") (exit 1)))) |
︙ | ︙ |
Modified rmtmod.scm from [0f70a2dcff] to [9c11844c69].
︙ | ︙ | |||
1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 | (bdat-time-to-exit-set! *bdat* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated (if (bdat-task-db *bdat*) (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! (bdat-task-db *bdat*) 0 #f) | > > > > > > | 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 | (bdat-time-to-exit-set! *bdat* #t) #t)))) (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.") (if (and no-hurry (debug:debug-mode 18)) (rmt:print-db-stats)) (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated (if *server-info* (let ((pkt-file (conc (get-pkts-dir *toppath*) "/" (servdat-uuid *server-info*) ".pkt"))) (debug:print-info 0 *default-log-port* "removing pkt "pkt-file) (delete-file* pkt-file))) (if (bdat-task-db *bdat*) (let ((db (cdr (bdat-task-db *bdat*)))) (if (sqlite3:database? db) (begin (sqlite3:interrupt! db) (sqlite3:finalize! db #t) ;; (vector-set! (bdat-task-db *bdat*) 0 #f) |
︙ | ︙ |