Overview
Comment: | Added special handling of errors when sync fails in a server. Catch errors if creating the link tree directory fails. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
a953799c682fda015f2cea4e9f928d7c |
User & Date: | matt on 2014-12-07 15:50:05 |
Other Links: | branch diff | manifest | tags |
Context
2014-12-07
| ||
18:47 | Added caching of megatest.config, no locking yet... check-in: 800fea92da user: matt tags: v1.60 | |
15:50 | Added special handling of errors when sync fails in a server. Catch errors if creating the link tree directory fails. check-in: a953799c68 user: matt tags: v1.60 | |
2014-12-04
| ||
22:23 | Merged fixes back to v1.60 from archiving check-in: 33d8665511 user: matt tags: v1.60 | |
Changes
Modified db.scm from [d9d867f59c] to [b058afcfaa].
︙ | ︙ | |||
491 492 493 494 495 496 497 | (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) | | > > > | > > | | 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 | (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (print "exn=" (condition->list exn)) (debug:print 0 " status: " ((condition-property-accessor 'sqlite3 'status) exn)) (debug:print 0 " src db: " (db:dbdat-get-path fromdb)) (for-each (lambda (dbdat) (debug:print 0 " dbpath: " (db:dbdat-get-path dbdat))) (cons todb slave-dbs)) (if *server-run* ;; we are inside a server, throw a sync-failed error (signal (make-composite-condition (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context."))))) ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die. ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") ;; (portlogger:open-run-close portlogger:set-port port "released") ;; (exit 1))) (cond ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1) ((not todb) (debug:print 3 "WARNING: db:sync-tables called with todb missing") -2) ((not (sqlite3:database? (db:dbdat-get-db fromdb))) (debug:print 0 "ERROR: db:sync-tables called with fromdb not a database " fromdb) -3) ((not (sqlite3:database? (db:dbdat-get-db todb))) (debug:print 0 "ERROR: db:sync-tables called with todb not a database " todb) -4) |
︙ | ︙ |
Modified http-transport.scm from [c0d3a601db] to [0d0f56da13].
︙ | ︙ | |||
390 391 392 393 394 395 396 | (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (server-timeout (server:get-timeout))) (let loop ((count 0) | | | > | | | | > | > > > > > > > > > > | | | | > > > > > | | | | | | | | | | | | | | < < < < < | | 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 | (equal? sdat last-sdat) sdat))))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (server-timeout (server:get-timeout))) (let loop ((count 0) (server-state 'available) (bad-sync-count 0)) ;; Use this opportunity to sync the inmemdb to db (if *inmemdb* (let ((start-time (current-milliseconds)) (sync-time #f) (rem-time #f)) ;; inmemdb is a dbstruct (condition-case (db:sync-touched *inmemdb* *run-id* force-sync: #t) ((sync-failed)(cond ((> bad-sync-count 10) ;; time to give up (http-transport:server-shutdown server-id port)) (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop (thread-sleep! 5) (loop count server-state (+ bad-sync-count 1))))) ((exn) (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") (exit))) (set! sync-time (- (current-milliseconds) start-time)) (set! rem-time (quotient (- 4000 sync-time) 1000)) (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time) (if (and (<= rem-time 4) (> rem-time 0)) (thread-sleep! rem-time) (thread-sleep! 4))) ;; fallback for if the math is changed ... ;; ;; no *inmemdb* yet, set running after our first pass through and start the db ;; (if (eq? server-state 'available) (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers (if (equal? new-server-id server-id) (begin (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access (set! *inmemdb* (db:setup run-id)) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) (begin ;; gotta exit nicely (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") (http-transport:server-shutdown server-id port)))))) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1) 'running bad-sync-count)) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) (mutex-unlock! *heartbeat-mutex*) (if (or (not (equal? sdat (list iface port))) |
︙ | ︙ | |||
461 462 463 464 465 466 467 | ;; ;; Consider implementing some smarts here to re-insert the record or kill self is ;; the db indicates so ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; | | | 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | ;; ;; Consider implementing some smarts here to re-insert the record or kill self is ;; the db indicates so ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; (loop 0 server-state bad-sync-count)) (http-transport:server-shutdown server-id port))))) (define (http-transport:server-shutdown server-id port) (let ((tdbdat (tasks:open-db))) (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) |
︙ | ︙ |
Modified launch.scm from [111f911e42] to [6835480f8d].
︙ | ︙ | |||
586 587 588 589 590 591 592 | (if (not (file-exists? linktree)) (begin (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) ;; create the directory for the tests dir links, this is needed no matter what... (if (and (not (directory-exists? lnkbase)) (not (file-exists? lnkbase))) | > > > > > | | 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 | (if (not (file-exists? linktree)) (begin (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree) (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree)))) ;; create the directory for the tests dir links, this is needed no matter what... (if (and (not (directory-exists? lnkbase)) (not (file-exists? lnkbase))) (handle-exceptions exn (begin (debug:print "ERROR: Problem creating linktree base at " lnkbase) (print-error-message exn (current-error-port))) (create-directory lnkbase #t))) ;; update the toptest record with its location rundir, cache the path ;; This wass highly inefficient, one db write for every subtest, potentially ;; thousands of unnecessary updates, cache the fact it was set and don't set it ;; again. ;; Now create the link from the test path to the link tree, however |
︙ | ︙ |
Modified runs.scm from [411a2b6b6a] to [ad2ef66ee2].
︙ | ︙ | |||
1621 1622 1623 1624 1625 1626 1627 | ((not target) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target") (exit 3)) ((not runname) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname") (exit 3)) (else | | | | > | | 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 | ((not target) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target") (exit 3)) ((not runname) (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname") (exit 3)) (else (let (;; (db #f) (keys #f)) (if (not (launch:setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) ;; (if (args:get-arg "-server") ;; (cdb:remote-run server:start db (args:get-arg "-server"))) (set! keys (keys:config-get-fields *configdat*)) ;; have enough to process -target or -reqtarg here (if (args:get-arg "-reqtarg") (let* ((runconfigf (conc *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL (runconfig (read-config runconfigf #f #t environ-patt: #f))) (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f) (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash) (begin (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf) ;; (if db (sqlite3:finalize! db)) (exit 1) ))) (if (args:get-arg "-target") (keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash))) (if (not (car *configinfo*)) (begin (debug:print 0 "ERROR: Attempted to " action-desc " but run area config file not found") (exit 1)) ;; Extract out stuff needed in most or many calls ;; here then call proc (let* ((keyvals (keys:target->keyval keys target))) (proc target runname keys keyvals))) ;; (if db (sqlite3:finalize! db)) (set! *didsomething* #t)))))) ;;====================================================================== ;; Lock/unlock runs ;;====================================================================== (define (runs:handle-locking target keys runname lock unlock user) |
︙ | ︙ |