Comment: | Untested cleanup |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | multi-area |
Files: | files | file ages | folders |
SHA1: |
da7f14af0ef7f1b6ffccea82a0c210cb |
User & Date: | matt on 2015-04-05 23:02:59 |
Other Links: | branch diff | manifest | tags |
2015-04-05
| ||
23:16 | more untested cleanup check-in: c9180c4d63 user: matt tags: multi-area | |
23:02 | Untested cleanup check-in: da7f14af0e user: matt tags: multi-area | |
22:04 | More cleanup after big tear up check-in: 2b14c9b4a7 user: matt tags: multi-area | |
Modified client.scm from [671f590b03] to [141f2f00f0].
︙ | ︙ | |||
46 47 48 49 50 51 52 | (define (client:connect iface port) (case (server:get-transport) ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) | | | | | | 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | (define (client:connect iface port) (case (server:get-transport) ((rpc) (rpc:client-connect iface port)) ((http) (http:client-connect iface port)) ((zmq) (zmq:client-connect iface port)) (else (rpc:client-connect iface port)))) (define (client:setup run-id area-dat #!key (remaining-tries 10) (failed-connects 0)) (case (server:get-transport) ((rpc) (rpc-transport:client-setup run-id area-dat)) ((http)(client:setup-http run-id area-dat)) (else (rpc-transport:client-setup run-id area-dat)))) ;; (define (client:login-no-auto-setup server-info run-id) ;; (case (server:get-transport) ;; ((rpc) (rpc:login-no-auto-client-setup server-info run-id)) ;; ((http) (rmt:login-no-auto-client-setup server-info run-id)) ;; (else (rpc:login-no-auto-client-setup server-info run-id)))) ;; |
︙ | ︙ | |||
81 82 83 84 85 86 87 | ;; (hash-table-set! (common:get-remote remote) run-id start-res) ;; start-res) ;; return the server info ;; (if (member remaining-tries '(3 4 6)) ;; (begin ;; login failed ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) ;; (hash-table-delete! (common:get-remote remote) run-id) ;; (open-run-close tasks:server-force-clean-run-record | | | | | | | | | | | > | | | | | | 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | ;; (hash-table-set! (common:get-remote remote) run-id start-res) ;; start-res) ;; return the server info ;; (if (member remaining-tries '(3 4 6)) ;; (begin ;; login failed ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info) ;; (hash-table-delete! (common:get-remote remote) run-id) ;; (open-run-close tasks:server-force-clean-run-record ;; (lambda ()(tasks:open-db area-dat)) ;; run-id ;; (car host-info) ;; (cadr host-info) ;; " client:setup (host-info=#t)") ;; (thread-sleep! 5) ;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) ;; (begin ;; (debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info) ;; (thread-sleep! 5) ;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ;; ;; YUK: rename server-dat here ;; (let* ((server-dat (open-run-close tasks:get-server (lambda ()(tasks:open-db area-dat)) run-id))) ;; (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) ;; (if server-dat ;; (let* ((iface (tasks:hostinfo-get-interface server-dat)) ;; (port (tasks:hostinfo-get-port server-dat)) ;; (start-res (http-transport:client-connect iface port)) ;; ;; (ping-res (server:ping-server run-id iface port)) ;; (ping-res (rmt:login-no-auto-client-setup start-res run-id))) ;; (if start-res ;; (begin ;; (hash-table-set! (common:get-remote remote) run-id start-res) ;; start-res) ;; (if (member remaining-tries '(2 5)) ;; (begin ;; login failed ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) ;; (hash-table-delete! (common:get-remote remote) run-id) ;; (open-run-close tasks:server-force-clean-run-record ;; (lambda ()(tasks:open-db area-dat)) ;; run-id ;; (tasks:hostinfo-get-interface server-dat) ;; (tasks:hostinfo-get-port server-dat) ;; " client:setup (server-dat = #t)") ;; (thread-sleep! 2) ;; (server:try-running run-id) ;; (thread-sleep! 10) ;; give server a little time to start up ;; (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) ;; (begin ;; (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) ;; (thread-sleep! 5) ;; (client:setup run-id remaining-tries: (- remaining-tries 1)))))) ;; (begin ;; no server registered ;; (if (eq? remaining-tries 2) ;; (begin ;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id (lambda ()(tasks:open-db area-dat)) run-id " client:setup (server-dat=#f)") ;; (client:setup run-id remaining-tries: 10)) ;; (begin ;; (thread-sleep! 2) ;; (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat) ;; (if (< (open-run-close tasks:num-in-available-state (lambda ()(tasks:open-db area-dat)) run-id) 3) ;; (begin ;; ;; (open-run-close tasks:server-clean-out-old-records-for-run-id (lambda ()(tasks:open-db area-dat)) run-id " client:setup (server-dat=#f)") ;; (server:try-running run-id))) ;; (thread-sleep! 10) ;; give server a little time to start up ;; (client:setup run-id remaining-tries: (- remaining-tries 1))))))))))) ;; Do all the connection work, look up the transport type and set up the ;; connection if required. ;; ;; There are two scenarios. ;; 1. We are a test manager and we received transport-type and (common:get-remote remote) via cmdline ;; 2. We are a run tests, list runs or other interactive process and we must figure out ;; transport-type and (common:get-remote remote) from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove (common:get-remote remote) stuff ;; (define (client:setup-http run-id area-dat #!key (remaining-tries 10) (failed-connects 0)(remote #f)) (debug:print-info 2 "client:setup remaining-tries=" remaining-tries) (let* ((tdbdat (tasks:open-db area-dat)) (transport-type (megatest:area-transport area-dat))) (if (<= remaining-tries 0) (begin (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id) (exit 1)) (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))) (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-dat (let* ((iface (tasks:hostinfo-get-interface server-dat)) (hostname (tasks:hostinfo-get-hostname server-dat)) (port (tasks:hostinfo-get-port server-dat)) (start-res (case transport-type ((http)(http-transport:client-connect iface port)) ((nmsg)(nmsg-transport:client-connect hostname port)))) (ping-res (case transport-type ((http)(rmt:login-no-auto-client-setup start-res run-id)) ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id))) (if logininfo (car (vector-ref logininfo 1)) #f)))))) (if (and start-res ping-res) (begin (common:set-remote! remote run-id start-res) (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res)) start-res) (begin ;; login failed but have a server record, clean out the record and try again (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat) (case transport-type ((http)(http-transport:close-connections run-id))) (common:del-remote! remote run-id) (tasks:kill-server-run-id run-id) (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #t)") (if (> remaining-tries 8) (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time (server:try-running run-id) (thread-sleep! 5) ;; give server a little time to start up (client:setup run-id area-dat remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id))) (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available) (if (< num-available 2) (server:try-running run-id)) (thread-sleep! (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup run-id area-dat remaining-tries: (- remaining-tries 1))))))))) ;; keep this as a function to ease future (define (client:start run-id server-info) (http-transport:client-connect (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; client:signal-handler |
︙ | ︙ |
Modified dashboard-guimonitor.scm from [886a02dbe2] to [c023768ca3].
︙ | ︙ | |||
173 174 175 176 177 178 179 | (iup:attribute-set! tabtop "TABTITLE1" "Collateral") (iup:attribute-set! tabtop "TABTITLE2" "Fossil") (iup:attribute-set! tabtop "TABTITLE3" "Tools") tabtop)))) ;; BUG: Remember to re-instate this!!!! ;; (on-exit (lambda () | | | | | 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 | (iup:attribute-set! tabtop "TABTITLE1" "Collateral") (iup:attribute-set! tabtop "TABTITLE2" "Fossil") (iup:attribute-set! tabtop "TABTITLE3" "Tools") tabtop)))) ;; BUG: Remember to re-instate this!!!! ;; (on-exit (lambda () ;; (let ((tdb (tasks:open-db area-dat))) ;; ;; (print "On-exit called") ;; (tasks:remove-monitor-record tdb) ;; (sqlite3:finalize! tdb)))) (define (gui-monitor db area-dat) (let ((keys (db:get-keys db)) (tdb (tasks:open-db area-dat))) (tasks:register-monitor db tdb) ;;; let the other monitors know we are here (control-panel db tdb keys) ;(tasks:remove-monitor-record db) ;(sqlite3:finalize! db) )) |
Modified dashboard-tests.scm from [8072d2165f] to [d0c66f198a].
︙ | ︙ | |||
416 417 418 419 420 421 422 | (exit 1)) (let* (;; (run-id (if testdat (db:test-get-run_id testdat) #f)) (keydat (if testdat (db:get-key-val-pairs dbstruct run-id) #f)) (rundat (if testdat (db:get-run-info dbstruct run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-rows rundat) (db:get-header rundat) "runname") #f)) | | | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | (exit 1)) (let* (;; (run-id (if testdat (db:test-get-run_id testdat) #f)) (keydat (if testdat (db:get-key-val-pairs dbstruct run-id) #f)) (rundat (if testdat (db:get-run-info dbstruct run-id) #f)) (runname (if testdat (db:get-value-by-header (db:get-rows rundat) (db:get-header rundat) "runname") #f)) (tdb (tdb:open-test-db-by-test-id-local dbstruct area-dat run-id test-id)) ;; These next two are intentional bad values to ensure errors if they should not ;; get filled in properly. (logfile "/this/dir/better/not/exist") (rundir (if testdat (db:test-get-rundir testdat) logfile)) (testdat-path (conc rundir "/testdat.db")) ;; this gets recalculated until found |
︙ | ︙ | |||
693 694 695 696 697 698 699 | (db:test-data-get-value x) (db:test-data-get-expected x) (db:test-data-get-tol x) (db:test-data-get-status x) (db:test-data-get-units x) (db:test-data-get-type x) (db:test-data-get-comment x))) | | | 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 | (db:test-data-get-value x) (db:test-data-get-expected x) (db:test-data-get-tol x) (db:test-data-get-status x) (db:test-data-get-units x) (db:test-data-get-type x) (db:test-data-get-comment x))) (tdb:open-run-close-db-by-test-id-local dbstruct area-dat run-id test-id #f tdb:read-test-data test-id "%"))) "\n"))) (if (not (equal? currval newval)) (iup:attribute-set! test-data "VALUE" newval ))))) ;; "TITLE" newval))))) test-data)) ;;(dashboard:run-controls) ))) (iup:attribute-set! tabs "TABTITLE0" "Steps") |
︙ | ︙ |
Modified db.scm from [69b8627abb] to [4816354319].
︙ | ︙ | |||
647 648 649 650 651 652 653 | (mtdb (if toppath (db:open-megatest-db area-dat))) (allow-cleanup (if run-ids #f #t)) (run-ids (if run-ids run-ids (if toppath (begin (db:delay-if-busy mtdb area-dat) (db:get-all-run-ids mtdb))))) | | | 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 | (mtdb (if toppath (db:open-megatest-db area-dat))) (allow-cleanup (if run-ids #f #t)) (run-ids (if run-ids run-ids (if toppath (begin (db:delay-if-busy mtdb area-dat) (db:get-all-run-ids mtdb))))) (tdbdat (tasks:open-db area-dat)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat area-dat)))) ;; kill servers (if (member 'killservers options) (for-each (lambda (server) (tasks:server-delete-record (db:delay-if-busy tdbdat area-dat) (vector-ref server 0) "dbmigration") |
︙ | ︙ |
Modified dcommon.scm from [203653f6f0] to [1f00e0f1a9].
︙ | ︙ | |||
510 511 512 513 514 515 516 | (updater) (set! dashboard:update-summary-tab updater) (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) | | | | 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | (updater) (set! dashboard:update-summary-tab updater) (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) (define (dcommon:servers-table area-dat) (let* ((tdbdat (tasks:open-db area-dat)) (colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) |
︙ | ︙ |
Modified http-transport.scm from [6df4b07013] to [d8101c25db].
︙ | ︙ | |||
67 68 69 70 71 72 73 | (db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) | | | 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | (db #f) ;; (open-db)) ;; we don't want the server to be opening and closing the db unnecesarily (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (portlogger:open-run-close portlogger:find-port area-dat)) (link-tree-path (configf:lookup configdat "setup" "linktree"))) ;; (set! db *inmemdb*) (debug:print-info 0 "portlogger recommended port: " start-port) (root-path (if link-tree-path link-tree-path (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP! (handle-directory spiffy-directory-listing) |
︙ | ︙ | |||
130 131 132 133 134 135 136 | (begin (print-error-message exn) (if (< portnum 64000) (begin (debug:print 0 "WARNING: attempt to start server failed. Trying again ...") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) | | | | 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | (begin (print-error-message exn) (if (< portnum 64000) (begin (debug:print 0 "WARNING: attempt to start server failed. Trying again ...") (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (portlogger:open-run-close portlogger:set-failed area-dat portnum) (debug:print 0 "WARNING: failed to start on portnum: " portnum ", trying next port") (thread-sleep! 0.1) ;; get_next_port goes here (http-transport:try-start-server run-id ipaddrstr (portlogger:open-run-close portlogger:find-port area-dat) server-id area-dat)) (begin (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat) run-id ipaddrstr portnum " http-transport:try-start-server") (print "ERROR: Tried and tried but could not start the server")))) ;; any error in following steps will result in a retry (set! *server-info* (list ipaddrstr portnum)) |
︙ | ︙ | |||
363 364 365 366 367 368 369 | (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; | | | | 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 | (api-req (make-request method: 'POST uri: api-uri)) (server-dat (vector iface port api-uri api-url api-req (current-seconds)))) server-dat)) ;; run http-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (http-transport:keep-running server-id run-id area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id) (let* ((tdbdat (tasks:open-db area-dat)) (server-info (let loop ((start-time (current-seconds)) (changed #t) (last-sdat "not this")) (let ((sdat #f)) (thread-sleep! 0.01) (debug:print-info 0 "Waiting for server alive signature") (mutex-lock! *heartbeat-mutex*) |
︙ | ︙ | |||
411 412 413 414 415 416 417 | (sync-time #f) (rem-time #f)) ;; inmemdb is a dbstruct (condition-case (db:sync-touched *inmemdb* *run-id* force-sync: #t) ((sync-failed)(cond ((> bad-sync-count 10) ;; time to give up | | | 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 | (sync-time #f) (rem-time #f)) ;; inmemdb is a dbstruct (condition-case (db:sync-touched *inmemdb* *run-id* force-sync: #t) ((sync-failed)(cond ((> bad-sync-count 10) ;; time to give up (http-transport:server-shutdown server-id port area-dat)) (else ;; (> bad-sync-count 0) ;; we've had a fail or two, delay and loop (thread-sleep! 5) (loop count server-state (+ bad-sync-count 1))))) ((exn) (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed") (exit))) |
︙ | ︙ | |||
444 445 446 447 448 449 450 | (set! *inmemdb* (db:setup run-id)) ;; force initialization ;; (db:get-db *inmemdb* #t) (db:get-db *inmemdb* run-id) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) (begin ;; gotta exit nicely (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") | | | 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 | (set! *inmemdb* (db:setup run-id)) ;; force initialization ;; (db:get-db *inmemdb* #t) (db:get-db *inmemdb* run-id) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")) (begin ;; gotta exit nicely (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision") (http-transport:server-shutdown server-id port area-dat)))))) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1) 'running bad-sync-count)) ;; Check that iface and port have not changed (can happen if server port collides) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) |
︙ | ︙ | |||
485 486 487 488 489 490 491 | ;; Consider implementing some smarts here to re-insert the record or kill self is ;; the db indicates so ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; (loop 0 server-state bad-sync-count)) | | | | | | 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 | ;; Consider implementing some smarts here to re-insert the record or kill self is ;; the db indicates so ;; ;; (if (tasks:server-am-i-the-server? tdb run-id) ;; (tasks:server-set-state! tdb server-id "running")) ;; (loop 0 server-state bad-sync-count)) (http-transport:server-shutdown server-id port area-dat))))) (define (http-transport:server-shutdown server-id port area-dat) (let ((tdbdat (tasks:open-db area-dat))) (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t)) ;; ;; start_shutdown ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down") (portlogger:open-run-close portlogger:set-port area-dat port "released") (thread-sleep! 5) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Number of cached writes " *number-of-writes*) (debug:print-info 0 "Average cached write time " (if (eq? *number-of-writes* 0) "n/a (no writes)" (/ *writes-total-delay* |
︙ | ︙ | |||
523 524 525 526 527 528 529 | (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id area-dat) | | | | | | | 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 | (exit))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id area-dat) (let* ((tdbdat (tasks:open-db area-dat))) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (begin (daemon:ize) (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it (begin (current-error-port *alt-log-file*) (current-output-port *alt-log-file*))))) (if (server:check-if-running run-id area-dat) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id area-dat) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch") )) (let* ((th2 (make-thread (lambda () (debug:print-info 0 "Server run thread started") (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") run-id server-id area-dat)) "Server run")) (th3 (make-thread (lambda () (debug:print-info 0 "Server monitor thread started") (http-transport:keep-running server-id run-id area-dat)) "Keep running"))) (thread-start! th2) (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. (thread-start! th3) (set! *didsomething* #t) (thread-join! th2) (exit)))))) |
︙ | ︙ |
Modified launch.scm from [c48e97b3ce] to [b6be267d56].
︙ | ︙ | |||
872 873 874 875 876 877 878 | (create-directory work-area #t) (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) | | | 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 | (create-directory work-area #t) (debug:print 0 "WARNING: No disk work area specified - running in the test directory under tmp_run"))) (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) (list 'transport (conc (megatest:area-transport area-dat))) ;; ;; (list 'serverinf *server-info*) (list 'toppath toppath) (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) (list 'run-id run-id ) (list 'test-id test-id ) |
︙ | ︙ |
Modified megatest.scm from [74462f253a] to [f59b483f32].
︙ | ︙ | |||
649 650 651 652 653 654 655 | ;; Server? Start up here. ;; (let ((tl (launch:setup-for-run *area-dat*)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) (if run-id (begin | | | 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 | ;; Server? Start up here. ;; (let ((tl (launch:setup-for-run *area-dat*)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) (if run-id (begin (server:launch run-id *area-dat*) (set! *didsomething* #t)) (debug:print 0 "ERROR: server requires run-id be specified with -run-id"))) ;; Not a server? This section will decide how to communicate ;; ;; Setup client for all expect listed here (if (null? (lset-intersection |
︙ | ︙ | |||
686 687 688 689 690 691 692 | ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: toppath local: #t)))))))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (launch:setup-for-run *area-dat*))) (if tl | | | 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 | ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: toppath local: #t)))))))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (launch:setup-for-run *area-dat*))) (if tl (let* ((tdbdat (tasks:open-db *area-dat*)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (killinfo (args:get-arg "-stop-server")) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport") |
︙ | ︙ | |||
1049 1050 1051 1052 1053 1054 1055 | "-runtests" "run a test" (lambda (target runname keys keyvals) ;; ;; May or may not implement it this way ... ;; ;; Insert this run into the tasks queue | | | 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 | "-runtests" "run a test" (lambda (target runname keys keyvals) ;; ;; May or may not implement it this way ... ;; ;; Insert this run into the tasks queue ;; (open-run-close tasks:add (lambda ()(tasks:open-db *area-dat*)) ;; "runtests" ;; user ;; target ;; runname ;; (args:get-arg "-runtests") ;; #f)))) (runs:run-tests target |
︙ | ︙ |
Modified nmsg-transport.scm from [43df6f5458] to [fa92993bb8].
︙ | ︙ | |||
61 62 63 64 65 66 67 | ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (nmsg-transport:run dbstruct area-dat hostn run-id server-id #!key (retrynum 1000)) (debug:print 2 "Attempting to start the server ...") | | | | | | | | 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (nmsg-transport:run dbstruct area-dat hostn run-id server-id #!key (retrynum 1000)) (debug:print 2 "Attempting to start the server ...") (let* ((start-port (portlogger:open-run-close portlogger:find-port area-dat)) (server-thread (make-thread (lambda () (nmsg-transport:try-start-server dbstruct run-id start-port server-id)) "server thread")) (tdbdat (tasks:open-db area-dat))) (thread-start! server-thread) (thread-sleep! 0.1) (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id)) (let ((interface (if (equal? hostn "-")(get-host-name) hostn))) (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep") (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access ;; (set! *inmemdb* dbstruct) (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running") (thread-start! (make-thread (lambda ()(nmsg-transport:keep-running server-id run-id area-dat)) "keep running")) (thread-join! server-thread)) (if (> retrynum 0) (begin (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.") (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature") (portlogger:open-run-close portlogger:set-failed area-dat start-port) (nmsg-transport:run dbstruct area-dat hostn run-id server-id)) (begin (debug:print 0 "ERROR: could not find an open port to start server on. Giving up") (exit 1)))))) (define (nmsg-transport:try-start-server dbstruct run-id portnum server-id) (let ((repsoc (nn-socket 'rep))) (nn-bind repsoc (conc "tcp://*:" portnum)) (let loop ((msg-in (nn-recv repsoc))) (let* ((dat (db:string->obj msg-in transport: 'nmsg))) (debug:print 0 "server, received: " dat) (let ((result (api:execute-requests dbstruct dat))) (debug:print 0 "server, sending: " result) (nn-send repsoc (db:obj->string result transport: 'nmsg))) (loop (nn-recv repsoc)))))) ;; all routes though here end in exit ... ;; (define (nmsg-transport:launch run-id area-dat) (let* ((tdbdat (tasks:open-db area-dat)) (dbstruct (db:setup run-id)) (hostn (or (args:get-arg "-server") "-"))) (set! *run-id* run-id) (set! *inmemdb* dbstruct) ;; with nbfake daemonize isn't really needed ;; ;; (if (args:get-arg "-daemonize") ;; (begin ;; (daemon:ize) ;; (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it ;; (begin ;; (current-error-port *alt-log-file*) ;; (current-output-port *alt-log-file*))))) (if (server:check-if-running run-id area-dat) (begin (debug:print-info 0 "Server for run-id " run-id " already running") (exit 0))) (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) (if (not (server:check-if-running run-id area-dat)) (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) (- remtries 1)) (begin (debug:print-info 0 "Another server took the slot, exiting") (exit 0)))) (begin ;; since we didn't get the server lock we are going to clean up and bail out |
︙ | ︙ | |||
269 270 271 272 273 274 275 | sdat) (begin (thread-sleep! 0.5) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) | | | 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 | sdat) (begin (thread-sleep! 0.5) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdbdat (tasks:open-db area-dat)) (server-timeout (let ((tmo (configf:lookup (megatest:area-configdat area-dat) "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days (* 60 1) ;; default to one minute ;; (* 60 60 25) ;; default to 25 hours |
︙ | ︙ |
Modified olddashboard.scm from [ad835aa81d] to [af152b63b7].
︙ | ︙ | |||
472 473 474 475 476 477 478 | (updater) (set! dashboard:update-summary-tab updater) (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) | | | | 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 | (updater) (set! dashboard:update-summary-tab updater) (iup:attribute-set! stats-matrix "WIDTHDEF" "40") (iup:vbox ;; (iup:label "Run statistics" #:expand "HORIZONTAL") stats-matrix))) (define (dcommon:servers-table area-dat) (let* ((tdbdat (tasks:open-db area-dat)) (colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) |
︙ | ︙ | |||
2124 2125 2126 2127 2128 2129 2130 | (> modtime last-db-update-time) (> (current-seconds)(+ last-db-update-time 1))))) (define *monitor-db-path* (conc *dbdir* "/monitor.db")) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. | | | 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 | (> modtime last-db-update-time) (> (current-seconds)(+ last-db-update-time 1))))) (define *monitor-db-path* (conc *dbdir* "/monitor.db")) (define *last-monitor-update-time* 0) ;; Force creation of the db in case it isn't already there. (tasks:open-db area-dat) (define (dashboard:get-youngest-run-db-mod-time) (handle-exceptions exn (begin (debug:print 0 "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn)) (current-seconds)) ;; something went wrong - just print an error and return current-seconds |
︙ | ︙ |
Modified portlogger.scm from [bdca7fc363] to [58d53e15b4].
︙ | ︙ | |||
15 16 17 18 19 20 21 | (declare (unit portlogger)) (declare (uses db)) ;; lsof -i | | | | | | 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 | (declare (unit portlogger)) (declare (uses db)) ;; lsof -i (define (portlogger:open-db fname area-dat) (let* ((avail (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away (exists (file-exists? fname)) (db (if avail (sqlite3:open-database fname) (begin (system (conc "rm -f " fname)) (sqlite3:open-database fname)))) (handler (make-busy-timeout 136000)) (canwrite (file-write-access? fname))) ;; (db-init (lambda () ;; (sqlite3:execute ;; db ;; "CREATE TABLE IF NOT EXISTS ports ( ;; port INTEGER PRIMARY KEY, ;; state TEXT DEFAULT 'not-used', ;; fail_count INTEGER DEFAULT 0, ;; update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")))) (sqlite3:set-busy-handler! db handler) (db:set-sync db area-dat) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; (if (not exists) ;; needed with IF NOT EXISTS? (sqlite3:execute db "CREATE TABLE IF NOT EXISTS ports ( port INTEGER PRIMARY KEY, state TEXT DEFAULT 'not-used', fail_count INTEGER DEFAULT 0, update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") db)) (define (portlogger:open-run-close proc area-dat . params) (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away (handle-exceptions exn (begin ;; (release-dot-lock fname) (debug:print 0 "ERROR: portlogger:open-run-close failed. " proc " " params) (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 "exn=" (condition->list exn)) (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname area-dat)) (res (apply proc db params))) (sqlite3:finalize! db) ;; (release-dot-lock fname) res)))) ;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) (define (portlogger:take-port db portnum) |
︙ | ︙ |
Modified rmt.scm from [3e0b8a2905] to [9d5589428d].
︙ | ︙ | |||
74 75 76 77 78 79 80 | ;; (define (rmt:get-connection-info run-id area-dat #!key (remote #f)) (let ((cinfo (common:get-remote remote run-id))) (if cinfo cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) | | | | 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | ;; (define (rmt:get-connection-info run-id area-dat #!key (remote #f)) (let ((cinfo (common:get-remote remote run-id))) (if cinfo cinfo ;; NB// can cache the answer for server running for 10 seconds ... ;; ;; (and (not (rmt:write-frequency-over-limit? cmd run-id)) (if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db area-dat)) run-id) (client:setup run-id area-dat remote: remote) #f)))) (define (rmt:discard-old-connections area-dat) ;; clean out old connections (mutex-lock! *db-multi-sync-mutex*) (let ((remote (megatest:area-remote area-dat)) (expire-time (- (current-seconds) (server:get-timeout area-dat) 10))) ;; don't forget the 10 second margin |
︙ | ︙ | |||
103 104 105 106 107 108 109 | (mutex-unlock! *db-multi-sync-mutex*)) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params area-dat #!key (attemptnum 1)(remote #f)) ;; start attemptnum at 1 so the modulo below works as expected (rmt:discard-old-connections area-dat) ;; (mutex-lock! *send-receive-mutex*) | > | | | | | | 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 | (mutex-unlock! *db-multi-sync-mutex*)) (define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id (define (rmt:send-receive cmd rid params area-dat #!key (attemptnum 1)(remote #f)) ;; start attemptnum at 1 so the modulo below works as expected (rmt:discard-old-connections area-dat) ;; (mutex-lock! *send-receive-mutex*) (let* ((transport-type (megatest:area-transport area-dat)) (run-id (if rid rid 0)) (configdat (megatest:area-configdat area-dat)) (connection-info (rmt:get-connection-info run-id area-dat))) ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also) (if connection-info ;; use the server if have connection info (let* ((dat (case transport-type ((http)(condition-case (http-transport:client-api-send-receive run-id connection-info cmd params) ((commfail)(vector #f "communications fail")) ((exn)(vector #f "other fail")))) ((nmsg)(condition-case (nmsg-transport:client-api-send-receive run-id connection-info cmd params) ((timeout)(vector #f "timeout talking to server")))) (else (exit)))) (success (if (vector? dat) (vector-ref dat 0) #f)) (res (if (vector? dat) (vector-ref dat 1) #f))) (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info)) (if success (begin ;; (mutex-unlock! *send-receive-mutex*) (case ((http) res) ;; (db:string->obj res)) ((nmsg) res))) ;; (vector-ref res 1))) (begin ;; let ((new-connection-info (client:setup run-id))) (debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.") ;; (case *transport-type* ;; ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info)))) (common:del-remote! remote run-id) ;; don't keep using the same connection ;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. ;; (if (eq? (modulo attemptnum 5) 0) ;; (tasks:kill-server-run-id run-id tag: "api-send-receive-failed")) ;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications (tasks:start-and-wait-for-server (tasks:open-db area-dat) run-id 15) ;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1)))))) ;; no longer killing the server in http-transport:client-api-send-receive ;; may kill it here but what are the criteria? ;; start with three calls then kill server ;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id)) ;; (thread-sleep! 2) (rmt:send-receive cmd run-id params area-dat attemptnum: (+ attemptnum 1))))) ;; no connection info? try to start a server, or access locally if no ;; server and the query is read-only ;; ;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call ;; (if (and (< attemptnum 15) (member cmd api:write-queries)) (let ((faststart (configf:lookup configdat "server" "faststart"))) (common:del-remote! remote run-id) ;; (mutex-unlock! *send-receive-mutex*) (if (and faststart (equal? faststart "no")) (begin (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db area-dat)) run-id 10) (thread-sleep! (random 5)) ;; give some time to settle and minimize collison? (rmt:send-receive cmd rid params area-dat attemptnum: (+ attemptnum 1))) (begin (server:kind-run run-id area-dat) (rmt:open-qry-close-locally cmd run-id params area-dat)))) (begin ;; (debug:print 0 "ERROR: Communication failed!") |
︙ | ︙ | |||
374 375 376 377 378 379 380 | (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id) area-dat)) (define (rmt:open-test-db-by-test-id run-id test-id area-dat #!key (work-area #f)) (let* ((test-path (if (string? work-area) work-area (rmt:test-get-rundir-from-test-id run-id test-id area-dat)))) (debug:print 3 "TEST PATH: " test-path) | | | 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 | (rmt:send-receive 'test-get-rundir-from-test-id run-id (list run-id test-id) area-dat)) (define (rmt:open-test-db-by-test-id run-id test-id area-dat #!key (work-area #f)) (let* ((test-path (if (string? work-area) work-area (rmt:test-get-rundir-from-test-id run-id test-id area-dat)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path area-dat))) ;; WARNING: This currently bypasses the transaction wrapped writes system (define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment area-dat) (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment) area-dat)) (define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus area-dat) (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus) area-dat)) |
︙ | ︙ |
Modified rpc-transport.scm from [9f0167ff5f] to [75fd7447c7].
︙ | ︙ | |||
35 36 37 38 39 40 41 | ;; (apply (eval (string->symbol (conc "remote:" procstr))) params) (apply (eval (string->symbol procstr)) params))) ;; all routes though here end in exit ... ;; ;; start_server? ;; | | | | | | | | | | | | 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | ;; (apply (eval (string->symbol (conc "remote:" procstr))) params) (apply (eval (string->symbol procstr)) params))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (rpc-transport:launch run-id area-dat) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (if (server:check-if-running run-id area-dat) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) (let loop ((server-id (open-run-close tasks:server-lock-slot (lambda ()(tasks:open-db area-dat))run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) (loop (open-run-close tasks:server-lock-slot (lambda ()(tasks:open-db area-dat)) run-id) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid (lambda ()(tasks:open-db area-dat)) " rpc-transport:launch"))) (begin (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) (exit))))) (define (rpc-transport:run hostn run-id server-id area-dat) (debug:print 2 "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) (rpc:publish-procedure! 'server:login server:login) (rpc:publish-procedure! 'testing (lambda () "Just testing")) (let* ((configdat (megatest:area-configdat area-dat)) (db #f) (hostname (get-host-name)) (ipaddrstr (let ((ipstr (if (string=? "-" hostn) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") (server:get-best-guess-address hostname) #f))) (if ipstr ipstr hostn))) ;; hostname))) (start-port (open-run-close tasks:server-get-next-port (lambda ()(tasks:open-db area-dat)))) (link-tree-path (configf:lookup configdat "setup" "linktree")) (rpc:listener (rpc-transport:find-free-port-and-open (rpc:default-server-port))) (th1 (make-thread (lambda () ((rpc:make-server rpc:listener) #t)) "rpc:server")) ;; (cute (rpc:make-server rpc:listener) "rpc:server") ;; 'rpc:server)) (hostname (if (string=? "-" hostn) (get-host-name) hostn)) (ipaddrstr (if (string=? "-" hostn) (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".") #f)) (portnum (rpc:default-server-port)) (host:port (conc (if ipaddrstr ipaddrstr hostname) ":" portnum)) (tdb (tasks:open-db area-dat))) (thread-start! th1) (set! db *inmemdb*) (open-run-close tasks:server-set-interface-port (lambda ()(tasks:open-db area-dat)) server-id ipaddrstr portnum) (debug:print 0 "Server started on " host:port) ;; (trace rpc:publish-procedure!) ;; (rpc:publish-procedure! 'server:login server:login) ;; (rpc:publish-procedure! 'testing (lambda () "Just testing")) ;;====================================================================== ;; ;; end of publish-procedure section ;;====================================================================== ;; (on-exit (lambda () (open-run-close tasks:server-set-state! (lambda ()(tasks:open-db area-dat)) server-id "stopped"))) (set! *rpc:listener* rpc:listener) (tasks:server-set-state! tdb server-id "running") (set! *inmemdb* (db:setup run-id)) ;; if none running or if > 20 seconds since ;; server last used then start shutdown (let loop ((count 0)) (thread-sleep! 5) ;; no need to do this very often (let ((numrunning -1)) ;; (db:get-count-tests-running db))) (if (or (> numrunning 0) (> (+ *last-db-access* 60)(current-seconds))) (begin (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*)) (loop (+ 1 count))) (begin (debug:print-info 0 "Starting to shutdown the server side") (open-run-close tasks:server-delete-record (lambda ()(tasks:open-db area-dat)) server-id " rpc-transport:try-start-server stop") (thread-sleep! 10) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") )))))) (define (rpc-transport:find-free-port-and-open port) (handle-exceptions |
︙ | ︙ | |||
177 178 179 180 181 182 183 | (let ((server-dat (list iface port #f #f #f))) (common:set-remote! remote run-id server-dat) server-dat) (begin (server:try-running run-id) (thread-sleep! 2) (rpc-transport:client-setup run-id area-dat remtries: (- remtries 1))))) | | | 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 | (let ((server-dat (list iface port #f #f #f))) (common:set-remote! remote run-id server-dat) server-dat) (begin (server:try-running run-id) (thread-sleep! 2) (rpc-transport:client-setup run-id area-dat remtries: (- remtries 1))))) (let* ((server-db-info (open-run-close tasks:get-server (lambda ()(tasks:open-db area-dat)) run-id))) (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries) (if server-db-info (let* ((iface (tasks:hostinfo-get-interface server-db-info)) (port (tasks:hostinfo-get-port server-db-info)) (server-dat (list iface port #f #f #f)) (ping-res ((rpc:procedure 'server:login host port) toppath))) (if start-res |
︙ | ︙ |
Modified runs.scm from [6f4d489558] to [45c9c10cd7].
︙ | ︙ | |||
220 221 222 223 224 225 226 | (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) | | | | 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 | (test-records (make-hash-table)) ;; need to process runconfigs before generating these lists (all-tests-registry #f) ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names (all-test-names #f) ;; (hash-table-keys all-tests-registry)) (test-names #f) ;; (tests:filter-test-names all-test-names test-patts)) (required-tests #f) ;;(lset-intersection equal? (string-split test-patts ",") test-names))) ;; test-names)) ;; Added test-names as initial for required-tests but that failed to work (task-key (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id))) (tdbdat (tasks:open-db area-dat))) (if (tasks:need-server run-id area-dat)(tasks:start-and-wait-for-server tdbdat run-id 10)) (set-signal-handler! signal/int (lambda (signum) (signal-mask! signum) (print "Received signal " signum ", cleaning up before exit. Please wait...") (let ((tdbdat (tasks:open-db area-dat))) (rmt:tasks-set-state-given-param-key task-key "killed")) (print "Killed by signal " signum ". Exiting") (exit))) ;; register this run in monitor.db (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params) (rmt:tasks-set-state-given-param-key task-key "running") |
︙ | ︙ | |||
917 918 919 920 921 922 923 | (max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) | | | 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 | (max-concurrent-jobs (let ((mcj (config-lookup configdat "setup" "max_concurrent_jobs"))) (if (and mcj (string->number mcj)) (string->number mcj) 1))) ;; length of the register queue ahead (reglen (if (number? reglen-in) reglen-in 1)) (last-time-incomplete (- (current-seconds) 900)) ;; force at least one clean up cycle (last-time-some-running (current-seconds)) (tdbdat (tasks:open-db area-dat))) ;; Initialize the test-registery hash with tests that already have a record ;; convert state to symbol and use that as the hash value (for-each (lambda (trec) (let ((id (db:test-get-id trec)) (tn (db:test-get-testname trec)) (ip (db:test-get-item-path trec)) |
︙ | ︙ | |||
1426 1427 1428 1429 1430 1431 1432 | ;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt area-dat #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) | | | 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 | ;; 'set-state-status ;; ;; NB// should pass in keys? ;; (define (runs:operate-on action target runnamepatt testpatt area-dat #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '())) (common:clear-caches) ;; clear all caches (let* ((db #f) (tdbdat (tasks:open-db area-dat)) (keys (rmt:get-keys area-dat)) (rundat (mt:get-runs-by-patt keys runnamepatt target)) (header (vector-ref rundat 0)) (runs (vector-ref rundat 1)) (states (if state (string-split state ",") '())) (statuses (if status (string-split status ",") '())) (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))) |
︙ | ︙ |
Modified server.scm from [f70ac864c6] to [7ddbaf8b7c].
︙ | ︙ | |||
46 47 48 49 50 51 52 | ;; ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id area-dat) | | | | 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | ;; ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id area-dat) (case (megatest:area-transport area-dat) ((http)(http-transport:launch run-id area-dat)) ((nmsg)(nmsg-transport:launch run-id area-dat)) ((rpc) (rpc-transport:launch run-id area-dat)) (else (debug:print 0 "ERROR: unknown server type " (megatest:area-transport area-dat))))) ;; (else (debug:print 0 "ERROR: No known transport set, transport=" transport ", using rpc") ;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== |
︙ | ︙ | |||
80 81 82 83 84 85 86 | (lambda () (write (list (current-directory) (argv))))))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; | | | | 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | (lambda () (write (list (current-directory) (argv))))))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result area-dat #!key (remote #f)) (debug:print-info 11 "server:reply return-addr=" return-addr ", result=" result) ;; (send-message pubsock target send-more: #t) ;; (send-message pubsock (case (server:get-transport) ((rpc) (db:obj->string (vector success/fail query-sig result))) ((http) (db:obj->string (vector success/fail query-sig result))) ((zmq) (let ((pub-socket (vector-ref (common:get-remote remote #f) 1))) (send-message pub-socket return-addr send-more: #t) (send-message pub-socket (db:obj->string (vector success/fail query-sig result))))) ((fs) result) (else (debug:print 0 "ERROR: unrecognised transport type: " (megatest:area-transport area-dat)) result))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; (define (server:run run-id area-dat) |
︙ | ︙ | |||
155 156 157 158 159 160 161 | ;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 ;; (define (server:try-running run-id area-dat) (if (eq? run-id 0) (server:run run-id area-dat) (rmt:start-server run-id))) | | | | | | | 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | ;; The generic run a server command. Dispatches the call to server 0 if run-id != 0 ;; (define (server:try-running run-id area-dat) (if (eq? run-id 0) (server:run run-id area-dat) (rmt:start-server run-id))) (define (server:check-if-running run-id area-dat) (let ((tdbdat (tasks:open-db area-dat))) (let loop ((server (tasks:get-server (db:delay-if-busy tdbdat) run-id)) (trycount 0)) (if server ;; note: client:start will set (common:get-remote remote). this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (let ((res (case (megatest:area-transport area-dat) ((http)(server:ping-server run-id (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server))) ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server) (tasks:hostinfo-get-port server) timeout: 2))))) ;; if the server didn't respond we must remove the record (if res #t (begin (debug:print-info 0 "server at " server " not responding, removing record") (tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id " server:check-if-running") res))) #f)))) ;; called in megatest.scm, host-port is string hostname:port ;; (define (server:ping run-id host:port area-dat) (let ((tdbdat (tasks:open-db area-dat))) (let* ((host-port (let ((slst (string-split host:port ":"))) (if (eq? (length slst) 2) (list (car slst)(string->number (cadr slst))) #f))) (toppath (launch:setup-for-run)) (server-db-dat (if (not host-port)(tasks:get-server (db:delay-if-busy tdbdat) run-id) #f))) (if (not run-id) |
︙ | ︙ |
Modified tasks.scm from [af8f785c6d] to [f2e471c669].
︙ | ︙ | |||
98 99 100 101 102 103 104 | ((file-read-access? dbpath) (sqlite3:open-database dbfile)) (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (if (and exists (not write-access)) (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) | | | 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | ((file-read-access? dbpath) (sqlite3:open-database dbfile)) (else (sqlite3:open-database ":memory:")))) ;; (never-give-up-open-db dbpath)) (handler (make-busy-timeout 36000))) (if (and exists (not write-access)) (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control (sqlite3:set-busy-handler! mdb handler) (db:set-sync mdb area-dat) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;")) ;; (if (or (and (not exists) ;; (file-write-access? toppath)) ;; (not (file-read-access? dbpath))) ;; (begin ;; ;; TASKS QUEUE MOVED TO main.db ;; |
︙ | ︙ | |||
164 165 166 167 168 169 170 | (define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) (define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) | | | | | | 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | (define (tasks:hostinfo-get-interface vec) (vector-ref vec 1)) (define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id area-dat) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id area-dat) (thread-sleep! (/ (random 1500) 1000)) ;; (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id area-dat) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" (current-process-id) ;; pid (get-host-name) ;; hostname -1 ;; port -1 ;; pubport (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version -1 ;; interface ;; (conc (server:get-transport)) ;; transport (conc (megatest:area-transport area-dat)) ;; transport run-id )) (define (tasks:num-in-available-state mdb run-id) (let ((res 0)) (sqlite3:for-each-row (lambda (num-in-queue) |
︙ | ︙ |
Modified tdb.scm from [6d49ce2a57] to [8118e8579d].
︙ | ︙ | |||
38 39 40 41 42 43 44 | ;;====================================================================== ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== ;; Create the sqlite db for the individual test(s) | | | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 | ;;====================================================================== ;;====================================================================== ;; T E S T S P E C I F I C D B ;;====================================================================== ;; Create the sqlite db for the individual test(s) (define (open-test-db work-area area-dat) (debug:print-info 11 "open-test-db " work-area) (if (and work-area (directory? work-area) (file-read-access? work-area)) (let* ((dbpath (conc work-area "/testdat.db")) (dbexists (file-exists? dbpath)) (work-area-writeable (file-write-access? work-area)) |
︙ | ︙ | |||
69 70 71 72 73 74 75 | 136000)))) (if (and tdb-writeable *db-write-access*) (sqlite3:set-busy-handler! db handler)) (if (not dbexists) (begin | | | 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | 136000)))) (if (and tdb-writeable *db-write-access*) (sqlite3:set-busy-handler! db handler)) (if (not dbexists) (begin (db:set-sync db area-dat) ;; (sqlite3:execute db "PRAGMA synchronous = FULL;") (debug:print-info 11 "Initialized test database " dbpath) (tdb:testdb-initialize db))) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") (debug:print-info 11 "open-test-db END (sucessful)" work-area) ;; now let's test that everything is correct (handle-exceptions exn |
︙ | ︙ | |||
95 96 97 98 99 100 101 | (let ((baddb (sqlite3:open-database ":memory:"))) (debug:print-info 11 "open-test-db END (unsucessful)" work-area) ;; provide an in-mem db (this is dangerous!) (tdb:testdb-initialize baddb) baddb))) ;; find and open the testdat.db file for an existing test | | | | | | | | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | (let ((baddb (sqlite3:open-database ":memory:"))) (debug:print-info 11 "open-test-db END (unsucessful)" work-area) ;; provide an in-mem db (this is dangerous!) (tdb:testdb-initialize baddb) baddb))) ;; find and open the testdat.db file for an existing test (define (tdb:open-test-db-by-test-id test-id area-dat #!key (work-area #f)) (let* ((test-path (if work-area work-area (rmt:test-get-rundir-from-test-id test-id)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path area-dat))) ;; find and open the testdat.db file for an existing test (define (tdb:open-test-db-by-test-id-local dbstruct area-dat run-id test-id #!key (work-area #f)) (let* ((test-path (if work-area work-area (db:test-get-rundir-from-test-id dbstruct run-id test-id)))) (debug:print 3 "TEST PATH: " test-path) (open-test-db test-path area-dat))) ;; find and open the testdat.db file for an existing test (define (tdb:open-run-close-db-by-test-id-local dbstruct area-dat run-id test-id work-area proc . params) (let* ((test-path (if work-area work-area (db:test-get-rundir-from-test-id dbstruct run-id test-id))) (tdb (open-test-db test-path area-dat))) (apply proc tdb params))) (define (tdb:testdb-initialize db) (debug:print 11 "db:testdb-initialize START") (sqlite3:with-transaction db (lambda () |
︙ | ︙ | |||
366 367 368 369 370 371 372 | (if (eq? time-a time-b) (string<? (conc (vector-ref a 2)) (conc (vector-ref b 2))) #f)) (string<? (conc time-a)(conc time-b)))))))) ;; | | | | 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 | (if (eq? time-a time-b) (string<? (conc (vector-ref a 2)) (conc (vector-ref b 2))) #f)) (string<? (conc time-a)(conc time-b)))))))) ;; (define (tdb:remote-update-testdat-meta-info run-id test-id work-area cpuload diskfree minutes area-dat) (let ((tdb (rmt:open-test-db-by-test-id run-id test-id area-dat work-area: work-area))) (if (sqlite3:database? tdb) (begin (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);" cpuload diskfree minutes) (sqlite3:finalize! tdb)) (debug:print 2 "Can't update testdat.db for test " test-id " read-only or non-existant")))) |
Modified tests/unittests/basicserver.scm from [eb22c48242] to [28cf57e0e6].
︙ | ︙ | |||
19 20 21 22 23 24 25 | ;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) ;; (exit) ;; Server tests go here (for-each (lambda (run-id) | | | | | | | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | ;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) ;; (exit) ;; Server tests go here (for-each (lambda (run-id) (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id)) (server:kind-run run-id *area-dat*) (test "did server start within 20 seconds?" #t (let loop ((remtries 20) (running (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))) (if running (> running 0) (if (> remtries 0) (begin (thread-sleep! 1) (loop (- remtries 1) (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))))))) (test "did server become available" #t (let loop ((remtries 10) (res (tasks:get-server (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))) (if res (vector? res) (begin (if (> remtries 0) (begin (thread-sleep! 1.1) (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id))) res))))) ) (list 0 1)) (define user (current-user-name)) (define runname "mytestrun") (define keys (rmt:get-keys)) |
︙ | ︙ | |||
115 116 117 118 119 120 121 | "runname"))) (for-each (lambda (run-id) ;; test killing server ;; (tasks:kill-server-run-id run-id) | | | | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | "runname"))) (for-each (lambda (run-id) ;; test killing server ;; (tasks:kill-server-run-id run-id) (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id)) ) (list 0 1)) ;; Tests to assess reading/writing while servers are starting/stopping (define start-time (current-seconds)) (let loop ((test-state 'start)) (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db *area-dat*) *area-dat*) run-id)) (first-dat (if (not (null? server-dats)) (car server-dats) #f))) (map (lambda (dat) (apply print (intersperse (vector->list dat) ", "))) server-dats) (test #f test-one-rec (rmt:get-test-info-by-id run-id test-one-id)) |
︙ | ︙ | |||
161 162 163 164 165 166 167 | ;; (set! *transport-type* 'http) ;; ;; (test "setup for run" #t (begin (launch:setup-for-run) ;; (string? (getenv "MT_RUN_AREA_HOME")))) ;; ;; (test "server-register, get-best-server" #t (let ((res #f)) | | | | | | | | 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | ;; (set! *transport-type* 'http) ;; ;; (test "setup for run" #t (begin (launch:setup-for-run) ;; (string? (getenv "MT_RUN_AREA_HOME")))) ;; ;; (test "server-register, get-best-server" #t (let ((res #f)) ;; (open-run-close tasks:server-register tasks:open-db *area-dat* 1 "bob" 1234 100 'live 'http) ;; (set! res (open-run-close tasks:get-best-server tasks:open-db *area-dat*)) ;; (number? (vector-ref res 3)))) ;; ;; (test "de-register server" #f (let ((res #f)) ;; (open-run-close tasks:server-deregister tasks:open-db *area-dat* "bob" port: 1234) ;; (vector? (open-run-close tasks:get-best-server tasks:open-db *area-dat*)))) ;; ;; (define server-pid #f) ;; ;; ;; Not sure how the following should work, replacing it with system of megatest -server ;; ;; (test "launch server" #t (let ((pid (process-fork (lambda () ;; ;; ;; (daemon:ize) ;; ;; (server:launch 'http))))) ;; ;; (set! server-pid pid) ;; ;; (number? pid))) ;; (system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &") ;; ;; (let loop ((n 10)) ;; (thread-sleep! 1) ;; need to wait for server to start. ;; (let ((res (open-run-close tasks:get-best-server tasks:open-db *area-dat*))) ;; (print "tasks:get-best-server returned " res) ;; (if (and (not res) ;; (> n 0)) ;; (loop (- n 1))))) ;; ;; (test "get-best-server" #t (begin ;; (client:launch) ;; (let ((dat (open-run-close tasks:get-best-server tasks:open-db *area-dat*))) ;; (vector? dat)))) ;; ;; (define *keys* (keys:config-get-fields *configdat*)) ;; (define *keyvals* (keys:target->keyval *keys* "a/b/c")) ;; ;; (test #f #t (string? (car *runremote*))) ;; (test #f '(#t "successful login") (rmt:login)) ;; *runremote* *toppath* *my-client-signature*))) |
︙ | ︙ |
Modified zmq-transport.scm from [fa40b1e46e] to [46e65cf05a].
︙ | ︙ | |||
108 109 110 111 112 113 114 | (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of (common:get-remote remote) BUG!? ;; what to do when we quit ;; ;; (on-exit (lambda () ;; (if (and toppath *server-info*) | | | 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 | (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of (common:get-remote remote) BUG!? ;; what to do when we quit ;; ;; (on-exit (lambda () ;; (if (and toppath *server-info*) ;; (open-run-close tasks:server-deregister-self (lambda ()(tasks:open-db area-dat)) (car *server-info*)) ;; (let loop () ;; (let ((queue-len 0)) ;; (thread-sleep! (random 5)) ;; (mutex-lock! *incoming-mutex*) ;; (set! queue-len (length *incoming-data*)) ;; (mutex-unlock! *incoming-mutex*) ;; (if (> queue-len 0) |
︙ | ︙ | |||
146 147 148 149 150 151 152 | (loop '())) (loop (cons packet queue-lst))))))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; | | | 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 | (loop '())) (loop (cons packet queue-lst))))))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (zmq-transport:keep-running area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) (mutex-lock! *heartbeat-mutex*) (set! sdat *server-info*) |
︙ | ︙ | |||
176 177 178 179 180 181 182 | ;; GET REAL QUEUE LENGTH FROM THE VARIABLE (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; NOTE: Get rid of this mechanism! It really is not needed... | | > > > | > > > | 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | ;; GET REAL QUEUE LENGTH FROM THE VARIABLE (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1))) ;; (print "Server running, count is " count) (if (< count 1) ;; 3x3 = 9 secs aprox (loop (+ count 1))) ;; NOTE: Get rid of this mechanism! It really is not needed... (open-run-close tasks:server-update-heartbeat (lambda () (tasks:open-db area-dat)) (car server-info)) ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) (if (> (+ last-access ;; (* 50 60 60) ;; 48 hrs ;; 60 ;; one minute ;; (* 60 60) ;; one hour (* 45 60) ;; 45 minutes, until the db deletion bug is fixed. ) (current-seconds)) (begin (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access)) (loop 0)) (begin (debug:print-info 0 "Starting to shutdown the server.") ;; need to delete only *my* server entry (future use) (set! *time-to-exit* #t) (open-run-close tasks:server-deregister-self (lambda () (tasks:open-db area-dat)) (get-host-name)) (thread-sleep! 1) (debug:print-info 0 "Max cached queries was " *max-cache-size*) (debug:print-info 0 "Server shutdown complete. Exiting") (exit))))))) (define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50)) (let ((s (if s s (make-socket stype))) |
︙ | ︙ | |||
232 233 234 235 236 237 238 | (p1 (caddr s1)) (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub)) (p2 (caddr s2))) (set! *runremote* #f) (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2) (mutex-lock! *heartbeat-mutex*) (set! *server-info* (open-run-close tasks:server-register | | | 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | (p1 (caddr s1)) (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub)) (p2 (caddr s2))) (set! *runremote* #f) (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2) (mutex-lock! *heartbeat-mutex*) (set! *server-info* (open-run-close tasks:server-register (lambda ()(tasks:open-db area-dat)) (current-process-id) ipaddrstr p1 0 'live 'zmq pubport: p2)) (debug:print-info 11 "*server-info* set to " *server-info*) |
︙ | ︙ | |||
302 303 304 305 306 307 308 | (debug:print-info 2 "Failed to login or connect to " conurl) (set! *runremote* #f) #f)))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; | | | | 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | (debug:print-info 2 "Failed to login or connect to " conurl) (set! *runremote* #f) #f)))) ;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being ;; used and to shutdown after sometime if it is not. ;; (define (zmq-transport:keep-running area-dat) ;; if none running or if > 20 seconds since ;; server last used then start shutdown ;; This thread waits for the server to come alive (let* ((server-info (let loop () (let ((sdat #f)) (mutex-lock! *heartbeat-mutex*) (set! sdat *runremote*) (mutex-unlock! *heartbeat-mutex*) (if sdat sdat (begin (sleep 4) (loop)))))) (iface (car server-info)) (port (cadr server-info)) (last-access 0) (tdb (tasks:open-db area-dat)) (spid (tasks:server-get-server-id tdb #f iface port #f))) (print "Keep-running got server pid " spid ", using iface " iface " and port " port) (let loop ((count 0)) (thread-sleep! 4) ;; no need to do this very often ;; NB// sync currently does NOT return queue-length (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1))) ;; (print "Server running, count is " count) |
︙ | ︙ |