Overview
Comment: | Removed ability to switch transports. Streamlined calls to http send-receive so that a ping-like call can be made |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | re-re-factor-server |
Files: | files | file ages | folders |
SHA1: |
a776e42a6b580e3600e70b513013f158 |
User & Date: | matt on 2014-02-16 21:02:00 |
Other Links: | branch diff | manifest | tags |
Context
2014-02-16
| ||
22:26 | Removed check for megatest version on connecting to server. Can't have more than one server, and the API should be tolerant of minor version differences. check-in: b6474c4a62 user: matt tags: re-re-factor-server | |
21:02 | Removed ability to switch transports. Streamlined calls to http send-receive so that a ping-like call can be made check-in: a776e42a6b user: matt tags: re-re-factor-server | |
2014-02-15
| ||
23:01 | Removed heartbeat check, fixed typo check-in: d21a137b36 user: matt tags: re-re-factor-server | |
Changes
Modified client.scm from [52a055740a] to [20000f46f9].
︙ | ︙ | |||
52 53 54 55 56 57 58 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 3)) | < < < < < < < < < < < < | < < < < | | < < < | | | | < < < < < < < | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | ;; *transport-type* and *runremote* from the monitor.db ;; ;; client:setup ;; ;; lookup_server, need to remove *runremote* stuff ;; (define (client:setup run-id #!key (remaining-tries 3)) (let ((hostinfo (and run-id (hash-table-ref/default *runremote* run-id #f)))) (if hostinfo hostinfo ;; have hostinfo - just return it (let* ((hostinfo (open-run-close tasks:get-server tasks:open-db run-id))) (if (not hostinfo) (if (> remaining-tries 0) (begin (server:ensure-running run-id) (client:setup run-id remaining-tries: (- remaining-tries 1))) (begin (debug:print 0 "ERROR: Expected to be able to connect to a server by now. No server available for run-id = " run-id) (exit 1))) (begin (hash-table-set! *runremote* run-id hostinfo) (debug:print-info 11 "CLIENT SETUP, hostinfo=" hostinfo) (client:start run-id hostinfo))))))) (define (client:start run-id server-info) ;; this saves the server-info in the *runremote* hash and returns it (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-info) (tasks:hostinfo-get-port server-info))) ;; client:signal-handler (define (client:signal-handler signum) (handle-exceptions exn (debug:print " ... exiting ...") (let ((th1 (make-thread (lambda () |
︙ | ︙ |
Modified db.scm from [42d3ed003f] to [00935888f7].
︙ | ︙ | |||
1810 1811 1812 1813 1814 1815 1816 | immediate flush sync set-verbosity killserver )) | | > | > > > | > | | | < | 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 | immediate flush sync set-verbosity killserver )) (define (db:login dbstruct calling-path calling-version run-id client-signature) (cond ((not (equal? calling-path *toppath*)) (list #f "Login failed due to mismatch paths: " calling-path ", " *toppath*)) ((not (equal? *run-id* run-id)) (list #f "Login failed due to mismatch run-id: " run-id ", " *run-id*)) ((not (equal? megatest-version calling-version)) (list #f "Login failed due to mismatch megatest version: " calling-version ", " megatest-version)) (else (hash-table-set! *logged-in-clients* client-signature (current-seconds)) '(#t "successful login")))) (define (db:general-call db stmtname params) (let ((query (let ((q (alist-ref (if (string? stmtname) (string->symbol stmtname) stmtname) db:queries))) (if q (car q) #f)))) |
︙ | ︙ |
Modified http-transport.scm from [7f3beca95c] to [c086471463].
︙ | ︙ | |||
231 232 233 234 235 236 237 | (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info") (exit 1)))) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) (res #f)) (handle-exceptions exn (begin (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) | < > > | > | 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 | (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info") (exit 1)))) ;; (conc url "/ctrl")) ;; (conc url "/?dat=" msg))) (res #f)) (handle-exceptions exn (begin (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) (if (> numretries 0) (begin (thread-sleep! 2) (http-transport:client-send-receive serverdat msg numretries: (- numretries 1))) #f)) (begin (debug:print-info 11 "fullurl=" fullurl "\n") ;; set up the http-client here (max-retry-attempts 5) ;; consider all requests indempotent (retry-request? (lambda (request) #t)) ;; (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) |
︙ | ︙ | |||
291 292 293 294 295 296 297 | (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info") (exit 1)))) (res #f)) (handle-exceptions exn (begin ;; TODO: Send this output to a log file so it isn't lost when running as daemon | < > > | > | | 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | (debug:print 0 "FATAL ERROR: http-transport:client-send-receive called with no server info") (exit 1)))) (res #f)) (handle-exceptions exn (begin ;; TODO: Send this output to a log file so it isn't lost when running as daemon (if (> numretries 0) ;; on the zeroeth retry do not print the error message - this allows the call to be used as a ping (no junk on output). (begin (print "ERROR IN http-transport:client-send-receive " ((condition-property-accessor 'exn 'message) exn)) (if (> (random 100) 80)(server:ensure-running run-id)) ;; every so often try starting a server (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1))) #f)) (begin (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n") ;; set up the http-client here (max-retry-attempts 5) ;; consider all requests indempotent (retry-request? (lambda (request) #t)) ;; (thread-sleep! (/ (if (> numretries 100) 100 numretries) 10)) ;; (set! numretries (- numretries 1)) ;; #t)) |
︙ | ︙ | |||
362 363 364 365 366 367 368 | (debug:print-info 11 "got res=" res) res))))) ;; ;; connect ;; (define (http-transport:client-connect run-id iface port) | < | | > < | 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 | (debug:print-info 11 "got res=" res) res))))) ;; ;; connect ;; (define (http-transport:client-connect run-id iface port) (let* ((uri-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/ctrl")))) (uri-api-dat (make-request method: 'POST uri: (uri-reference (conc "http://" iface ":" port "/api")))) (serverdat (list iface port uri-dat uri-api-dat)) (login-res (rmt:login-no-auto-client-setup serverdat run-id))) (hash-table-set! *runremote* run-id serverdat) ;; may or may not be good ... (if (and (list? login-res) (car login-res)) (begin (debug:print-info 2 "Logged in and connected to " iface ":" port) (hash-table-set! *runremote* run-id serverdat) serverdat) (begin |
︙ | ︙ | |||
446 447 448 449 450 451 452 | (if (or (not (equal? sdat (list iface port))) (not server-id)) (begin (debug:print-info 0 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)))) | < < < < < < < < < < | 449 450 451 452 453 454 455 456 457 458 459 460 461 462 | (if (or (not (equal? sdat (list iface port))) (not server-id)) (begin (debug:print-info 0 "interface changed, refreshing iface and port info") (set! iface (car sdat)) (set! port (cadr sdat)))) ;; Transfer *last-db-access* to last-access to use in checking that we are still alive (mutex-lock! *heartbeat-mutex*) (set! last-access *last-db-access*) (mutex-unlock! *heartbeat-mutex*) ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout) ;; |
︙ | ︙ | |||
508 509 510 511 512 513 514 | ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) | < < < > | 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 | ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db)) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") |
︙ | ︙ |
Modified megatest.scm from [20b3d11528] to [9958005d06].
︙ | ︙ | |||
119 120 121 122 123 124 125 | -cleanup-db : remove any orphan records, vacuum the db -update-meta : update the tests metadata for all tests -env2file fname : write the environment to fname.csh and fname.sh -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname | < | 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | -cleanup-db : remove any orphan records, vacuum the db -update-meta : update the tests metadata for all tests -env2file fname : write the environment to fname.csh and fname.sh -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -daemonize : fork into background and disconnect from stdin/out -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests |
︙ | ︙ | |||
186 187 188 189 190 191 192 | ":variable" ":value" ":expected" ":tol" ":units" ;; misc "-server" | < | 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | ":variable" ":value" ":expected" ":tol" ":units" ;; misc "-server" "-stop-server" "-port" "-extract-ods" "-pathmod" "-env2file" "-setvars" "-set-state-status" |
︙ | ︙ | |||
288 289 290 291 292 293 294 | (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) | < < < < < < | 286 287 288 289 290 291 292 293 294 295 296 297 298 299 | (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) ;;====================================================================== ;; Misc setup stuff ;;====================================================================== (debug:setup) (if (args:get-arg "-logging")(set! *logging* #t)) |
︙ | ︙ | |||
342 343 344 345 346 347 348 | ;;====================================================================== (if (args:get-arg "-server") ;; Server? Start up here. ;; (let ((tl (setup-for-run)) | < < < | < < < < < | < < < < < < < < < < < < < < < | | | | | | | | | < < > | | 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | ;;====================================================================== (if (args:get-arg "-server") ;; Server? Start up here. ;; (let ((tl (setup-for-run)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) (if run-id (server:launch run-id) (debug:print 0 "ERROR: server requires run-id be specified with -run-id"))) ;; Not a server? This section will decide how to communicate ;; ;; Setup client for all expect listed here (if (null? (lset-intersection equal? (hash-table-keys args:arg-hash) '("-list-servers" "-stop-server" "-show-cmdinfo" "-list-runs"))) (if (setup-for-run) (let ((run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) ;; if not list or kill then start a client (if appropriate) (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test") (eq? (length (hash-table-keys args:arg-hash)) 0)) (debug:print-info 1 "Server connection not needed") (begin (if run-id (begin (server:ensure-running run-id) (client:launch run-id)) (begin ;; without run-id we'll start a server for "0" (server:ensure-running 0) (client:launch 0))))))))) ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server")) (let ((tl (setup-for-run))) (if tl (let* ((servers (open-run-close tasks:get-all-servers tasks:open-db)) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") |
︙ | ︙ | |||
759 760 761 762 763 764 765 | ;; Get test paths matching target, runname, and testpatt (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) | < < | 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 | ;; Get test paths matching target, runname, and testpatt (if (or (args:get-arg "-test-files")(args:get-arg "-test-paths")) ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (state (args:get-arg ":state")) (status (args:get-arg ":status")) (target (args:get-arg "-target")) (toppath (assoc/default 'toppath cmdinfo))) (change-directory toppath) ;; (set! *runremote* runremote) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -test-paths or -test-files, exiting") |
︙ | ︙ | |||
810 811 812 813 814 815 816 | ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) | < < | 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 | ;; Archive tests matching target, runname, and testpatt (if (args:get-arg "-archive") ;; if we are in a test use the MT_CMDINFO data (if (getenv "MT_CMDINFO") (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (state (args:get-arg ":state")) (status (args:get-arg ":status")) (target (args:get-arg "-target"))) (change-directory testpath) ;; (set! *runremote* runremote) (if (not target) (begin (debug:print 0 "ERROR: -target is required.") (exit 1))) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, giving up on -archive, exiting") |
︙ | ︙ | |||
892 893 894 895 896 897 898 | (define (megatest:step step state status logfile msg) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) | < < < | 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 | (define (megatest:step step state status logfile msg) (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!") (exit 5)) (let* ((cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f)) (change-directory testpath) ;; (set! *runremote* runremote) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (and state status) (rmt:teststep-set-status! run-id test-id step state status msg logfile) (begin |
︙ | ︙ | |||
943 944 945 946 947 948 949 | (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) | < < | 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 | (if (not (getenv "MT_CMDINFO")) (begin (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!") (exit 5)) (let* ((startingdir (current-directory)) (cmdinfo (read (open-input-string (base64:base64-decode (getenv "MT_CMDINFO"))))) ;; (runremote (assoc/default 'runremote cmdinfo)) (testpath (assoc/default 'testpath cmdinfo)) (test-name (assoc/default 'test-name cmdinfo)) (runscript (assoc/default 'runscript cmdinfo)) (db-host (assoc/default 'db-host cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (work-area (assoc/default 'work-area cmdinfo)) (db #f) ;; (open-db)) (state (args:get-arg ":state")) (status (args:get-arg ":status"))) ;; (set! *runremote* runremote) (if (not (setup-for-run)) (begin (debug:print 0 "Failed to setup, exiting") (exit 1))) (if (args:get-arg "-runstep")(debug:print-info 1 "Running -runstep, first change to directory " work-area)) (change-directory work-area) |
︙ | ︙ |
Modified rmt.scm from [c7c7b5a349] to [0c6c337631].
︙ | ︙ | |||
34 35 36 37 38 39 40 | ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; cmd is a symbol ;; vars is a json string encoding the parameters for the call ;; (define (rmt:send-receive cmd run-id params) | < < < < < < < | | | | | | | | | > > > > > > | | | 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | ;; S U P P O R T F U N C T I O N S ;;====================================================================== ;; cmd is a symbol ;; vars is a json string encoding the parameters for the call ;; (define (rmt:send-receive cmd run-id params) (let* ((connection-info (client:setup (if run-id run-id 0))) (jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) (begin (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res) #f)))) (define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params) (let* ((jparams (db:obj->string params)) ;; (rmt:dat->json-str params)) (res (http-transport:client-api-send-receive run-id connection-info cmd jparams numretries: 0))) (if res (db:string->obj res) ;; (rmt:json-str->dat res) (begin (debug:print 0 "ERROR: Bad value from http-transport:client-api-send-receive " res) #f)))) ;; Wrap json library for strings (why the ports crap in the first place?) (define (rmt:dat->json-str dat) (with-output-to-string (lambda () (json-write dat)))) |
︙ | ︙ | |||
75 76 77 78 79 80 81 | ;;====================================================================== ;;====================================================================== ;; M I S C ;;====================================================================== (define (rmt:login run-id) | | > > > > > | 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | ;;====================================================================== ;;====================================================================== ;; M I S C ;;====================================================================== (define (rmt:login run-id) (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) ;; This login does no retries under the hood - it acts a bit like a ping. ;; (define (rmt:login-no-auto-client-setup connection-info run-id) (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*))) (define (rmt:kill-server run-id) (rmt:send-receive 'kill-server run-id (list run-id))) ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) |
︙ | ︙ |
Modified server.scm from [ba9371a66f] to [dad483fccb].
︙ | ︙ | |||
43 44 45 46 47 48 49 | ;; Call this to start the actual server ;; ;; all routes though here end in exit ... ;; ;; start_server ;; | | | < | | < | < < < < | 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | ;; Call this to start the actual server ;; ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id) (if (server:check-if-running run-id) ;; a server is already running (exit) (http-transport:launch run-id))) ;;====================================================================== ;; Q U E U E M A N A G E M E N T ;;====================================================================== ;; We don't want to flush the queue if it was just flushed (define *server:last-write-flush* (current-milliseconds)) |
︙ | ︙ | |||
100 101 102 103 104 105 106 | (argv))))))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) | < < < < < | < < < < < < < | | < < < < | < | > > > | | 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | (argv))))))) ;; When using zmq this would send the message back (two step process) ;; with spiffy or rpc this simply returns the return data to be returned ;; (define (server:reply return-addr query-sig success/fail result) (db:obj->string (vector success/fail query-sig result))) (define (server:ensure-running run-id) (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) (if (not server) (begin (if (even? trycount) ;; just do the server start every other time through this loop (every 8 seconds) (let ((cmdln (conc (if (getenv "MT_MEGATEST") (getenv "MT_MEGATEST") "megatest") " -server - -run-id " run-id " &> " *toppath* "/db/" run-id ".log &"))) (debug:print 0 "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (system cmdln) (pop-directory) (thread-sleep! 3) ;; (process-run (car (argv)) (list "-server" "-" "-daemonize" "-transport" (args:get-arg "-transport" "http"))) ) (begin (debug:print-info 0 "Waiting for server to start") (thread-sleep! 4))) (if (< trycount 10) (loop (open-run-close tasks:get-server tasks:open-db run-id) (+ trycount 1)) (debug:print 0 "WARNING: Couldn't start or find a server."))) (debug:print 2 "INFO: Server(s) running " server)))) (define (server:check-if-running run-id) (let loop ((server (open-run-close tasks:get-server tasks:open-db run-id)) (trycount 0)) (if server ;; note: client:start will set *runremote*. 
this needs to be changed ;; also, client:start will login to the server, also need to change that. ;; ;; client:start returns #t if login was successful. ;; (client:start run-id server) #f))) |
Modified tasks.scm from [c92f244886] to [7086ef6c82].
︙ | ︙ | |||
92 93 94 95 96 97 98 | (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (let ((res '()) (best #f)) (tasks:server-clean-out-old-records-for-run-id mdb run-id) | > | > > > > > > > > > > > | < < | 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (let ((res '()) (best #f)) (tasks:server-clean-out-old-records-for-run-id mdb run-id) (if (tasks:less-than-two-available mdb run-id) (tasks:server-set-available mdb run-id)) (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id))) ;; register that this server may come online (first to register goes though with the process) (define (tasks:server-set-available mdb run-id) (sqlite3:execute mdb "INSERT INTO servers (pid,hostname,port,pubport,start_time, priority,state,mt_version,heartbeat, interface,transport,run_id) VALUES(?, ?, ?, ?, strftime('%s','now'), ?, ?, ?,-1,?, ?, ?);" (current-process-id) ;; pid (get-host-name) ;; hostname -1 ;; port -1 ;; pubport (random 1000) ;; priority (used a tiebreaker on get-available) "available" ;; state (common:version-signature) ;; mt_version -1 ;; interface "http" ;; transport run-id )) (define (tasks:less-than-two-available mdb run-id) (let ((res 0)) (sqlite3:for-each-row (lambda (num-in-queue) (set! 
res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=?;" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id) (sqlite3:execute mdb "DELETE FROM servers WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 30 AND run_id=?;" run-id) (if (server:check-if-running run-id) (sqlite3:execute mdb "DELETE FROM servers WHERE run_id=?;" run-id))) (define (tasks:server-set-state! mdb server-id state) (sqlite3:execute mdb "UPDATE servers SET state=? WHERE id=?;" state server-id)) (define (tasks:server-delete-record! mdb server-id) (sqlite3:execute mdb "DELETE FROM servers WHERE id=?;" server-id)) |
︙ | ︙ | |||
189 190 191 192 193 194 195 | (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;") run-id) (vector header res))) | < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < < | < | < | 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 | (lambda (a . b) (set! res (cons (apply vector a b) res))) mdb (conc "SELECT " selstr " FROM servers WHERE run_id=? ORDER BY start_time DESC;") run-id) (vector header res))) (define (tasks:get-server mdb run-id) (let ((res #f) (best #f)) (sqlite3:for-each-row (lambda (id interface port pubport transport pid hostname) (set! res (vector id interface port pubport transport pid hostname))) mdb ;; removed: ;; strftime('%s','now')-heartbeat < 10 AND "SELECT id,interface,port,pubport,transport,pid,hostname FROM servers WHERE mt_version=? AND run_id=? AND state='running' ORDER BY start_time DESC LIMIT 1;" (common:version-signature) run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport) (set! 
res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport) res))) mdb "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport FROM servers ORDER BY start_time DESC;") res)) (define (tasks:kill-server status hostname port pid) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port (open-run-close tasks:server-deregister tasks:open-db hostname port: port) (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid)) (if status ;; #t means alive (begin (if (equal? hostname (get-host-name)) (handle-exceptions exn (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n" " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill ;;(process-signal pid signal/kill) ) ;; local machine, send sig term (begin ;;(debug:print-info 1 "Stopping remote servers not yet supported.")))) (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") (let ((serverdat (list hostname port))) (http-transport:client-connect hostname port) (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide (begin (if status (if (equal? hostname (get-host-name)) (begin (debug:print-info 1 "Sending signal/term to " pid " on " hostname) (process-signal pid signal/term) ;; local machine, send sig term |
︙ | ︙ |