Changes In Branch runaway-servers-fix Through [4df9fd5d18] Excluding Merge-Ins
This is equivalent to a diff from 9ffefc583c to 4df9fd5d18
2017-03-23
| ||
12:49 | adding some file-exists? protections and a fix to force-server? from 1.63 check-in: 211ecbabeb user: bjbarcla tags: runaway-servers-fix | |
12:23 | merged in matts fix for testsuite; resolved conflicts with common:file-exists? additions check-in: 474e1543fa user: bjbarcla tags: v1.63 | |
12:12 | fixed let in common:force-server? check-in: 4df9fd5d18 user: bjbarcla tags: runaway-servers-fix | |
11:34 | Fixed wrong testsuite name calculation problem. check-in: 8c653071c7 user: matt tags: runaway-servers-fix | |
01:18 | merged runaway servers fix check-in: c80ac44cb9 user: bjbarcla tags: v1.63 | |
00:53 | Check that server isn't already running before doing kind-run. This was working fine with low load but when load got high it would run away. check-in: a097994a72 user: matt tags: runaway-servers-fix | |
2017-03-22
| ||
14:44 | applied patches for db init on non-homehost protection & no-cache mode check-in: 9ffefc583c user: bjbarcla tags: v1.63 | |
13:29 | Fixed several places where an attempt was being made to open databases in spite on NOT being on homehost. Closed-Leaf check-in: 6e46d4a93e user: matt tags: db-only-on-homehost | |
2017-03-21
| ||
23:12 | fix minor typos Closed-Leaf check-in: 3910c136f6 user: matt tags: cache-control | |
11:46 | Added defence against bad sqlite3 handles to the finalizer check-in: 79058725e7 user: matt tags: v1.63 | |
Modified client.scm from [d740aa52d4] to [3c66569adb].
︙ | ︙ | |||
106 107 108 109 110 111 112 | (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) (client:setup-http areapath remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered | | > < | 106 107 108 109 110 111 112 113 114 115 116 117 118 | (case *transport-type* ((http)(http-transport:close-connections))) (remote-conndat-set! runremote #f) ;; (hash-table-delete! runremote run-id) (thread-sleep! 1) (client:setup-http areapath remaining-tries: (- remaining-tries 1)) ))) (begin ;; no server registered ;; (server:kind-run areapath) (server:start-and-wait areapath) (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries) (thread-sleep! 1) ;; (+ 5 (random (- 20 remaining-tries)))) ;; give server a little time to start up, randomize a little to avoid start storms. (client:setup-http areapath remaining-tries: (- remaining-tries 1))))))))) |
Modified common.scm from [3bdeeed025] to [fcf4113792].
︙ | ︙ | |||
148 149 150 151 152 153 154 | (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) | | > | 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | (defstruct remote (hh-dat (common:get-homehost)) ;; homehost record ( addr . hhflag ) (server-url (if *toppath* (server:check-if-running *toppath*))) ;; (server:check-if-running *toppath*) #f)) (last-server-check 0) ;; last time we checked to see if the server was alive (conndat #f) (transport *transport-type*) (server-timeout (or (server:get-timeout) 100)) (force-server #f)) ;; default to 100 seconds ;; launching and hosts (defstruct host (reachable #f) (last-update 0) (last-used 0) (last-cpuload 1)) |
︙ | ︙ | |||
553 554 555 556 557 558 559 | (define (assoc/default key lst . default) (let ((res (assoc key lst))) (if res (cadr res)(if (null? default) #f (car default))))) (define (common:get-testsuite-name) (or (configf:lookup *configdat* "setup" "testsuite" ) | > | | | 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 | (define (assoc/default key lst . default) (let ((res (assoc key lst))) (if res (cadr res)(if (null? default) #f (car default))))) (define (common:get-testsuite-name) (or (configf:lookup *configdat* "setup" "testsuite" ) (getenv "MT_TESTSUITE_NAME") (if *toppath* (pathname-file *toppath*) #f))) ;; (pathname-file (current-directory))))) (define (common:get-db-tmp-area) (if *db-cache-path* *db-cache-path* (if *toppath* (let ((dbpath (create-directory (conc "/tmp/" (current-user-name) "/megatest_localdb/" |
︙ | ︙ | |||
923 924 925 926 927 928 929 | (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt) rtestpatt) (else args-testpatt)))) (define (common:get-linktree) (or (getenv "MT_LINKTREE") (if *configdat* | | > | 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 | (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt) rtestpatt) (else args-testpatt)))) (define (common:get-linktree) (or (getenv "MT_LINKTREE") (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) (define (common:args-get-runname) (let ((res (or (args:get-arg "-runname") (args:get-arg ":runname") (getenv "MT_RUNNAME")))) ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ... res)) |
︙ | ︙ | |||
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 | ;; do we honor the caches of the config files? ;; (define (common:use-cache?) (not (or (args:get-arg "-no-cache") (and *configdat* (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== ;; items in lista are matched value and position in listb ;; return the remaining items in listb or #f | > > > > > > > > > > > > | 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 | ;; do we honor the caches of the config files? ;; (define (common:use-cache?) (not (or (args:get-arg "-no-cache") (and *configdat* (equal? (configf:lookup *configdat* "setup" "use-cache") "no"))))) ;; force use of server? ;; (define (common:force-server?) (let* ((force-setting (configf:lookup "server" "force")) (force-type (if force-setting (string->symbol force-setting) #f))) (case force-type ((#f) #f) ((always) #t) ((test) (if (args:get-arg "-execute") ;; we are in a test #t #f))))) ;;====================================================================== ;; M I S C L I S T S ;;====================================================================== ;; items in lista are matched value and position in listb ;; return the remaining items in listb or #f |
︙ | ︙ |
Modified launch.scm from [d0b975cae1] to [d367925a31].
︙ | ︙ | |||
419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 | ;; (serverinf (assoc/default 'serverinf cmdinfo)) ;; (port (assoc/default 'port cmdinfo)) (serverurl (assoc/default 'serverurl cmdinfo)) (homehost (assoc/default 'homehost cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (target (assoc/default 'target cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (env-ovrd (assoc/default 'env-ovrd cmdinfo)) (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (runtlim (assoc/default 'runtlim cmdinfo)) (item-path (item-list->path itemdat)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (keys #f) (keyvals #f) (fullrunscript (if (not runscript) #f (if (substring-index "/" runscript) runscript ;; use unadultered if contains slashes (let ((fulln (conc testpath "/" runscript))) (if (and (file-exists? fulln) (file-execute-access? fulln)) fulln runscript))))) ;; assume it is on the path ) ;; (rollup-status 0) ;; On NFS it can be slow and unreliable to get needed startup information. ;; i. Check if we are on the homehost, if so, proceed ;; ii. Check if host and port passed in via CMDINFO are valid and if ;; possible use them. (let ((bestadrs (server:get-best-guess-address (get-host-name)))) (if (equal? homehost bestadrs) ;; we are likely on the homehost | > > > > > > > > | 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 | ;; (serverinf (assoc/default 'serverinf cmdinfo)) ;; (port (assoc/default 'port cmdinfo)) (serverurl (assoc/default 'serverurl cmdinfo)) (homehost (assoc/default 'homehost cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (target (assoc/default 'target cmdinfo)) (areaname (assoc/default 'areaname cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (env-ovrd (assoc/default 'env-ovrd cmdinfo)) (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar (runname (assoc/default 'runname cmdinfo)) (megatest (assoc/default 'megatest cmdinfo)) (runtlim (assoc/default 'runtlim cmdinfo)) (item-path (item-list->path itemdat)) (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo)) (keys #f) (keyvals #f) (fullrunscript (if (not runscript) #f (if (substring-index "/" runscript) runscript ;; use unadultered if contains slashes (let ((fulln (conc testpath "/" runscript))) (if (and (file-exists? fulln) (file-execute-access? fulln)) fulln runscript))))) ;; assume it is on the path ) ;; (rollup-status 0) ;; immediated set some key variables from CMDINFO data, yes, these will be set again below ... ;; (setenv "MT_TESTSUITENAME" areaname) (setenv "MT_RUN_AREA_HOME" top-path) (set! *toppath* top-path) (setenv "MT_TEST_RUN_DIR" work-area) ;; On NFS it can be slow and unreliable to get needed startup information. ;; i. Check if we are on the homehost, if so, proceed ;; ii. Check if host and port passed in via CMDINFO are valid and if ;; possible use them. (let ((bestadrs (server:get-best-guess-address (get-host-name)))) (if (equal? homehost bestadrs) ;; we are likely on the homehost |
︙ | ︙ | |||
596 597 598 599 600 601 602 | (list "MT_TEST_RUN_DIR" work-area) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_ITEMPATH" item-path) (list "MT_RUNNAME" runname) (list "MT_MEGATEST" megatest) (list "MT_TARGET" target) | | | 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 | (list "MT_TEST_RUN_DIR" work-area) (list "MT_TEST_NAME" test-name) (list "MT_ITEM_INFO" (conc itemdat)) (list "MT_ITEMPATH" item-path) (list "MT_RUNNAME" runname) (list "MT_MEGATEST" megatest) (list "MT_TARGET" target) (list "MT_LINKTREE" (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree")) (list "MT_TESTSUITENAME" (common:get-testsuite-name)))) (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path))) ;; (change-directory top-path) ;; Can setup as client for server mode now ;; (client:setup) |
︙ | ︙ | |||
758 759 760 761 762 763 764 | (mutex-unlock! *launch-setup-mutex*) *toppath*) (let ((res (launch:setup-body force: force areapath: areapath))) (mutex-unlock! *launch-setup-mutex*) res))) (define (launch:setup-body #!key (force #f) (areapath #f)) | > > | | | | | | | | | | | | | | | | | < < < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 | (mutex-unlock! *launch-setup-mutex*) *toppath*) (let ((res (launch:setup-body force: force areapath: areapath))) (mutex-unlock! *launch-setup-mutex*) res))) (define (launch:setup-body #!key (force #f) (areapath #f)) (if (and (eq? *configstatus* 'fulldata) *toppath*) ;; no need to reprocess *toppath* ;; return toppath (let* ((use-cache (common:use-cache?)) (toppath (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath (runname (common:args-get-runname)) (target (common:args-get-target)) (linktree (common:get-linktree)) (sections (if target (list "default" target) #f)) ;; for runconfigs (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config (rundir (if (and runname target linktree)(conc linktree "/" target "/" runname) #f)) (mtcachef (and rundir (conc rundir "/" ".megatest.cfg-" megatest-version "-" megatest-fossil-hash))) (rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-" megatest-version "-" megatest-fossil-hash))) (cancreate (and rundir (file-exists? rundir)(file-write-access? rundir))) (cxt (hash-table-ref/default *contexts* toppath #f))) ;; create our cxt for this area if it doesn't already exist (if (not cxt)(hash-table-set! *contexts* toppath (make-cxt))) ;; (print "runname: " runname " target: " target " mtcachef: " mtcachef " rccachef: " rccachef) (set! *toppath* toppath) ;; This is needed when we are running as a test using CMDINFO as a datasource (cond ;; if mtcachef exists just read it, however we need to assume toppath is available in $MT_RUN_AREA_HOME ((and mtcachef (file-exists? mtcachef) (get-environment-variable "MT_RUN_AREA_HOME") use-cache) (set! *configdat* (configf:read-alist mtcachef)) (set! *runconfigdat* (configf:read-alist rccachef)) (set! *configinfo* (list *configdat* (get-environment-variable "MT_RUN_AREA_HOME"))) (set! *configstatus* 'fulldata) (set! *toppath* (get-environment-variable "MT_RUN_AREA_HOME")) *toppath*) ;; we have all the info needed to fully process runconfigs and megatest.config (mtcachef (let* ((first-pass (find-and-read-config ;; NB// sets MT_RUN_AREA_HOME as side effect mtconfig environ-patt: "env-override" given-toppath: toppath pathenvvar: "MT_RUN_AREA_HOME")) (first-rundat (let ((toppath (if toppath toppath (car first-pass)))) (read-config ;; (conc toppath "/runconfigs.config") ;; this should be converted to runconfig:read but it is non-trivial, leaving it for now. (conc (if (string? toppath) toppath (get-environment-variable "MT_RUN_AREA_HOME")) "/runconfigs.config") *runconfigdat* #t sections: sections)))) (set! *runconfigdat* first-rundat) (if first-pass ;; (begin (set! *configdat* (car first-pass)) (set! *configinfo* first-pass) (set! *toppath* (or toppath (cadr first-pass))) ;; use the gathered data unless already have it (set! toppath *toppath*) (if (not *toppath*) (begin (debug:print-error 0 *default-log-port* "you are not in a megatest area!") (exit 1))) (setenv "MT_RUN_AREA_HOME" *toppath*) ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it (let* ((keys (rmt:get-keys)) (key-vals (keys:target->keyval keys target)) (linktree (common:get-linktree)) ;; (or (getenv "MT_LINKTREE")(if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))) (second-pass (find-and-read-config mtconfig environ-patt: "env-override" given-toppath: toppath pathenvvar: "MT_RUN_AREA_HOME")) (runconfigdat (begin ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config (for-each (lambda (kt) (setenv (car kt) (cadr kt))) key-vals) (read-config (conc toppath "/runconfigs.config") *runconfigdat* #t ;; consider using runconfig:read some day ... sections: sections)))) (if cancreate (configf:write-alist runconfigdat rccachef)) (set! *runconfigdat* runconfigdat) (if cancreate (configf:write-alist *configdat* mtcachef)) (if cancreate (set! *configstatus* 'fulldata)))) ;; no configs found? should not happen but let's try to recover gracefully, return an empty hash-table (set! *configdat* (make-hash-table)) ))) ;; else read what you can and set the flag accordingly (else (let* ((cfgdat (find-and-read-config (or (args:get-arg "-config") "megatest.config") environ-patt: "env-override" given-toppath: (get-environment-variable "MT_RUN_AREA_HOME") pathenvvar: "MT_RUN_AREA_HOME"))) (if cfgdat (let* ((toppath (or (get-environment-variable "MT_RUN_AREA_HOME")(cadr cfgdat))) (rdat (read-config (conc toppath ;; convert this to use runconfig:read! "/runconfigs.config") *runconfigdat* #t sections: sections))) (set! *configinfo* cfgdat) (set! *configdat* (car cfgdat)) (set! *runconfigdat* rdat) (set! *toppath* toppath) (set! *configstatus* 'partial)) (begin (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.") (exit 2)))))) ;; additional house keeping (let* ((linktree (common:get-linktree))) (if linktree (begin (if (not (file-exists? linktree)) (begin (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Something went wrong when trying to create linktree dir at " linktree) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (exit 1)) (create-directory linktree #t)))) (handle-exceptions exn (begin (debug:print-error 0 *default-log-port* "Something went wrong when trying to create link to linktree at " *toppath*) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))) (let ((tlink (conc *toppath* "/lt"))) (if (not (file-exists? tlink)) (create-symbolic-link linktree tlink))))) (begin (debug:print-error 0 *default-log-port* "linktree not defined in [setup] section of megatest.config") ))) (if (and *toppath* (directory-exists? *toppath*)) (begin (setenv "MT_RUN_AREA_HOME" *toppath*) (setenv "MT_TESTSUITENAME" (common:get-testsuite-name))) (begin (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.") ;;(exit 1) (set! *toppath* #f) ;; force it to be false so we return #f #f )) *toppath*))) (define (get-best-disk confdat testconfig) (let* ((disks (or (and testconfig (hash-table-ref/default testconfig "disks" #f)) (hash-table-ref/default confdat "disks" #f))) (minspace (let ((m (configf:lookup confdat "setup" "minspace"))) (string->number (or m "10000"))))) (if disks |
︙ | ︙ | |||
1206 1207 1208 1209 1210 1211 1212 | ;; (list 'serverinf *server-info*) (list 'homehost (let* ((hhdat (common:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) | | > | 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 | ;; (list 'serverinf *server-info*) (list 'homehost (let* ((hhdat (common:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-testsuite-name)) (list 'toppath *toppath*) (list 'work-area work-area) (list 'test-name test-name) (list 'runscript runscript) (list 'run-id run-id ) (list 'test-id test-id ) ;; (list 'item-path item-path ) |
︙ | ︙ |
Modified rmt.scm from [37fe170e89] to [9187b44a8c].
︙ | ︙ | |||
105 106 107 108 109 110 111 | ((not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a read | > | > | | > | > > | | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | ((not (pair? (remote-hh-dat runremote))) ;; not on homehost (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little (remote-hh-dat-set! runremote (common:get-homehost)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a read ((and (not (remote-force-server runremote)) ;; honor forced use of server (cdr (remote-hh-dat runremote)) ;; on homehost (member cmd api:read-only-queries)) ;; this is a read (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 3") (rmt:open-qry-close-locally cmd 0 params)) ;; on homehost and this is a write, we already have a server, but server has died ((and (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote) ;; have a server (not (server:check-if-running *toppath*))) ;; server has died. (set! *runremote* (make-remote)) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; on homehost and this is a write, we already have a server ((and (not (remote-force-server runremote)) ;; honor forced use of server (cdr (remote-hh-dat runremote)) ;; on homehost (not (member cmd api:read-only-queries)) ;; this is a write (remote-server-url runremote)) ;; have a server (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4") (rmt:open-qry-close-locally cmd 0 params)) ;; on homehost, no server contact made and this is a write, passively start a server ((and (not (remote-force-server runremote)) ;; honor forced use of server (cdr (remote-hh-dat runremote)) ;; new (not (remote-server-url runremote)) (not (member cmd api:read-only-queries))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") (let ((server-url (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call (if server-url (remote-server-url-set! runremote server-url) ;; the string can be consumed by the client setup if needed (server:kind-run *toppath*))) (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5.1") (rmt:open-qry-close-locally cmd 0 params)) ((and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost (not (remote-conndat runremote))) ;; and no connection (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6 hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) (mutex-unlock! *rmt-mutex*) (server:start-and-wait *toppath*) (if (common:force-server?)(remote-force-server-set! runremote #t)) (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; all set up if get this far, dispatch the query ((and (not (remote-force-server runremote)) (cdr (remote-hh-dat runremote))) ;; we are on homehost (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 7") (rmt:open-qry-close-locally cmd (if rid rid 0) params)) ;; not on homehost, do server query (else (mutex-unlock! *rmt-mutex*) |
︙ | ︙ |
Modified server.scm from [c9206854e2] to [948061a1f4].
︙ | ︙ | |||
255 256 257 258 259 260 261 | (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run areapath) | > | | | | | | | | | > | > > | > > | < < < < < < < < < | 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 | (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run areapath) (if (not (server:check-if-running areapath)) ;; why try if there is already a server running? (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun (call-num (car last-run-dat)) (when-run (cadr last-run-dat)) (run-delay (+ (case call-num ((0) 0) ((1) 20) ((2) 300) (else 600)) (random 5))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (lock-file (conc areapath "/logs/server-start.lock"))) (if (> (- (current-seconds) when-run) run-delay) (begin (common:simple-file-lock lock-file expire-time: 15) (server:run areapath) (thread-sleep! 5) ;; don't release the lock for at least a few seconds (common:simple-file-release-lock lock-file))) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds)))))) (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout))) (let loop ((server-url (server:check-if-running areapath))) (if (or server-url (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available. server-url (let ((num-ok (length (server:get-best (server:get-list areapath))))) (if (< num-ok 1) ;; if there are no decent candidates for servers then try starting a new one (server:kind-run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath))))))) (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. ;; no longer care if multiple servers are started by accident. older servers will drop off in time. ;; (define (server:check-if-running areapath) (let* ((servers (server:get-best (server:get-list areapath)))) (if (null? servers) #f (let loop ((hed (car servers)) |
︙ | ︙ |