Overview
Comment: | Removed references to homehost |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.6584-ck5 |
Files: | files | file ages | folders |
SHA1: |
1d9883427627a2e998a5824ca303b7ea |
User & Date: | matt on 2021-04-25 05:14:25 |
Other Links: | branch diff | manifest | tags |
Context
2021-04-25
| ||
05:34 | Added area key check-in: f160cf8d52 user: matt tags: v1.6584-ck5 | |
05:14 | Removed references to homehost check-in: 1d98834276 user: matt tags: v1.6584-ck5 | |
2021-04-23
| ||
23:57 | Basic initialization of dbstruct works. check-in: 1b388397ae user: matt tags: v1.6584-ck5 | |
Changes
Modified archivemod.scm from [a7eb8f34d2] to [fd3e9e2a92].
︙ | ︙ | |||
51 52 53 54 55 56 57 | chicken.process chicken.process-context chicken.process-context.posix chicken.sort chicken.string chicken.time chicken.time.posix | > | | 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | chicken.process chicken.process-context chicken.process-context.posix chicken.sort chicken.string chicken.time chicken.time.posix system-information (prefix base64 base64:) ;; csv-xml directory-utils matchable regex s11n srfi-1 |
︙ | ︙ | |||
415 416 417 418 419 420 421 | (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) (if s (string->symbol s) 'bup))) (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) (print-prefix "Running: ") (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) | | | 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 | (archiver (let ((s (configf:lookup *configdat* "archive" "archiver"))) (if s (string->symbol s) 'bup))) (rsync-exe (or (configf:lookup *configdat* "archive" "rsync") "rsync")) (print-prefix "Running: ") (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (get-host-name)) ;; common:get-homehost)) ;; TODO: Fix this. (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) (tmp-db-path (conc (common:get-db-tmp-area) "/megatest.db")) (dbfile (conc archive-staging-db "/megatest.db"))) (create-directory archive-staging-db #t) (let-values (((pid-val exit-status exit-code) (run-n-wait rsync-exe params: (list "-v" (conc (car home-host) ":"tmp-db-path) archive-staging-db) print-cmd: print-prefix))) (if (eq? exit-code 0) |
︙ | ︙ |
Modified launchmod.scm from [032f2a94bb] to [6c6ba3f325].
︙ | ︙ | |||
369 370 371 372 373 374 375 | (ezsteps (assoc/default 'ezsteps cmdinfo)) (subrun (assoc/default 'subrun cmdinfo)) ;; (runremote (assoc/default 'runremote cmdinfo)) ;; (transport (assoc/default 'transport cmdinfo)) ;; not used ;; (serverinf (assoc/default 'serverinf cmdinfo)) ;; (port (assoc/default 'port cmdinfo)) (serverurl (assoc/default 'serverurl cmdinfo)) | | | 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | (ezsteps (assoc/default 'ezsteps cmdinfo)) (subrun (assoc/default 'subrun cmdinfo)) ;; (runremote (assoc/default 'runremote cmdinfo)) ;; (transport (assoc/default 'transport cmdinfo)) ;; not used ;; (serverinf (assoc/default 'serverinf cmdinfo)) ;; (port (assoc/default 'port cmdinfo)) (serverurl (assoc/default 'serverurl cmdinfo)) ;; (homehost (assoc/default 'homehost cmdinfo)) (run-id (assoc/default 'run-id cmdinfo)) (test-id (assoc/default 'test-id cmdinfo)) (target (assoc/default 'target cmdinfo)) (areaname (assoc/default 'areaname cmdinfo)) (itemdat (assoc/default 'itemdat cmdinfo)) (env-ovrd (assoc/default 'env-ovrd cmdinfo)) (set-vars (assoc/default 'set-vars cmdinfo)) ;; pre-overrides from -setvar |
︙ | ︙ | |||
1442 1443 1444 1445 1446 1447 1448 | (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) ;; (list 'transport (conc *transport-type*)) ;; (list 'serverinf *server-info*) | | | 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 | (set! cmdparms (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda () ;; (list 'hosts hosts) (write (list (list 'testpath test-path) ;; (list 'transport (conc *transport-type*)) ;; (list 'serverinf *server-info*) #;(list 'homehost (let* ((hhdat (common:get-homehost))) (if hhdat (car hhdat) #f))) (list 'serverurl (if *runremote* (remote-server-url *runremote*) #f)) ;; (list 'areaname (common:get-area-name)) |
︙ | ︙ | |||
2279 2280 2281 2282 2283 2284 2285 | )) ;; we can safely cache megatest.config since we have a valid runconfig data)))) ;;====================================================================== ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") | | | 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 | )) ;; we can safely cache megatest.config since we have a valid runconfig data)))) ;;====================================================================== ;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage (define (common:watchdog) (debug:print-info 13 *default-log-port* "common:watchdog entered.") #;(if (launch:setup) (if (common:on-homehost?) (let ((dbstruct (db:setup #t))) (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct) (cond ((dbr:dbstruct-read-only dbstruct) (debug:print-info 13 *default-log-port* "loading read-only watchdog") (common:readonly-watchdog dbstruct)) |
︙ | ︙ |
Modified megatest.scm from [ef10051c2f] to [58763bc3e0].
︙ | ︙ | |||
880 881 882 883 884 885 886 | ;; for some switches always print the command to stderr ;; (if (args:any-defined? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; | | | 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 | ;; for some switches always print the command to stderr ;; (if (args:any-defined? "-run" "-runall" "-remove-runs" "-set-state-status" "-kill-runs" "-kill-rerun") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;; some switches imply homehost. Exit here if not on homehost ;; #;(let ((homehost-required (list "-cleanup-db" "-server"))) (if (apply args:any-defined? homehost-required) (if (not (common:on-homehost?)) (for-each (lambda (switch) (if (args:get-arg switch) (begin (debug:print 0 *default-log-port* "ERROR: you must be on the homehost to run with " switch |
︙ | ︙ | |||
2453 2454 2455 2456 2457 2458 2459 | (exit 0))) (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstruct (if (and toppath | | | 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 | (exit 0))) (if (or (getenv "MT_RUNSCRIPT") (args:get-arg "-repl") (args:get-arg "-load")) (let* ((toppath (launch:setup)) (dbstruct (if (and toppath #;(common:on-homehost?)) (db:setup #f) #f))) ;; make-dbr:dbstruct path: toppath local: (args:get-arg "-local")) #f))) (if *toppath* (cond ((getenv "MT_RUNSCRIPT") ;; How to run megatest scripts ;; |
︙ | ︙ |
Modified rmtmod.scm from [06da62af7d] to [7416ecbdb6].
︙ | ︙ | |||
149 150 151 152 153 154 155 | (client:setup areapath) #f)))) ;;====================================================================== ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; | | > > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < | > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 | (client:setup areapath) #f)))) ;;====================================================================== ;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname)) ;; (define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected #f) ;; ;; #;(common:telemetry-log (conc "rmt:"(->string cmd)) ;; ;; payload: `((rid . ,rid) ;; ;; (params . ,params))) ;; ;; ;; ;; (if (> attemptnum 2) ;; ;; (debug:print 0 *default-log-port* "INFO: attemptnum in rmt:send-receive is " attemptnum)) ;; ;; ;; ;; (cond ;; ;; ((> attemptnum 2) (thread-sleep! 0.053)) ;; ;; ((> attemptnum 10) (thread-sleep! 0.5)) ;; ;; ((> attemptnum 20) (thread-sleep! 1))) ;; ;; (if (and (> attemptnum 5) (= 0 (modulo attemptnum 15))) ;; ;; (begin (server:run *toppath*) (thread-sleep! 3))) ;; ;; ;; ;; ;; ;; ;;DOT digraph megatest_state_status { ;; ;; ;;DOT ranksep=0; ;; ;; ;;DOT // rankdir=LR; ;; ;; ;;DOT node [shape="box"]; ;; ;; ;;DOT "rmt:send-receive" -> MUTEXLOCK; ;; ;; ;;DOT { edge [style=invis];"case 1" -> "case 2" -> "case 3" -> "case 4" -> "case 5" -> "case 6" -> "case 7" -> "case 8" -> "case 9" -> "case 10" -> "case 11"; } ;; ;; ;; do all the prep locked under the rmt-mutex ;; ;; (mutex-lock! *rmt-mutex*) ;; ;; ;; ;; ;; 1. check if server is started IFF cmd is a write OR if we are not on the homehost, store in runremote ;; ;; ;; 2. check the age of the connections. refresh the connection if it is older than timeout-20 seconds. ;; ;; ;; 3. do the query, if on homehost use local access ;; ;; ;; ;; ;; (let* ((start-time (current-seconds)) ;; snapshot time so all use cases get same value ;; ;; (areapath *toppath*);; TODO - resolve from dbstruct to be compatible with multiple areas ;; ;; (runremote (or area-dat ;; ;; *runremote*)) ;; ;; (attemptnum (+ 1 attemptnum)) ;; ;; (readonly-mode (rmtmod:calc-ro-mode runremote *toppath*))) ;; ;; ;; ;; ;; DOT INIT_RUNREMOTE; // leaving off - doesn't really add to the clarity ;; ;; ;; DOT MUTEXLOCK -> INIT_RUNREMOTE [label="no remote?"]; ;; ;; ;; DOT INIT_RUNREMOTE -> MUTEXLOCK; ;; ;; ;; ensure we have a record for our connection for given area ;; ;; (if (not runremote) ;; can remove this one. should never get here. ;; ;; (begin ;; ;; (set! *runremote* (make-and-init-remote)) ;; ;; (let* ((server-info (remote-server-info *runremote*))) ;; ;; (if server-info ;; ;; (begin ;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) ;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) ;; ;; (set! runremote *runremote*))) ;; new runremote will come from this on next iteration ;; ;; ;; ;; ;; DOT SET_HOMEHOST; // leaving off - doesn't really add to the clarity ;; ;; ;; DOT MUTEXLOCK -> SET_HOMEHOST [label="no homehost?"]; ;; ;; ;; DOT SET_HOMEHOST -> MUTEXLOCK; ;; ;; ;; ensure we have a homehost record ;; ;; (if (not (pair? (remote-hh-dat runremote))) ;; not on homehost ;; ;; (thread-sleep! 0.1) ;; since we shouldn't get here, delay a little ;; ;; (remote-hh-dat-set! runremote (common:get-homehost))) ;; ;; ;; ;; ;;(print "BB> readonly-mode is "readonly-mode" dbfile is "dbfile) ;; ;; (cond ;; ;; ;;DOT EXIT; ;; ;; ;;DOT MUTEXLOCK -> EXIT [label="> 15 attempts"]; {rank=same "case 1" "EXIT" } ;; ;; ;; give up if more than 150 attempts ;; ;; ((> attemptnum 150) ;; ;; (debug:print 0 *default-log-port* "ERROR: 150 tries to start/connect to server. Giving up.") ;; ;; (exit 1)) ;; ;; ;; ;; ;;DOT CASE2 [label="local\nreadonly\nquery"]; ;; ;; ;;DOT MUTEXLOCK -> CASE2; {rank=same "case 2" CASE2} ;; ;; ;;DOT CASE2 -> "rmt:open-qry-close-locally"; ;; ;; ;; readonly mode, read request- handle it - case 2 ;; ;; ((and readonly-mode ;; ;; (member cmd api:read-only-queries)) ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 2") ;; ;; (rmt:open-qry-close-locally cmd 0 params) ;; ;; ) ;; ;; ;; ;; ;;DOT CASE3 [label="write in\nread-only mode"]; ;; ;; ;;DOT MUTEXLOCK -> CASE3 [label="readonly\nmode?"]; {rank=same "case 3" CASE3} ;; ;; ;;DOT CASE3 -> "#f"; ;; ;; ;; readonly mode, write request. Do nothing, return #f ;; ;; (readonly-mode (extras-readonly-mode *rmt-mutex* *default-log-port* cmd params)) ;; ;; ;; ;; ;; This block was for pre-emptively resetting the connection if there had been no communication for some time. ;; ;; ;; I don't think it adds any value. If the server is not there, just fail and start a new connection. ;; ;; ;; also, the expire-time calculation might not be correct. We want, time-since-last-server-access > (server:get-timeout) ;; ;; ;; ;; ;; ;;DOT CASE4 [label="reset\nconnection"]; ;; ;; ;;DOT MUTEXLOCK -> CASE4 [label="have connection,\nlast_access > expire_time"]; {rank=same "case 4" CASE4} ;; ;; ;;DOT CASE4 -> "rmt:send-receive"; ;; ;; ;; reset the connection if it has been unused too long ;; ;; ((and runremote ;; ;; (remote-conndat runremote) ;; ;; (> (current-seconds) ;; if it has been more than server-timeout seconds since last contact, close this connection and start a new on ;; ;; (+ (http-transport:server-dat-get-last-access (remote-conndat runremote)) ;; ;; (remote-server-timeout runremote)))) ;; ;; (debug:print-info 0 *default-log-port* "Connection to " (remote-server-url runremote) " expired due to no accesses, forcing new connection.") ;; ;; (http-transport:close-connections area-dat: runremote) ;; ;; (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ;; ;; ;; ;;DOT CASE5 [label="local\nread"]; ;; ;; ;;DOT MUTEXLOCK -> CASE5 [label="server not required,\non homehost,\nread-only query"]; {rank=same "case 5" CASE5}; ;; ;; ;;DOT CASE5 -> "rmt:open-qry-close-locally"; ;; ;; ;; ;; ;; on homehost and this is a read ;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required ;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost ;; ;; (member cmd api:read-only-queries)) ;; this is a read ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 5") ;; ;; (rmt:open-qry-close-locally cmd 0 params)) ;; ;; ;; ;; ;;DOT CASE6 [label="init\nremote"]; ;; ;; ;;DOT MUTEXLOCK -> CASE6 [label="on homehost,\nwrite query,\nhave server,\ncan't reach it"]; {rank=same "case 6" CASE6}; ;; ;; ;;DOT CASE6 -> "rmt:send-receive"; ;; ;; ;; on homehost and this is a write, we already have a server, but server has died ;; ;; ((and (cdr (remote-hh-dat runremote)) ;; on homehost ;; ;; (not (member cmd api:read-only-queries)) ;; this is a write ;; ;; (remote-server-url runremote) ;; have a server ;; ;; (not (server:ping (remote-server-url runremote) (remote-server-id runremote)))) ;; server has died. NOTE: this is not a cheap call! Need better approach. ;; ;; (set! *runremote* (make-and-init-remote)) ;; ;; (let* ((server-info (remote-server-info *runremote*))) ;; ;; (if server-info ;; ;; (begin ;; ;; (remote-server-url-set! *runremote* (server:record->url server-info)) ;; ;; (remote-server-id-set! *runremote* (server:record->id server-info))))) ;; ;; (remote-force-server-set! runremote (common:force-server?)) ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 6") ;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ;; ;; ;; ;;DOT CASE7 [label="homehost\nwrite"]; ;; ;; ;;DOT MUTEXLOCK -> CASE7 [label="server not required,\non homehost,\na write,\nhave a server"]; {rank=same "case 7" CASE7}; ;; ;; ;;DOT CASE7 -> "rmt:open-qry-close-locally"; ;; ;; ;; on homehost and this is a write, we already have a server ;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required ;; ;; (cdr (remote-hh-dat runremote)) ;; on homehost ;; ;; (not (member cmd api:read-only-queries)) ;; this is a write ;; ;; (remote-server-url runremote)) ;; have a server ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 4.1") ;; ;; (rmt:open-qry-close-locally cmd 0 params)) ;; ;; ;; ;; ;;DOT CASE8 [label="force\nserver"]; ;; ;; ;;DOT MUTEXLOCK -> CASE8 [label="server not required,\nhave homehost info,\nno connection yet,\nnot a read-only query"]; {rank=same "case 8" CASE8}; ;; ;; ;;DOT CASE8 -> "rmt:open-qry-close-locally"; ;; ;; ;; on homehost, no server contact made and this is a write, passively start a server ;; ;; ((and (not (remote-force-server runremote)) ;; honor forced use of server, i.e. server NOT required ;; ;; (cdr (remote-hh-dat runremote)) ;; have homehost ;; ;; (not (remote-server-url runremote)) ;; no connection yet ;; ;; (not (member cmd api:read-only-queries))) ;; not a read-only query ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") ;; ;; (let ((server-info (server:check-if-running *toppath*))) ;; (server:read-dotserver->url *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call ;; ;; (if server-info ;; ;; (begin ;; ;; (remote-server-url-set! runremote (server:record->url server-info)) ;; the string can be consumed by the client setup if needed ;; ;; (remote-server-id-set! runremote (server:record->id server-info))) ;; ;; (if (common:force-server?) ;; ;; (server:start-and-wait *toppath*) ;; ;; (server:kind-run *toppath*))) ;; ;; (remote-force-server-set! runremote (common:force-server?)) ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8.1") ;; ;; (rmt:open-qry-close-locally cmd 0 params))) ;; ;; ;; ;; ;;DOT CASE9 [label="force server\nnot on homehost"]; ;; ;; ;;DOT MUTEXLOCK -> CASE9 [label="no connection\nand either require server\nor not on homehost"]; {rank=same "case 9" CASE9}; ;; ;; ;;DOT CASE9 -> "start\nserver" -> "rmt:send-receive"; ;; ;; ((or (and (remote-force-server runremote) ;; we are forcing a server and don't yet have a connection to one ;; ;; (not (remote-conndat runremote))) ;; ;; (and (not (cdr (remote-hh-dat runremote))) ;; not on a homehost ;; ;; (not (remote-conndat runremote)))) ;; and no connection ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 9, hh-dat: " (remote-hh-dat runremote) " conndat: " (remote-conndat runremote)) ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (if (not (server:check-if-running *toppath*)) ;; who knows, maybe one has started up? ;; ;; (server:start-and-wait *toppath*)) ;; ;; (remote-conndat-set! runremote (rmt:get-connection-info *toppath*)) ;; calls client:setup which calls client:setup-http ;; ;; (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; TODO: add back-off timeout as ;; ;; ;; ;; ;;DOT CASE10 [label="on homehost"]; ;; ;; ;;DOT MUTEXLOCK -> CASE10 [label="server not required,\non homehost"]; {rank=same "case 10" CASE10}; ;; ;; ;;DOT CASE10 -> "rmt:open-qry-close-locally"; ;; ;; ;; all set up if get this far, dispatch the query ;; ;; ((and (not (remote-force-server runremote)) ;; ;; (cdr (remote-hh-dat runremote))) ;; we are on homehost ;; ;; (mutex-unlock! *rmt-mutex*) ;; ;; (debug:print-info 12 *default-log-port* "rmt:send-receive, case 10") ;; ;; (rmt:open-qry-close-locally cmd (if rid rid 0) params)) ;; ;; ;; ;; ;;DOT CASE11 [label="send_receive"]; ;; ;; ;;DOT MUTEXLOCK -> CASE11 [label="else"]; {rank=same "case 11" CASE11}; ;; ;; ;;DOT CASE11 -> "rmt:send-receive" [label="call failed"]; ;; ;; ;;DOT CASE11 -> "RESULT" [label="call succeeded"]; ;; ;; ;; not on homehost, do server query ;; ;; (else (extras-case-11 *default-log-port* runremote cmd params attemptnum rid))))) ;; ;; ;;DOT } ;; bunch of small functions factored out of send-receive to make debug easier ;; ;; No Title ;; Error: (vector-ref) out of range ;; #(#<condition: (exn type)> (#("db.scm:3740: regex#regexp" #f #f) #("db.scm:3739: regex#string-substitute" #f #f) #("db.scm:3738: base64#base64-decode" #f #f) #("db.scm:3737: z3#z3:decode-buffer" #f #f) #("db.scm:3736: with-input-from-string" #f #f) #("db.scm:3741: s11n#deserialize" #f #f) #("api.scm:374: api:execute-requests" #f #f) #("api.scm:139: call-with-current-continuation" #f #f) #("api.scm:139: with-exception-handler" #f #f) #("api.scm:139: ##sys#call-with-values" #f #f) #("api.scm:158: string->symbol" #f #f) #("api.scm:160: current-milliseconds" #f #f) #("api.scm:161: dbr:dbstruct-read-only" #f #f) #("api.scm:139: k15" #f #f) #("api.scm:139: g19" #f #f) #("api.scm:142: get-call-chain" #f #f)) #("get-test-info-by-id" (1102 507299))) |
︙ | ︙ | |||
1763 1764 1765 1766 1767 1768 1769 | (exit 1))))))) ;;====================================================================== ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") ;; (exit 1)))) (define (common:run-sync?) | | | | 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 | (exit 1))))))) ;;====================================================================== ;; (begin ;; (debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.") ;; (exit 1)))) (define (common:run-sync?) ;; (and (common:on-homehost?) (args:get-arg "-server")) ;; this one seems to be the general entry point ;; (define (server:start-and-wait areapath #!key (timeout 60)) |
︙ | ︙ | |||
1787 1788 1789 1790 1791 1792 1793 | (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one (server:kind-run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) (+ try-num 1))))))) (define (make-and-init-remote) | | | 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 | (< num-ok 1)) ;; if there are no decent candidates for servers then try starting a new one (server:kind-run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath) (+ try-num 1))))))) (define (make-and-init-remote) (make-remote ;; hh-dat: (common:get-homehost) server-info: (if *toppath* (server:check-if-running *toppath*) #f) server-timeout: (server:expiration-timeout))) ;; called in megatest.scm, host-port is string hostname:port |
︙ | ︙ |
Modified runsmod.scm from [1219e25fbf] to [92c90befb4].
︙ | ︙ | |||
1022 1023 1024 1025 1026 1027 1028 | ;; (if (runs:dat-load-mgmt-function runsdat) ((runs:dat-load-mgmt-function runsdat)) (runs:dat-load-mgmt-function-set! runsdat (lambda () ;; jobtools maxload is useful for where the full Megatest run is done on one machine | | | | 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 | ;; (if (runs:dat-load-mgmt-function runsdat) ((runs:dat-load-mgmt-function runsdat)) (runs:dat-load-mgmt-function-set! runsdat (lambda () ;; jobtools maxload is useful for where the full Megatest run is done on one machine (if (and #;(not (common:on-homehost?)) maxload) ;; only gate if maxload is specified, NOTE: maxload is normalized, i.e. load=1 means all cpus fully utilized (common:wait-for-normalized-load maxload "Waiting for load to drop before starting more tests" #f)) ;; jobtools maxhomehostload is intended to prevent overloading on the homehost which can cause database corruption issues #;(if maxhomehostload (common:wait-for-homehost-load maxhomehostload (conc "Waiting for homehost load to drop below normalized value of " maxhomehostload)))))) (if (and (not (null? prereqs-not-met)) (runs:lownoise (conc "waiting on tests " prereqs-not-met hed) 60)) |
︙ | ︙ |
Modified servermod.scm from [4b44aabd8d] to [ea2cb26e2d].
︙ | ︙ | |||
65 66 67 68 69 70 71 | (conc "http://" (car hostport) ":" (cadr hostport)))) ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; | | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | (conc "http://" (car hostport) ":" (cadr hostport)))) ;;====================================================================== ;; logic for getting homehost. Returns (host . at-home) ;; IF *toppath* is not set, wait up to five seconds trying every two seconds ;; (this is to accomodate the watchdog) ;; #;(define (common:get-homehost #!key (trynum 5)) (assert *toppath* "ERROR: common:get-homehost called before launch:setup. This is fatal.") ;; called often especially at start up. use mutex to eliminate collisions (mutex-lock! *homehost-mutex*) (cond (*home-host* (mutex-unlock! *homehost-mutex*) *home-host*) |
︙ | ︙ | |||
123 124 125 126 127 128 129 | (set! *home-host* (cons homehost at-home)) (mutex-unlock! *homehost-mutex*) *home-host*)))) ;;====================================================================== ;; am I on the homehost? ;; | | | | | > | > | > | | | | | | | | | | | | | | | 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 | (set! *home-host* (cons homehost at-home)) (mutex-unlock! *homehost-mutex*) *home-host*)))) ;;====================================================================== ;; am I on the homehost? ;; #;(define (common:on-homehost?) (let ((hh (common:get-homehost))) (if hh (cdr hh) #f))) #;(define (common:wait-for-homehost-load maxnormload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f))) (common:wait-for-normalized-load maxnormload msg hh))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run areapath) ;; areapath is *toppath* for a given testsuite area (let* ((curr-host (get-host-name)) ;; (attempt-in-progress (server:start-attempted? areapath)) ;; (dot-server-url (server:check-if-running areapath)) (curr-ip (server:get-best-guess-address curr-host)) (curr-pid (current-process-id)) ;; (homehost (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" )) ;; (target-host (car homehost)) (testsuite (common:get-area-name)) (logfile (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log")) (profile-mode (or (configf:lookup *configdat* "misc" "profilesw") "")) (cmdln (conc (common:get-megatest-exe) " -server " (or (get-host-name) "-") (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") " -daemonize " "") ;; " -log " logfile " -m testsuite:" testsuite " " profile-mode )) ;; (log-rotate (make-thread common:rotate-logs "server run, rotate logs thread")) ;; why are we rotating logs here? This is a sensitive location with a lot going on!? (load-limit (configf:lookup-number *configdat* "jobtools" "max-server-start-load" default: 3.0))) ;; we want the remote server to start in *toppath* so push there (push-directory areapath) (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...") ;; (thread-start! log-rotate) ;; host.domain.tld match host? #;(if (and target-host ;; look at target host, is it host.domain.tld or ip address and does it ;; match current ip or hostname (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "NBFAKE_LOG" logfile) ;; (thread-sleep! (/ (pseudo-random-integer 5000) 1000)) ;; add about a random (up to 5 seconds) initial delay. It seems pretty common that many running tests request a server at the same time ;; (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever #;(common:wait-for-homehost-load load-limit (conc " delaying server start due to load on homehost. limit is " load-limit)) (system (conc "nbfake " cmdln)) (unsetenv "NBFAKE_LOG") ;; (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) ;; (thread-join! log-rotate) (pop-directory))) (define (server:record->url servr) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server url from " servr ", exn=" exn) |
︙ | ︙ |