Overview
Comment: | protected config file call to delete-file with exception handler. Fixed logic on connecting using CMDINFO. Fixed -list-servers and -kill-servers. Turned exception handler back on in portlogger. Removed the addition of a little noise from the server timeout handling in rmt.scm |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 |
Files: | files | file ages | folders |
SHA1: |
7b318f91bdd04d485610aaf5b8f20137 |
User & Date: | matt on 2017-03-27 23:59:53 |
Other Links: | branch diff | manifest | tags |
Context
2017-03-28
| ||
00:35 | Improved reliability but now have issue with connection. check-in: 45da129709 user: matt tags: v1.64 | |
2017-03-27
| ||
23:59 | protected config file call to delete-file with exception handler. Fixed logic on connecting using CMDINFO. Fixed -list-servers and -kill-servers. Turned exception handler back on in portlogger. Removed the addition of a little noise from the server timeout handling in rmt.scm check-in: 7b318f91bd user: matt tags: v1.64 | |
12:41 | prereq proc is supposed to return test records, not test names. check-in: 2b9e485b7f user: matt tags: v1.64 | |
Changes
Modified configf.scm from [346c0caf52] to [0cf569e087].
︙ | ︙ | |||
669 670 671 672 673 674 675 | (with-output-to-file fname ;; first write out the file (lambda () (pp dat))) (if (file-exists? fname) ;; now verify it is readable (if (configf:read-alist fname) #t ;; data is good. (begin | | > > | > | 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 | (with-output-to-file fname ;; first write out the file (lambda () (pp dat))) (if (file-exists? fname) ;; now verify it is readable (if (configf:read-alist fname) #t ;; data is good. (begin (handle-exceptions exn #f (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.") (delete-file fname)) #f)) #f))) ;; convert hierarchial list to ini format ;; (define (configf:config->ini data) (map |
︙ | ︙ |
Modified launch.scm from [cc10125ef0] to [f8bf4a3053].
︙ | ︙ | |||
470 471 472 473 474 475 476 | (> (length host-port) 1)) (let* ((host (car host-port)) (port (cadr host-port)) (start-res (http-transport:client-connect host port)) (ping-res (rmt:login-no-auto-client-setup start-res))) (if (and start-res ping-res) | | > | | | | | | | | 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 | (> (length host-port) 1)) (let* ((host (car host-port)) (port (cadr host-port)) (start-res (http-transport:client-connect host port)) (ping-res (rmt:login-no-auto-client-setup start-res))) (if (and start-res ping-res) ;; (begin ;; let ((url (http-transport:server-dat-make-url start-res))) (begin (remote-conndat-set! *runremote* start-res) ;; (remote-server-url-set! *runremote* url) ;; (if (server:ping url) (debug:print-info 0 *default-log-port* "connected to " host ":" port " using CMDINFO data.")) ;; (begin ;; (debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url) ;; (remote-conndat-set! *runremote* #f) ;; (remote-server-url-set! *runremote* #f)))) (debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.") )) (begin (debug:print-info 0 *default-log-port* (if host-port (conc "received invalid host-port information " host-port) "no host-port information received")) ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare. |
︙ | ︙ |
Modified megatest.scm from [164cc6d2b1] to [84dec1a162].
︙ | ︙ | |||
155 156 157 158 159 160 161 | -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile -list-servers : list the servers | < | | 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 | -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|rpc : use http or rpc for transport (default is http) -log logfile : send stdout and stderr to logfile -list-servers : list the servers -kill-servers : kill all servers -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm -mark-incompletes : find and mark incomplete tests -ping run-id|host:port : ping server, exit with 0 if found -debug N|N,M,O... : enable debug 0-N or N and M and O ... -config fname : override the megatest.config file with fname -append-config fname : append fname to the megatest.config file |
︙ | ︙ | |||
251 252 253 254 255 256 257 | ":expected" ":tol" ":units" ;; misc "-start-dir" "-contour" "-server" | < < | 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 | ":expected" ":tol" ":units" ;; misc "-start-dir" "-contour" "-server" "-transport" "-port" "-extract-ods" "-pathmod" "-env2file" "-envcap" "-envdelta" "-setvars" |
︙ | ︙ | |||
317 318 319 320 321 322 323 324 325 326 327 328 329 330 | "-cache-db" "-use-db-cache" ;; misc "-repl" "-lock" "-unlock" "-list-servers" "-run-wait" ;; wait on a run to complete (i.e. no RUNNING) "-local" ;; run some commands using local db access "-generate-html" ;; misc queries "-list-disks" "-list-targets" | > | 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 | "-cache-db" "-use-db-cache" ;; misc "-repl" "-lock" "-unlock" "-list-servers" "-kill-servers" "-run-wait" ;; wait on a run to complete (i.e. no RUNNING) "-local" ;; run some commands using local db access "-generate-html" ;; misc queries "-list-disks" "-list-targets" |
︙ | ︙ | |||
489 490 491 492 493 494 495 | (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) | | | | 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 | (eq? pid-val 0)) (begin (printf "Sending signal/term to ~A\n" pid) (process-signal pid signal/term)))))) (process:children #f)) (original-exit exit-code))))) ;; for some switches always print the command to stderr ;; (if (args:any? "-run" "-runall" "-remove-runs" "-set-state-status") (debug:print 0 *default-log-port* (string-intersperse (argv) " "))) ;;====================================================================== ;; Misc setup stuff ;;====================================================================== (debug:setup) |
︙ | ︙ | |||
765 766 767 768 769 770 771 | (if (args:get-arg "-server") (let ((tl (launch:setup)) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (server:launch 0 transport-type) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") | < | | | < < < < < | | | | > | | < | < < < < < < < > | | < < > | < < < | < < > | | | | | | > > > | > | 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 | (if (args:get-arg "-server") (let ((tl (launch:setup)) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (server:launch 0 transport-type) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit (let* ((servers (server:get-list *toppath*)) (fmtstr "~8a~22a~20a~20a~8a\n")) (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") (format #t fmtstr "===" "==============" "=========" "========" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) (let* ((mtm (any->number (car server))) (mod (if mtm (- (current-seconds) mtm) "unk")) (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) (url (conc (cadr server) ":" (caddr server))) (pid (list-ref server 4)) (alv (if (number? mod)(< mod 10) #f))) (format #t fmtstr pid url (seconds->hr-min-sec age) (seconds->hr-min-sec mod) (if alv "alive" "dead")) (if (and alv (args:get-arg "-kill-servers")) (begin (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid) (server:kill server))))) (sort servers (lambda (a b) (let ((ma (or (any->number (car a)) 9e9)) (mb (or (any->number (car b)) 9e9))) (> ma mb))))) (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) (exit)))) ;; must do, would have to add checks to many/all calls below ;;====================================================================== ;; Weird special calls that need to run *after* the server has started? ;;====================================================================== (if (args:get-arg "-list-targets") (if (launch:setup) |
︙ | ︙ |
Modified portlogger.scm from [e604a481b0] to [b8f7cf5181].
︙ | ︙ | |||
48 49 50 51 52 53 54 | fail_count INTEGER DEFAULT 0, update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") db)) (define (portlogger:open-run-close proc . params) (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away | | | | | | | | | < < < | | < | 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | fail_count INTEGER DEFAULT 0, update_time TIMESTAMP DEFAULT (strftime('%s','now')) );") db)) (define (portlogger:open-run-close proc . params) (let* ((fname (conc "/tmp/." (current-user-name) "-portlogger.db")) (avail (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away (handle-exceptions exn (begin ;; (release-dot-lock fname) (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 0 *default-log-port* "exn=" (condition->list exn)) (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it (print-call-chain (current-error-port))) (let* (;; (lock (obtain-dot-lock fname 2 9 10)) (db (portlogger:open-db fname)) (res (apply proc db params))) (sqlite3:finalize! db) ;; (release-dot-lock fname) res)))) ;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) (define (portlogger:take-port db portnum) (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);")) (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;")) (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;")) (res (sqlite3:with-transaction |
︙ | ︙ |
Modified rmt.scm from [f051a84a44] to [01e080d921].
︙ | ︙ | |||
95 96 97 98 99 100 101 | (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f ) ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) | | | | 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 | (debug:print 0 *default-log-port* "WARNING: write transaction requested on a readonly area. cmd="cmd" params="params) #f ) ;; reset the connection if it has been unused too long ((and runremote (remote-conndat runremote) (let ((expire-time (+ (- start-time (remote-server-timeout runremote))))) ;; NOTE: REMOVED the 30 second noise. If adding it back be sure to offset!! add 30 seconds of noise so that not all running tests expire at the same time causing a storm of server starts (< (http-transport:server-dat-get-last-access (remote-conndat runremote)) expire-time))) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 8") (remote-conndat-set! runremote #f) ;; invalidate the connection, thus forcing a new connection. (mutex-unlock! *rmt-mutex*) (rmt:send-receive cmd rid params attemptnum: attemptnum)) ;; ensure we have a record for our connection for given area ((not runremote) ;; can remove this one. should never get here. (set! *runremote* (make-remote)) ;; new runremote will come from this on next iteration (mutex-unlock! *rmt-mutex*) (debug:print-info 12 *default-log-port* "rmt:send-receive, case 1") |
︙ | ︙ |
Modified server.scm from [34ba33b083] to [a878389459].
︙ | ︙ | |||
417 418 419 420 421 422 423 | (define (server:get-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days ;;(* 60 60 1) ;; default to one hour | | | 417 418 419 420 421 422 423 424 425 426 | (define (server:get-timeout) (let ((tmo (configf:lookup *configdat* "server" "timeout"))) (if (and (string? tmo) (string->number tmo)) (* 60 60 (string->number tmo)) ;; (* 3 24 60 60) ;; default to three days ;;(* 60 60 1) ;; default to one hour (* 60 5) ;; default to five minutes ))) |