Overview
Comment: | wip |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v2.001 |
Files: | files | file ages | folders |
SHA1: |
9f1a2f71d3274e2d33ca3f573cb70c92 |
User & Date: | matt on 2021-12-19 20:28:12 |
Other Links: | branch diff | manifest | tags |
Context
2021-12-20
| ||
15:00 | wip, closer ... check-in: d45dbac9d7 user: matt tags: v2.001 | |
2021-12-19
| ||
20:28 | wip check-in: 9f1a2f71d3 user: matt tags: v2.001 | |
2021-12-18
| ||
20:09 | Improved results from unit tests. WIP check-in: bb1843a1b0 user: matt tags: v2.001 | |
Changes
Modified apimod.scm from [0baabe14dd] to [f6411932bc].
︙ | ︙ | |||
201 202 203 204 205 206 207 | ;;=============================================== ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) | | | | 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 | ;;=============================================== ((get-keys-write) (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl ;; SERVERS ;; ((start-server) (apply server:kind-run params)) ((kill-server) (set! *server-run* #f)) ((start-server get-server) (api:start-server dbstruct params)) ((get-server-info) (apply db:get-server-info dbstruct params)) ((register-server) (apply db:register-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((deregister-server) (apply db:deregister-server dbstruct params) );; dbstruct host port servkey pid ipaddr dbpath) ((get-count-servers) (apply db:get-count-servers dbstruct params)) ((get-servers-info) (apply db:get-servers-info dbstruct params)) ;; TESTS ;;((test-set-state-status-by-id) (apply mt:test-set-state-status-by-id dbstruct params)) ;;BB - commented out above because it was calling below, eventually, incorrectly (dbstruct passed to mt:test-set-state-status-by-id, which previosly did more, but now only passes thru to db:set-state-status-and-roll-up-items. ((test-set-state-status-by-id) ;; (define (db:set-state-status-and-roll-up-items dbstruct run-id test-name item-path state status comment) |
︙ | ︙ |
Modified commonmod.scm from [3bc09277ff] to [66ca132e41].
︙ | ︙ | |||
273 274 275 276 277 278 279 | common:get-normalized-cpu-load-raw common:unix-ping launch:is-test-alive common:get-num-cpus common:wait-for-normalized-load common:wait-for-cpuload tasks:kill-server | | | | | | | | 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 | common:get-normalized-cpu-load-raw common:unix-ping launch:is-test-alive common:get-num-cpus common:wait-for-normalized-load common:wait-for-cpuload tasks:kill-server ;; server:get-logs-list ;; server:get-list ;; server:get-num-alive ;; server:get-best ;; server:get-first-best ;; server:get-rand-best server:record->id server:get-num-servers server:logf-get-start-info get-uname realpath common:real-path common:get-disk-space-used |
︙ | ︙ | |||
2823 2824 2825 2826 2827 2828 2829 | (thread-sleep! 0.5) (if (file-exists? gzfile) (delete-file gzfile)) (system (conc "gzip " logfile)) (unset-environment-variable! "TARGETHOST_LOGF") (unset-environment-variable! "TARGETHOST")))) | | | | 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 | (thread-sleep! 0.5) (if (file-exists? gzfile) (delete-file gzfile)) (system (conc "gzip " logfile)) (unset-environment-variable! "TARGETHOST_LOGF") (unset-environment-variable! "TARGETHOST")))) #;(define (server:get-logs-list area-path) (let* (;; (server-logs-cmd (conc "grep -iL exiting " area-path "/logs/server-*-*.log")) ;; (server-logs (string-split (string-chomp (with-input-from-pipe server-logs-cmd read-string)))) (server-logs (glob (conc area-path"/logs/server-*-*.log"))) ) server-logs)) ;; get a list of servers with all relevant data ;; ( mod-time host port start-time pid ) ;; #;(define (server:get-list areapath #!key (limit #f)) (let ((fname-rx (regexp "^(|.*/)server-(\\d+)-(\\S+).log$")) (day-seconds (* 24 60 60))) ;; if the directory exists continue to get the list ;; otherwise attempt to create the logs dir and then ;; continue (if (if (directory-exists? (conc areapath "/logs")) '() |
︙ | ︙ | |||
2885 2886 2887 2888 2889 2890 2891 | (if (null? tal) (if (and limit (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) | | | 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 | (if (null? tal) (if (and limit (> (length new-res) limit)) new-res ;; (take new-res limit) <= need intelligent sorting before this will work new-res) (loop (string-chomp (car tal)) (cdr tal) new-res))))))))) #;(define (server:get-num-alive srvlst) (let ((num-alive 0)) (for-each (lambda (server) (handle-exceptions exn (begin (debug:print-info 0 *default-log-port* "Unable to get server start-time and/or mod-time from " server ", exn=" exn)) |
︙ | ︙ | |||
2912 2913 2914 2915 2916 2917 2918 | ;; active (i.e. mod-time < 10 seconds ;; ;; mod-time host port start-time pid ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; | | | 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 | ;; active (i.e. mod-time < 10 seconds ;; ;; mod-time host port start-time pid ;; ;; sort by start-time descending. I.e. get the oldest first. Young servers will thus drop off ;; and servers should stick around for about two hours or so. ;; #;(define (server:get-best srvlst) (let* ((nums (server:get-num-servers)) (now (current-seconds)) (slst (sort (filter (lambda (rec) (if (and (list? rec) (> (length rec) 2)) (let ((start-time (list-ref rec 3)) |
︙ | ︙ | |||
2940 2941 2942 2943 2944 2945 2946 | (lambda (a b) (< (list-ref a 3) (list-ref b 3)))))) (if (> (length slst) nums) (take slst nums) slst))) | | | | 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 | (lambda (a b) (< (list-ref a 3) (list-ref b 3)))))) (if (> (length slst) nums) (take slst nums) slst))) #;(define (server:get-first-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and srvrs (not (null? srvrs))) (car srvrs) #f))) #;(define (server:get-rand-best areapath) (let ((srvrs (server:get-best (server:get-list areapath)))) (if (and (list? srvrs) (not (null? srvrs))) (let* ((len (length srvrs)) (idx (pseudo-random-integer len))) (list-ref srvrs idx)) #f))) |
︙ | ︙ |
Modified dashboard.scm from [955f1d46eb] to [feba132a9c].
︙ | ︙ | |||
238 239 240 241 242 243 244 245 246 247 248 249 250 251 | ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) | > | 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 | ;; RA => Might require revert for filters ;; create a watch dog to move changes from lt/.db/*.db to megatest.db ;; ;;;(if (file-write-access? (conc *toppath* "/megatest.db")) ;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn") (thread-start! (make-thread common:watchdog "Watchdog thread")) ;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn") ;; (if (not (args:get-arg "-use-db-cache")) ;; (begin ;; (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db") ;; (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;) ;;) |
︙ | ︙ | |||
3667 3668 3669 3670 3671 3672 3673 | "dashboard:runs-tab-updater")) ;;====================================================================== ;; The heavy lifting starts here ;;====================================================================== (define (dashboard-main) | | | 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 | "dashboard:runs-tab-updater")) ;;====================================================================== ;; The heavy lifting starts here ;;====================================================================== (define (dashboard-main) (let ((mtdb-path (conc *toppath* "/megatest.db"))) ;; not used for now, update for .db area and use for write access detection #;(if (and (common:file-exists? mtdb-path) (file-writable? mtdb-path)) (if (not (args:get-arg "-skip-version-check")) (common:exit-on-version-changed))) (let* ((commondat (dboard:commondat-make))) ;; Move this stuff to db.scm? I'm not sure that is the right thing to do... (cond |
︙ | ︙ |
Modified dbmod.scm from [ddeeddaa42] to [43fb4b6c81].
︙ | ︙ | |||
225 226 227 228 229 230 231 232 233 234 235 236 237 238 | db:hoh-set! db:hoh-get db:get-cache-stmth db:register-server db:deregister-server db:get-server-info db:get-count-servers db:get-steps-info-by-id make-dbr:dbdat dbr:dbdat-db dbr:dbdat-inmem dbr:dbdat-last-sync dbr:dbdat-last-write | > | 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | db:hoh-set! db:hoh-get db:get-cache-stmth db:register-server db:deregister-server db:get-server-info db:get-count-servers db:get-servers-info db:get-steps-info-by-id make-dbr:dbdat dbr:dbdat-db dbr:dbdat-inmem dbr:dbdat-last-sync dbr:dbdat-last-write |
︙ | ︙ | |||
5887 5888 5889 5890 5891 5892 5893 5894 | (sqlite3:fold-row (lambda (res count) (max res count)) 0 db "SELECT count(*) FROM servers WHERE apath=?;" apath)))) | > > > > > > > > > > > > | > | 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 | (sqlite3:fold-row (lambda (res count) (max res count)) 0 db "SELECT count(*) FROM servers WHERE apath=?;" apath)))) (define (db:get-servers-info dbstruct apath) (db:with-db dbstruct #f #f (lambda (db) (sqlite3:fold-row (lambda (res count) (max res count)) 0 db "SELECT * FROM servers WHERE apath=?;" apath)))) ) |
Modified dcommon.scm from [dedc418b9b] to [cfabfe1da5].
︙ | ︙ | |||
903 904 905 906 907 908 909 | #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) | | | 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 | #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (if (dashboard:monitor-changed? commondat tabdat) (let ((servers (rmt:get-servers-info *remotedat* *toppath*)#;(server:get-list *toppath* limit: 10))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) ;; (set! colnum (+ 1 colnum))) ;; colnames) |
︙ | ︙ |
Modified megatest.scm from [ca9d861939] to [6434fb218b].
︙ | ︙ | |||
1133 1134 1135 1136 1137 1138 1139 | (adjutant-run) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit | | > > > > > > > > > | 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 | (adjutant-run) (set! *didsomething* #t))) (if (or (args:get-arg "-list-servers") (args:get-arg "-kill-servers")) (let ((tl (launch:setup))) (if tl ;; all roads from here exit (let* ((servers (rmt:get-servers-info *remotedat* *toppath*)) (fmtstr "~8a~22a~20a~20a~8a\n")) ;; id INTEGER PRIMARY KEY, ;; host TEXT, ;; port INTEGER, ;; servkey TEXT, ;; pid TEXT, ;; ipaddr TEXT, ;; apath TEXT, ;; dbname TEXT, ;; event_time (format #t fmtstr "pid" "Interface:port" "age (hms)" "Last mod" "State") (format #t fmtstr "===" "==============" "=========" "========" "=====") (for-each ;; ( mod-time host port start-time pid ) (lambda (server) (let* ((mtm (any->number (car server))) (mod (if mtm (- (current-seconds) mtm) "unk")) (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds)))) |
︙ | ︙ |
Modified rmtmod.scm from [7039de8d85] to [310ad66fe4].
︙ | ︙ | |||
430 431 432 433 434 435 436 | ;;====================================================================== ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (rmt:kill-server run-id) | | | | 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 | ;;====================================================================== ;;====================================================================== ;; S E R V E R ;;====================================================================== (define (rmt:kill-server run-id) (rmt:send-receive 'kill-server #f (list run-id))) (define (rmt:start-server run-id) (rmt:send-receive 'start-server #f (list run-id))) (define (rmt:get-server-info apath dbname) (rmt:send-receive 'get-server-info #f (list apath dbname))) ;;====================================================================== ;; M I S C ;;====================================================================== |
︙ | ︙ | |||
2132 2133 2134 2135 2136 2137 2138 | ,dbname))) (define (rmt:get-count-servers remdat apath) (remotedat-conns remdat) ;; just checking types (rmt:open-main-connection remdat apath) ;; we need a channel to main.db (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) | | | > > | 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 | ,dbname))) (define (rmt:get-count-servers remdat apath) (remotedat-conns remdat) ;; just checking types (rmt:open-main-connection remdat apath) ;; we need a channel to main.db (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'get-count-servers `(,apath))) (define (rmt:get-servers-info apath) (rmt:send-receive 'get-servers-info #f `(,apath))) (define (rmt:deregister-server remdat apath iface port server-key dbname) (remotedat-conns remdat) ;; just checking types (rmt:open-main-connection remdat apath) ;; we need a channel to main.db (rmt:send-receive-real remdat apath ;; params: host port servkey pid ipaddr dbpath (db:run-id->dbname #f) 'deregister-server `(,iface |
︙ | ︙ | |||
2202 2203 2204 2205 2206 2207 2208 | (let* ((remdat *remotedat*) (server-start-time (current-seconds)) (pkts-dir (get-pkts-dir)) (server-key (rmt:get-signature)) ;; This servers key (is-main (equal? (args:get-arg "-db") ".db/main.db")) (last-access 0) | | > > > > > > > > > | 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 | (let* ((remdat *remotedat*) (server-start-time (current-seconds)) (pkts-dir (get-pkts-dir)) (server-key (rmt:get-signature)) ;; This servers key (is-main (equal? (args:get-arg "-db") ".db/main.db")) (last-access 0) (server-timeout (server:expiration-timeout)) (shutdown-server-sequence (lambda (port) (set! *unclean-shutdown* #f) (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id)) (rmt:server-shutdown) (portlogger:open-run-close portlogger:set-port port "released") (exit))) (timed-out? (lambda () (<= (+ last-access server-timeout) (current-seconds))))) ;; main and run db servers have both got wait logic (could/should merge it) (if is-main (rmt:wait-for-server pkts-dir dbname server-key) (rmt:wait-for-stable-interface)) ;; this is our forever loop (let* ((iface (servdat-host *server-info*)) (port (servdat-port *server-info*))) |
︙ | ︙ | |||
2288 2289 2290 2291 2292 2293 2294 2295 | (if (common:low-noise-print 60 "dbstats") (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond ((and *server-run* | > > > > > > | < | < | < < | 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 | (if (common:low-noise-print 60 "dbstats") (begin (debug:print 0 *default-log-port* "Server stats:") (db:print-current-query-stats))) (let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600))) (cond ((not *server-run*) (debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.") (shutdown-server-sequence port)) ((timed-out?) (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (shutdown-server-sequence port)) ((and *server-run* (not (timed-out?)) #;(if is-main ;; intention here was to exit main server quickly. (> (rmt:get-count-servers remdat *toppath*) 1) #t)) (if (common:low-noise-print 120 "server continuing") (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))) (loop 0 bad-sync-count (current-milliseconds))) (else (set! *unclean-shutdown* #f) (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access)) (shutdown-server-sequence port) #;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: " (open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown (sexpr->string 'quit))) ))))))) ;; Call this to start the actual server ;; |
︙ | ︙ |
Modified tests/unittests/server.scm from [b0131aa814] to [40ab640762].
︙ | ︙ | |||
65 66 67 68 69 70 71 | (thread-sleep! 2) (test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) | | < > > | | 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 | (thread-sleep! 2) (test #f #t (rmt:general-open-connection *remotedat* *toppath* ".db/2.db")) (test #f '("SYSTEM" "RELEASE") (rmt:get-keys)) (test #f 1 (rmt:send-receive 'register-run #f (list keyvals "run1" "new" "n/a" "justme" #f))) ;; (print "Got here.") (test #f 1 (rmt:send-receive 'register-run 1 (list keyvals "run1" "new" "n/a" "justme" #f))) (test #f 2 (rmt:register-run keyvals "run2" "new" "n/a" "justme" #f)) ;; (test #f 2 (rmt:deregister-server *remotedat* *toppath* iface port server-key dbname (test #f 2 (rmt:get-count-servers *remotedat* *toppath*)) (test #f "run2" (rmt:get-run-name-from-id 2)) (test #f #t (list? (rmt:get-servers-info *toppath*))) (exit) |