Overview
Comment: | Added debugging tags to server state changes. Cleaned up dashboard to display new server data. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 |
Files: | files | file ages | folders |
SHA1: |
904e5f7d6cd7ae02500bfad9bf4651eb |
User & Date: | matt on 2014-02-24 22:15:53 |
Other Links: | branch diff | manifest | tags |
Context
2014-02-24
| ||
22:41 | Experimentatal change to more aggressively try to connect to servers check-in: 23a0587e45 user: matt tags: v1.60 | |
22:15 | Added debugging tags to server state changes. Cleaned up dashboard to display new server data. check-in: 904e5f7d6c user: matt tags: v1.60 | |
19:44 | Random port assignment, minimize re-use of ports check-in: bbd7cb0f5a user: matt tags: v1.60 | |
Changes
Modified client.scm from [3302993138] to [c34489a609].
︙ | ︙ | |||
71 72 73 74 75 76 77 | (if (eq? remaining-tries 4) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) | | > | > | 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | (if (eq? remaining-tries 4) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (car server-dat) (cadr server-dat) " client:setup (server-dat=#t)") (thread-sleep! 5) (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id))) (if server-dat (let ((start-res (http-transport:client-connect run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat)))) (if start-res start-res (if (eq? remaining-tries 2) (begin ;; login failed (hash-table-delete! *runremote* run-id) (open-run-close tasks:server-force-clean-run-record tasks:open-db run-id (tasks:hostinfo-get-interface server-dat) (tasks:hostinfo-get-port server-dat) " client:setup (server-dat = #f)") (thread-sleep! 2) (server:try-running run-id) (thread-sleep! 10) ;; give server a little time to start up (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1))) (begin (thread-sleep! 5) (client:setup run-id remaining-tries: (- remaining-tries 1)))))) |
︙ | ︙ |
Modified dashboard.scm from [daaa85c93d] to [16f8f4859b].
︙ | ︙ | |||
978 979 980 981 982 983 984 | ;;====================================================================== ;; ;; General info about the run(s) and megatest area (define (dashboard:summary db) (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string))) (iup:vbox (iup:split | | > | > > > | | | | 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 | ;;====================================================================== ;; ;; General info about the run(s) and megatest area (define (dashboard:summary db) (let ((rawconfig (read-config (conc *toppath* "/megatest.config") #f 'return-string))) (iup:vbox (iup:split #:value 500 (iup:frame #:title "General Info" (iup:vbox (iup:hbox (iup:label "Area Path") (iup:textbox #:value *toppath* #:expand "HORIZONTAL")) (iup:hbox (dcommon:keys-matrix rawconfig) (dcommon:general-info) ))) (iup:frame #:title "Server" (dcommon:servers-table))) (iup:frame #:title "Megatest config settings" (iup:hbox (dcommon:section-matrix rawconfig "setup" "Varname" "Value") |
︙ | ︙ |
Modified dcommon.scm from [aa49c4cb44] to [8f2d34a514].
︙ | ︙ | |||
356 357 358 359 360 361 362 | ;; General data ;; (define (dcommon:general-info) (let ((general-matrix (iup:matrix #:alignment1 "ALEFT" #:expand "YES" ;; "HORIZONTAL" #:numcol 1 | | | | | | | | | 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | ;; General data ;; (define (dcommon:general-info) (let ((general-matrix (iup:matrix #:alignment1 "ALEFT" #:expand "YES" ;; "HORIZONTAL" #:numcol 1 #:numlin 2 #:numcol-visible 1 #:numlin-visible 2))) (iup:attribute-set! general-matrix "WIDTH1" "150") (iup:attribute-set! general-matrix "0:1" "About this Megatest area") ;; User (this is not always obvious - it is common to run as a different user (iup:attribute-set! general-matrix "1:0" "User") (iup:attribute-set! general-matrix "1:1" (current-user-name)) ;; Megatest area ;; (iup:attribute-set! general-matrix "2:0" "Area") ;; (iup:attribute-set! general-matrix "2:1" *toppath*) ;; Megatest version (iup:attribute-set! general-matrix "2:0" "Version") (iup:attribute-set! general-matrix "2:1" (conc megatest-version "-" (substring megatest-fossil-hash 0 4))) general-matrix)) (define (dcommon:run-stats dbstruct) (let* ((stats-matrix (iup:matrix expand: "YES")) (changed #f) (updater (lambda () |
︙ | ︙ | |||
443 444 445 446 447 448 449 | (define (dcommon:servers-table) (let* ((colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 | | | > | | < < | | | | | | | | | | | | | | | | | | | | | | | | | | | > > | | | | 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 | (define (dcommon:servers-table) (let* ((colnum 0) (rownum 0) (servers-matrix (iup:matrix #:expand "YES" #:numcol 7 #:numcol-visible 7 #:numlin-visible 5 )) (colnames (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId")) (updater (lambda () (let ((servers (open-run-close tasks:get-all-servers tasks:open-db))) (iup:attribute-set! servers-matrix "NUMLIN" (length servers)) ;; (set! colnum 0) ;; (for-each (lambda (colname) ;; ;; (print "colnum: " colnum " colname: " colname) ;; (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) ;; (set! colnum (+ 1 colnum))) ;; colnames) (set! rownum 1) (for-each (lambda (server) (set! colnum 0) (let* ((vals (list (vector-ref server 0) ;; Id (vector-ref server 9) ;; MT-Ver (vector-ref server 1) ;; Pid (vector-ref server 2) ;; Hostname (conc (vector-ref server 3) ":" (vector-ref server 4)) ;; IP:Port (seconds->hr-min-sec (- (current-seconds)(vector-ref server 6))) ;; (vector-ref server 5) ;; Pubport ;; (vector-ref server 10) ;; Last beat ;; (vector-ref server 6) ;; Start time ;; (vector-ref server 7) ;; Priority ;; (vector-ref server 8) ;; State (vector-ref server 8) ;; State (vector-ref server 12) ;; RunId ))) (for-each (lambda (val) ;; (print "rownum: " rownum " colnum: " colnum " val: " val) (iup:attribute-set! servers-matrix (conc rownum ":" colnum) val) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ 1 colnum))) vals) (set! rownum (+ rownum 1))) (iup:attribute-set! servers-matrix "REDRAW" "ALL")) servers))))) (set! colnum 0) (for-each (lambda (colname) (iup:attribute-set! servers-matrix (conc "0:" colnum) colname) (iup:attribute-set! servers-matrix "FITTOTEXT" (conc "C" colnum)) (set! colnum (+ colnum 1))) colnames) (set! dashboard:update-servers-table updater) ;; (iup:attribute-set! servers-matrix "WIDTHDEF" "40") ;; (iup:hbox ;; (iup:vbox ;; (iup:button "Start" ;; ;; #:size "50x" ;; #:expand "YES" ;; #:action (lambda (obj) ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" ;; "megatest -server - &"))) ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) ;; (system cmd)))) ;; (iup:button "Stop" ;; #:expand "YES" ;; ;; #:size "50x" ;; #:action (lambda (obj) ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" ;; "megatest -stop-server 0 &"))) ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) ;; (system cmd)))) ;; (iup:button "Restart" ;; #:expand "YES" ;; ;; #:size "50x" ;; #:action (lambda (obj) ;; (let ((cmd (conc ;; "xterm -geometry 180x20 -e \"" ;; "megatest -stop-server 0;megatest -server - &"))) ;; ;; ";echo Press any key to continue;bash -c 'read -n 1 -s'\" &"))) ;; (system cmd))))) ;; servers-matrix ;; ))) servers-matrix )) ;; The main menu (define (dcommon:main-menu) (iup:menu ;; a menu is a special attribute to a dialog (think Gnome putting the menu at screen top) (iup:menu-item "Files" (iup:menu ;; Note that you can use either #:action or action: for options (iup:menu-item "Open" action: (lambda (obj) (iup:show (iup:file-dialog)) (print "File->open " obj))) |
︙ | ︙ |
Modified http-transport.scm from [f8416b573d] to [462dc5100a].
︙ | ︙ | |||
153 154 155 156 157 158 159 | tasks:open-db server-id ipaddrstr portnum) (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) | | | 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | tasks:open-db server-id ipaddrstr portnum) (debug:print 1 "INFO: Trying to start server on " ipaddrstr ":" portnum) ;; This starts the spiffy server ;; NEED WAY TO SET IP TO #f TO BIND ALL (start-server bind-address: ipaddrstr port: portnum) (open-run-close tasks:server-delete tasks:open-db ipaddrstr portnum " http-transport:try-start-server") (debug:print 1 "INFO: server has been stopped"))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== ;;====================================================================== |
︙ | ︙ | |||
389 390 391 392 393 394 395 | (debug:print-info 0 "Average non-cached time " (if (eq? *number-non-write-queries* 0) "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") | | | | 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 | (debug:print-info 0 "Average non-cached time " (if (eq? *number-non-write-queries* 0) "n/a (no queries)" (/ *total-non-write-delay* *number-non-write-queries*)) " ms") (debug:print-info 0 "Server shutdown complete. Exiting") (tasks:server-delete-record tdb server-id " http-transport:keep-running") (exit)))))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (http-transport:launch run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (if (server:check-if-running run-id) (begin (debug:print 0 "INFO: Server for run-id " run-id " already running") (exit 0))) (let ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))) (if (not server-id) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " http-transport:launch") ) (let* ((th2 (make-thread (lambda () (http-transport:run (if (args:get-arg "-server") (args:get-arg "-server") "-") run-id |
︙ | ︙ |
Modified server.scm from [faceca54c0] to [9a921aa4ae].
︙ | ︙ | |||
127 128 129 130 131 132 133 | ;; client:start returns #t if login was successful. ;; (let ((res (client:start run-id server))) ;; if the server didn't respond we must remove the record (if res res (begin | | > | 127 128 129 130 131 132 133 134 135 136 137 | ;; client:start returns #t if login was successful. ;; (let ((res (client:start run-id server))) ;; if the server didn't respond we must remove the record (if res res (begin (open-run-close tasks:server-force-clean-running-records-for-run-id tasks:open-db run-id " server:check-if-running") res))) #f))) |
Modified tasks.scm from [8d2eda9003] to [d835e3e194].
︙ | ︙ | |||
89 90 91 92 93 94 95 | (define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) | | | 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | (define (tasks:hostinfo-get-port vec) (vector-ref vec 2)) (define (tasks:hostinfo-get-pubport vec) (vector-ref vec 3)) (define (tasks:hostinfo-get-transport vec) (vector-ref vec 4)) (define (tasks:hostinfo-get-pid vec) (vector-ref vec 5)) (define (tasks:hostinfo-get-hostname vec) (vector-ref vec 6)) (define (tasks:server-lock-slot mdb run-id) (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot") (if (< (tasks:num-in-available-state mdb run-id) 4) (begin (tasks:server-set-available mdb run-id) (thread-sleep! 2) ;; Try removing this. It may not be needed. (tasks:server-am-i-the-server? mdb run-id)) #f)) |
︙ | ︙ | |||
125 126 127 128 129 130 131 | (lambda (num-in-queue) (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) | | | > | | > | | | | | > | | > | | | 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 | (lambda (num-in-queue) (set! res num-in-queue)) mdb "SELECT count(id) FROM servers WHERE run_id=? AND state = 'available';" run-id) res)) (define (tasks:server-clean-out-old-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state in ('available','shutting-down') AND (strftime('%s','now') - start_time) > 300 AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-running-records-for-run-id mdb run-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" (conc "defunct" tag) run-id iface port)) (define (tasks:server-delete-records-for-this-pid mdb tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" (conc "defunct" tag) (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" (conc "defunct" tag) server-id) ;; use this opportuntity to clean out records over one month old (sqlite3:execute mdb "DELETE FROM servers WHERE state not in ('running','shutting-down') AND (strftime('%s','now') - start_time) > 2628000;")) (define (tasks:server-set-state! mdb server-id state) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" state server-id)) (define (tasks:server-set-interface-port mdb server-id interface port) (sqlite3:execute mdb "UPDATE servers SET interface=?,port=?,heartbeat=strftime('%s','now') WHERE id=?;" interface port server-id)) ;; Get random port not used in long time ;; (define (tasks:server-get-next-port mdb) (let* ((lownum 30000) (highnum 64000) (used-ports '()) |
︙ | ︙ | |||
236 237 238 239 240 241 242 | WHERE run_id=? AND state='running' ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row | | > | | | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 | WHERE run_id=? AND state='running' ORDER BY start_time DESC LIMIT 1;" run-id) ;; (common:version-signature) run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) (define (tasks:kill-server status hostname port pid) (debug:print-info 1 "Removing defunct server record for " hostname ":" port) (if port (open-run-close tasks:server-deregister tasks:open-db hostname port: port) (open-run-close tasks:server-deregister tasks:open-db hostname pid: pid)) |
︙ | ︙ |