Comment: | Merged in v1.62 and few minor fixes |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | cached-copy-srehman |
Files: | files | file ages | folders |
SHA1: |
41915f799710b83b33bcfb402e8737dd |
User & Date: | mrwellan on 2016-10-27 17:55:42 |
Other Links: | branch diff | manifest | tags |
2016-10-27
| ||
23:52 | Converted -list-runs to use cached db check-in: cbeea6e758 user: matt tags: cached-copy-srehman | |
17:55 | Merged in v1.62 and few minor fixes check-in: 41915f7997 user: mrwellan tags: cached-copy-srehman | |
16:24 | added caching of specified databases (defaulted to megatest.db) check-in: 32dacffcc7 user: srehman tags: cached-copy-srehman | |
15:21 | Run tab resize fixed check-in: 5ff16368ff user: ritikaag tags: v1.62 | |
Modified Makefile from [81378c20f8] to [83b5fe2a28].
1 2 3 4 5 6 7 8 | # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ | > > | 1 2 3 4 5 6 7 8 9 10 | # make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)' # rm <files>.o ; make install CSCOPTS='-profile' ; ... ; chicken-profile | less PREFIX=$(PWD) CSCOPTS= INSTALL=install SRCFILES = common.scm items.scm launch.scm \ ods.scm runconfig.scm server.scm configf.scm \ db.scm keys.scm margs.scm megatest-version.scm \ process.scm runs.scm tasks.scm tests.scm genexample.scm \ |
︙ | ︙ |
Modified api.scm from [d744d47aad] to [bcdab13d33].
︙ | ︙ | |||
105 106 107 108 109 110 111 | ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) (print-call-chain (current-error-port)) | | | 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | ;; - returns #( flag result ) ;; (define (api:execute-requests dbstruct dat) (handle-exceptions exn (let ((call-chain (get-call-chain))) (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens (if (not (vector? dat)) ;; it is an error to not receive a vector (vector #f #f "remote must be called with a vector") (vector ;; return a vector + the returned data structure #t (let ((cmd (vector-ref dat 0)) (params (vector-ref dat 1))) |
︙ | ︙ |
Modified dashboard.scm from [929e1f3c0c] to [1d05b40be7].
︙ | ︙ | |||
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 | )) (tb (iup:treebox #:value 0 #:name "Runs" #:expand "YES" #:addexpanded "NO" #:selection-cb (lambda (obj id state) (debug:catch-and-dump (lambda () (let* ((run-path (tree:node->path obj id)) (run-id (tree-path->run-id tabdat (cdr run-path)))) ;; (dboard:tabdat-view-changed-set! tabdat #t) ;; ?? done below when run-id is a number | > | 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 | )) (tb (iup:treebox #:value 0 #:name "Runs" #:expand "YES" #:addexpanded "NO" #:size "10x" #:selection-cb (lambda (obj id state) (debug:catch-and-dump (lambda () (let* ((run-path (tree:node->path obj id)) (run-id (tree-path->run-id tabdat (cdr run-path)))) ;; (dboard:tabdat-view-changed-set! tabdat #t) ;; ?? done below when run-id is a number |
︙ | ︙ |
Modified db.scm from [dbffcb7557] to [320057ef6f].
︙ | ︙ | |||
202 203 204 205 206 207 208 | ;; (define (db:lock-create-open fname initproc) ;; (if (file-exists? fname) ;; (let ((db (sqlite3:open-database fname))) ;; (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) ;; (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; db) | | | 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 | ;; (define (db:lock-create-open fname initproc) ;; (if (file-exists? fname) ;; (let ((db (sqlite3:open-database fname))) ;; (sqlite3:set-busy-handler! db (make-busy-timeout 136000)) ;; (db:set-sync db) ;; (sqlite3:execute db "PRAGMA synchronous = 0;") ;; db) (let* ((parent-dir (or (pathname-directory fname)(current-directory))) ;; no parent? go local (dir-writable (file-write-access? parent-dir)) (file-exists (file-exists? fname)) (file-write (if file-exists (file-write-access? fname) dir-writable ))) (if file-write ;; dir-writable (let (;; (lock (obtain-dot-lock fname 1 5 10)) |
︙ | ︙ | |||
781 782 783 784 785 786 787 | FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;")) (define (db:cache-for-read-only source target) | | | | | | | | | | | | | | | | | | | | | | | 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 | FOR EACH ROW BEGIN UPDATE run_stats SET last_update=(strftime('%s','now')) WHERE id=old.id; END;")) (define (db:cache-for-read-only source target) (let* ((toppath (launch:setup)) (cache-db (db:open-megatest-db path: target)) (source-db (db:open-megatest-db path: source)) (curr-time (current-seconds)) (res '())) (print source-db) (begin (if (not (file-exists? target)) ((db:sync-tables (db:sync-main-list source-db) source-db cache-db) (db:sync-tables db:sync-tests-only source-db cache-db) (db:clean-up-rundb cache-db)) ((sqlite3:for-each-row (lambda (id release runname state status owner event_time comment fail_count pass_count ) (set! res (cons (id release runname state status owner event_time comment fail_count pass_count ) res))) (db:dbdat-get-db source-db) "SELECT id, release, runname, state, status, owner, event_time, comment, fail_count, pass_count FROM runs;")) ) (print res) (sqlite3:finalize! (db:dbdat-get-db cache-db)) )) ) ;; options: ;; ;; 'killservers - kills all servers ;; 'dejunk - removes junk records ;; 'adj-testids - move test-ids into correct ranges ;; 'old2new - sync megatest.db records to .db/{main,1,2 ...}.db |
︙ | ︙ | |||
3074 3075 3076 3077 3078 3079 3080 | ;; ((fs) obj) ((http fs) (string-substitute (regexp "=") "_" (base64:base64-encode (z3:encode-buffer (with-output-to-string | | | | | 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 | ;; ((fs) obj) ((http fs) (string-substitute (regexp "=") "_" (base64:base64-encode (z3:encode-buffer (with-output-to-string (lambda ()(serialize obj))))) ;; BB: serialize - this is what causes problems between different builds of megatest communicating. serialize is sensitive to binary image of mtest. #t)) ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj)))) (else obj))) ;; rpc (define (db:string->obj msg #!key (transport 'http)) (case transport ;; ((fs) msg) ((http fs) (if (string? msg) (with-input-from-string (z3:decode-buffer (base64:base64-decode (string-substitute (regexp "_") "=" msg #t))) (lambda ()(deserialize))) (begin (debug:print-error 0 *default-log-port* "reception failed. Received " msg " but cannot translate it.") msg))) ;; crude reply for when things go awry ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize)))) (else msg))) ;; rpc (define (db:test-set-status-state dbstruct run-id test-id status state msg) (let ((dbdat (db:get-db dbstruct run-id))) (if (member state '("LAUNCHED" "REMOTEHOSTSTART")) (db:general-call dbdat 'set-test-start-time (list test-id))) (if msg (db:general-call dbdat 'state-status-msg (list state status msg test-id)) |
︙ | ︙ |
Modified docs/manual/megatest_manual.html from [8afff84a15] to [2d6199dc08].
︙ | ︙ | |||
865 866 867 868 869 870 871 | megatest uses the network filesystem to call home to your master sqlite3 database.</p></div> </div> </div> <div class="sect1"> <h2 id="_road_map">Road Map</h2> <div class="sectionbody"> | | | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > | 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 | megatest uses the network filesystem to call home to your master sqlite3 database.</p></div> </div> </div> <div class="sect1"> <h2 id="_road_map">Road Map</h2> <div class="sectionbody"> <div class="paragraph"><p>Note 1: This road-map is still evolving and subject to change without notice.</p></div> <div class="sect2"> <h3 id="_architecture_refactor">Architecture Refactor</h3> <div class="sect3"> <h4 id="_goals">Goals</h4> <div class="olist arabic"><ol class="arabic"> <li> <p> Reduce load on the file system. Sqlite3 files on network filesystem can be a burden. </p> </li> <li> <p> Reduce number of servers and frequency of start/stop. This is mostly an issue of clutter but also a reduction in "moving parts". </p> </li> <li> <p> Coalesce activities to a single home host where possible. Give the user feedback that they have started the dashboard on a host other than the home host. </p> </li> <li> <p> Reduce number of processes involved in managing running tests. </p> </li> </ol></div> </div> <div class="sect3"> <h4 id="_changes_needed">Changes Needed</h4> <div class="olist arabic"><ol class="arabic"> <li> <p> ACID compliant db will be on /tmp and synced to megatest.db with a five second max delay. </p> </li> <li> <p> Read/writes to db for processes on homehost will go direct to /tmp megatest.db file. </p> </li> <li> <p> Read/wites fron non-homehost processes will go through one server. Bulk reads (e.g. for dashboard or list-runs) will be cached on the current host in /tmp and synced from the home megatest.db in the testsuite area. </p> </li> <li> <p> Db syncs rely on the target db file timestame minus some margin. </p> </li> <li> <p> Since bulk reads do not use the server we can switch to simple RPC for the network transport. </p> </li> <li> <p> Test running manager process extended to manage multiple running tests. </p> </li> </ol></div> </div> </div> <div class="sect2"> <h3 id="_current_items">Current Items</h3> <div class="sect3"> <h4 id="_ww05_migrate_to_inmem_db">ww05 - migrate to inmem-db</h4> <div class="olist arabic"><ol class="arabic"> <li> <p> |
︙ | ︙ | |||
1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 | <div class="paragraph"><p>The default is the graphviz based tree but if your tests don’t view well in that mode then use "nodot" to turn it off.</p></div> <div class="listingblock"> <div class="content monospaced"> <pre>[setup] nodot</pre> </div></div> </div> <div class="sect2"> <h3 id="_database_settings">Database settings</h3> <table class="tableblock frame-topbot grid-all" style=" width:70%; "> | > > > > > > > > > > > | 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 | <div class="paragraph"><p>The default is the graphviz based tree but if your tests don’t view well in that mode then use "nodot" to turn it off.</p></div> <div class="listingblock"> <div class="content monospaced"> <pre>[setup] nodot</pre> </div></div> </div> <div class="sect2"> <h3 id="_dashboard_settings">Dashboard settings</h3> <div class="listingblock"> <div class="title">Runs tab buttons, font and size</div> <div class="content monospaced"> <pre>[dashboard] btn-height x14 btn-fontsz 10 cell-width 60</pre> </div></div> </div> <div class="sect2"> <h3 id="_database_settings">Database settings</h3> <table class="tableblock frame-topbot grid-all" style=" width:70%; "> |
︙ | ︙ |
Modified docs/manual/reference.txt from [458696201b] to [206fb51b8f].
︙ | ︙ | |||
100 101 102 103 104 105 106 107 108 109 110 111 112 113 | The default is the graphviz based tree but if your tests don't view well in that mode then use "nodot" to turn it off. ----------------- [setup] nodot ----------------- Database settings ~~~~~~~~~~~~~~~~~ .Database config settings in [setup] section of megatest.config [width="70%",cols="^,2m,2m,2m",frame="topbot",options="header"] |====================== | > > > > > > > > > > > | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 | The default is the graphviz based tree but if your tests don't view well in that mode then use "nodot" to turn it off. ----------------- [setup] nodot ----------------- Dashboard settings ~~~~~~~~~~~~~~~~~~ .Runs tab buttons, font and size ------------------ [dashboard] btn-height x14 btn-fontsz 10 cell-width 60 ------------------ Database settings ~~~~~~~~~~~~~~~~~ .Database config settings in [setup] section of megatest.config [width="70%",cols="^,2m,2m,2m",frame="topbot",options="header"] |====================== |
︙ | ︙ |
Modified docs/plan.txt from [51bfa826bf] to [92bba79ce7].
1 2 3 | Road Map -------- | | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 | Road Map -------- Note 1: This road-map is still evolving and subject to change without notice. Architecture Refactor ~~~~~~~~~~~~~~~~~~~~~ Goals ^^^^^ . Reduce load on the file system. Sqlite3 files on network filesystem can be a burden. . Reduce number of servers and frequency of start/stop. This is mostly an issue of clutter but also a reduction in "moving parts". . Coalesce activities to a single home host where possible. Give the user feedback that they have started the dashboard on a host other than the home host. . Reduce number of processes involved in managing running tests. Changes Needed ^^^^^^^^^^^^^^ . ACID compliant db will be on /tmp and synced to megatest.db with a five second max delay. . Read/writes to db for processes on homehost will go direct to /tmp megatest.db file. . Read/wites fron non-homehost processes will go through one server. Bulk reads (e.g. for dashboard or list-runs) will be cached on the current host in /tmp and synced from the home megatest.db in the testsuite area. . Db syncs rely on the target db file timestame minus some margin. . Since bulk reads do not use the server we can switch to simple RPC for the network transport. . Test running manager process extended to manage multiple running tests. Current Items ~~~~~~~~~~~~~ ww05 - migrate to inmem-db ^^^^^^^^^^^^^^^^^^^^^^^^^^ |
︙ | ︙ |
Modified megatest.scm from [1120b5ee36] to [d0741b04ce].
︙ | ︙ | |||
144 145 146 147 148 149 150 | -import-megatest.db : migrate a database from v1.55 series to v1.60 series -sync-to-megatest.db : migrate data back to megatest.db -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname | | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | -import-megatest.db : migrate a database from v1.55 series to v1.60 series -sync-to-megatest.db : migrate data back to megatest.db -update-meta : update the tests metadata for all tests -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are overwritten by values set in config files. -server -|hostname : start the server (reduces contention on megatest.db), use - to automatically figure out hostname -transport http|rpc : use http or rpc for transport (default is http) -daemonize : fork into background and disconnect from stdin/out -log logfile : send stdout and stderr to logfile -list-servers : list the servers -stop-server id : stop server specified by id (see output of -list-servers), use 0 to kill all -repl : start a repl (useful for extending megatest) -load file.scm : load and run file.scm |
︙ | ︙ | |||
482 483 484 485 486 487 488 | (on-exit std-exit-procedure) ;;====================================================================== ;; Misc general calls ;;====================================================================== | | | < | | | < | > | 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 | (on-exit std-exit-procedure) ;;====================================================================== ;; Misc general calls ;;====================================================================== (if (and (args:get-arg "-cache-db") (args:get-arg "-source-db")) (let* ((temp-dir (or (args:get-arg "-target-db") (create-directory (conc "/tmp/" (getenv "USER") "/" (string-translate (current-directory) "/" "_"))))) (target-db (conc temp-dir "/cached.db")) (source-db (args:get-arg "-source-db"))) (db:cache-for-read-only source-db target-db) (set! *didsomething* #t))) ;; handle a clean-cache request as early as possible ;; (if (args:get-arg "-clean-cache") (begin (set! *didsomething* #t) ;; suppress the help output. (if (getenv "MT_TARGET") ;; no point in trying if no target |
︙ | ︙ | |||
747 748 749 750 751 752 753 | (if (args:get-arg "-server") ;; Server? Start up here. ;; (let ((tl (launch:setup)) (run-id (and (args:get-arg "-run-id") | | > | > | 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 | (if (args:get-arg "-server") ;; Server? Start up here. ;; (let ((tl (launch:setup)) (run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id")))) (transport-type (string->symbol (or (args:get-arg "-transport") "http")))) (if run-id (begin (server:launch run-id transport-type) (set! *didsomething* #t)) (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id"))) ;; Not a server? This section will decide how to communicate ;; ;; Setup client for all expect listed here (if (null? (lset-intersection equal? (hash-table-keys args:arg-hash) '("-list-servers" "-stop-server" "-kill-server" "-show-cmdinfo" "-list-runs" "-ping"))) (if (launch:setup) (let ((run-id (and (args:get-arg "-run-id") (string->number (args:get-arg "-run-id"))))) ;; (set! *fdb* (filedb:open-db (conc *toppath* "/db/paths.db"))) |
︙ | ︙ | |||
784 785 786 787 788 789 790 | #t )))))) ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) (if (or (args:get-arg "-list-servers") | | > > | | 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 | #t )))))) ;; MAY STILL NEED THIS ;; (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t)))))))))) (if (or (args:get-arg "-list-servers") (args:get-arg "-stop-server") (args:get-arg "-kill-server")) (let ((tl (launch:setup))) (if tl (let* ((tdbdat (tasks:open-db)) (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))) (fmtstr "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n") (servers-to-kill '()) (kill-switch (if (args:get-arg "-kill-server") "-9" "")) (killinfo (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") )) (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f)) (sid (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f))) (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport") (format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========") (for-each (lambda (server) (let* ((id (vector-ref server 0)) |
︙ | ︙ | |||
825 826 827 828 829 830 831 | (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin | | | | 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 | (if (> last-update 20) ;; Mark as dead if not updated in last 20 seconds (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid))) (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update (if status "alive" "dead") transport) (if (or (equal? id sid) (equal? sid 0)) ;; kill all/any (begin (debug:print-info 0 *default-log-port* "Attempting to kill "kill-switch" server with pid " pid) (tasks:kill-server hostname pid kill-switch: kill-switch))))) servers) (debug:print-info 1 *default-log-port* "Done with listservers") (set! *didsomething* #t) (exit)) ;; must do, would have to add checks to many/all calls below (exit)))) ;;====================================================================== |
︙ | ︙ |
Modified rpc-transport.scm from [7e38f4f2de] to [62a65daa58].
︙ | ︙ | |||
36 37 38 39 40 41 42 | (apply (eval (string->symbol procstr)) params))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (rpc-transport:launch run-id) | > > | | | | | | | | | | | | | | | | | | | | | | | 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | (apply (eval (string->symbol procstr)) params))) ;; all routes though here end in exit ... ;; ;; start_server? ;; (define (rpc-transport:launch run-id) (let* ((tdbdat (tasks:open-db))) (BB> "rpc-transport:launch fired for run-id="run-id) (set! *run-id* run-id) (if (args:get-arg "-daemonize") (daemon:ize)) (if (server:check-if-running run-id) (begin (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running") (exit 0))) (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)) (remtries 4)) (if (not server-id) (if (> remtries 0) (begin (thread-sleep! 2) (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id) (- remtries 1))) (begin ;; since we didn't get the server lock we are going to clean up and bail out (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue") (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch"))) (begin (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id) (exit)))))) (define (rpc-transport:run hostn run-id server-id) (debug:print 2 *default-log-port* "Attempting to start the rpc server ...") ;; (trace rpc:publish-procedure!) (rpc:publish-procedure! 'server:login server:login) (rpc:publish-procedure! 'testing (lambda () "Just testing")) |
︙ | ︙ |
Modified runs.scm from [9f66bcb951] to [c631ccf0a3].
︙ | ︙ | |||
1322 1323 1324 1325 1326 1327 1328 | (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) | | | 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 | (debug:print-info 4 *default-log-port* "Exiting loop with...\n hed=" hed "\n tal=" tal "\n reruns=" reruns)) ))) ;; now *if* -run-wait we wait for all tests to be done ;; Now wait for any RUNNING tests to complete (if in run-wait mode) (thread-sleep! 5) ;; I think there is a race condition here. Let states/statuses settle (let wait-loop ((num-running (rmt:get-count-tests-running-for-run-id run-id)) (prev-num-running 0)) (BB> "num-running=" num-running ", prev-num-running=" prev-num-running) (if (and (or (args:get-arg "-run-wait") (equal? (configf:lookup *configdat* "setup" "run-wait") "yes")) (> num-running 0)) (begin ;; Here we mark any old defunct tests as incomplete. Do this every fifteen minutes ;; (debug:print 0 *default-log-port* "Got here eh! num-running=" num-running " (> num-running 0) " (> num-running 0)) (if (> (current-seconds)(+ last-time-incomplete 900)) |
︙ | ︙ |
Modified server.scm from [1ba0421ee5] to [19061b35b0].
︙ | ︙ | |||
45 46 47 48 49 50 51 | ;; Call this to start the actual server ;; ;; all routes though here end in exit ... ;; ;; start_server ;; | | > | | | 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | ;; Call this to start the actual server ;; ;; all routes though here end in exit ... ;; ;; start_server ;; (define (server:launch run-id transport-type) (BB> "server:launch fired for run-id="run-id" transport-type="transport-type) (case transport-type ((http)(http-transport:launch run-id)) ;;((nmsg)(nmsg-transport:launch run-id)) ((rpc) (rpc-transport:launch run-id)) (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type)))) ;; (else (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc") ;; (rpc-transport:launch run-id))))) ;;====================================================================== ;; S E R V E R U T I L I T I E S ;;====================================================================== |
︙ | ︙ | |||
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; (define (server:run run-id) (let* ((curr-host (get-host-name)) (curr-ip (server:get-best-guess-address curr-host)) (target-host (configf:lookup *configdat* "server" "homehost" )) (testsuite (common:get-testsuite-name)) (logfile (conc *toppath* "/logs/" run-id ".log")) (cmdln (conc (common:get-megatest-exe) " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") (conc " -daemonize -log " logfile) "") " -m testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &"))))) (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (if (not (directory-exists? "logs"))(create-directory "logs")) ;; Rotate logs, logic: | > > | > > | 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | (else (debug:print-error 0 *default-log-port* "unrecognised transport type: " *transport-type*) result))) ;; Given a run id start a server process ### NOTE ### > file 2>&1 ;; if the run-id is zero and the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (server:run run-id) (let* ((curr-host (get-host-name)) (curr-ip (server:get-best-guess-address curr-host)) (target-host (configf:lookup *configdat* "server" "homehost" )) (testsuite (common:get-testsuite-name)) (logfile (conc *toppath* "/logs/" run-id ".log")) (cmdln (conc (common:get-megatest-exe) " -server " (or target-host "-") " -run-id " run-id (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes") (conc " -daemonize -log " logfile) "") " -m testsuite:" testsuite))) ;; (conc " >> " logfile " 2>&1 &"))))) (debug:print 0 *default-log-port* "INFO: Starting server (" cmdln ") as none running ...") (push-directory *toppath*) (if (not (directory-exists? "logs"))(create-directory "logs")) ;; Rotate logs, logic: ;; if > 500k and older than 1 week: ;; remove previous compressed log and compress this log ;; (directory-fold (lambda (file rem) (if (and (string-match "^.*.log" file) (> (file-size (conc "logs/" file)) 200000)) (let ((gzfile (conc "logs/" file ".gz"))) (if (file-exists? gzfile) (begin |
︙ | ︙ | |||
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | ;; look at target host, is it host.domain.tld or ip address and does it ;; match current ip or hostname (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) ;; (system cmdln) (pop-directory))) | > | | 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 | ;; look at target host, is it host.domain.tld or ip address and does it ;; match current ip or hostname (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) ;; (system cmdln) (pop-directory))) (define (server:get-client-signature) ;; BB> why is this proc named "get-"? it returns nothing -- set! has not return value. (if *my-client-signature* *my-client-signature* (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched |
︙ | ︙ |
Modified tasks.scm from [7aab5e9e48] to [a06114a2ac].
︙ | ︙ | |||
227 228 229 230 231 232 233 234 235 236 237 238 239 240 | (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" (conc "defunct" tag) run-id iface port)) (define (tasks:server-delete-records-for-this-pid mdb tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" (conc "defunct" tag) (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" (conc "defunct" tag) server-id) | > > > > > > > > | 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 | (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=?;" (conc "defunct" tag) run-id)) (define (tasks:server-force-clean-run-record mdb run-id iface port tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE state = 'running' AND run_id=? AND interface=? AND port=?;" (conc "defunct" tag) run-id iface port)) ;; BB> adding missing func for --list-servers (define (tasks:server-deregister mdb hostname #!key (pullport #f) (pid #f) (action #f)) ;;pullport pid: pid action: 'delete)) (if (eq? action 'delete) (sqlite3:execute mdb "DELETE FROM servers WHERE pid=? AND port=? AND hostname=?;" pid pullport hostname) (sqlite3:execute mdb "UPDATE servers SET state='defunct', heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" hostname pid))) (define (tasks:server-delete-records-for-this-pid mdb tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE hostname=? AND pid=?;" (conc "defunct" tag) (get-host-name) (current-process-id))) (define (tasks:server-delete-record mdb server-id tag) (sqlite3:execute mdb "UPDATE servers SET state=?,heartbeat=strftime('%s','now') WHERE id=?;" (conc "defunct" tag) server-id) |
︙ | ︙ | |||
421 422 423 424 425 426 427 | "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;" run-id) (reverse res))) ;; no elegance here ... ;; | | | | 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 | "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;" run-id) (reverse res))) ;; no elegance here ... ;; (define (tasks:kill-server hostname pid #!key (kill-switch "")) (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) (setenv "TARGETHOST_LOGF" "server-kills.log") (system (conc "nbfake kill "kill-switch" "pid)) (unsetenv "TARGETHOST_LOGF") (unsetenv "TARGETHOST")) ;; look up a server by run-id and send it a kill, also delete the record for that server ;; (define (tasks:kill-server-run-id run-id #!key (tag "default")) (let* ((tdbdat (tasks:open-db)) |
︙ | ︙ |
Modified utils/viewscreen from [4887de0888] to [dee289c6f4].
1 2 3 4 5 6 7 8 9 10 | #!/bin/bash if ! type screen &> /dev/null;then xterm -geometry 180x20 -e "$*;echo Press any key to continue;bash -c 'read -n 1 -s'" & exit fi if [[ $(screen -list | egrep 'Attached|Detached'|awk '{print $1}') == "" ]];then # echo "No screen found for displaying to. Run \"screen\" in an xterm" # exit 1 | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 | #!/bin/bash if ! type screen &> /dev/null;then xterm -geometry 180x20 -e "$*;echo Press any key to continue;bash -c 'read -n 1 -s'" & exit fi if [[ $(screen -list | egrep 'Attached|Detached'|awk '{print $1}') == "" ]];then # echo "No screen found for displaying to. Run \"screen\" in an xterm" # exit 1 xterm -e screen -e^ff & sleep 1 screen -X hardstatus off screen -X hardstatus alwayslastline screen -X hardstatus string '%{= kG}[ %{G}%H %{g}][%= %{= kw}%?%-Lw%?%{r}(%{W}%n*%f%t%?(%u)%?%{r})%{w}%?%+Lw%?%?%= %{g}][%{B} %m-%d %{W} %c %{g}]' fi cmd="cd $PWD;$*" screen -X screen bash -c "$cmd;echo \"Press any key to continue, ctrl-f <space> to see other windows\";bash -c 'read -n 1 -s'" |