Changes In Branch v1.80-start-all Through [53900a0d02] Excluding Merge-Ins
This is equivalent to a diff from ffe3df4e65 to 53900a0d02
2023-11-06
| ||
03:22 | Start all servers (rebased) Leaf check-in: 8a1f055698 user: matt tags: v1.80-start-all | |
2023-10-20
| ||
05:12 |
Merged fork
This node ran run-core-tests.sh through kill-rerun - a pretty good result. Dashboard comes up quickly also. check-in: e607892c7d user: mrwellan tags: v1.80 | |
04:57 | Merged fork check-in: 53900a0d02 user: mrwellan tags: v1.80-start-all | |
2023-10-19
| ||
21:02 | Start servers for all dbs on first access of main.db. WARNING: This sometimes runs away! check-in: dbfd08bd90 user: matt tags: v1.80-start-all | |
18:55 | changed a debug msg to level2, increased delay from 0.5 to 2 secs Leaf check-in: 60b31fb56a user: mmgraham tags: v1.80-processes | |
2023-10-14
| ||
20:19 | removed a bit of not-needed junk from rmt.scm Leaf check-in: ffe3df4e65 user: matt tags: v1.80-matt-fixme | |
2023-10-13
| ||
20:42 | Merged in cached writes check-in: cdc7397963 user: matt tags: v1.80-matt-fixme | |
Modified api.scm from [13a08c65d1] to [47ba07ff8b].
︙ | |||
310 311 312 313 314 315 316 317 318 | 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 | + + - - + + + + + + + + + + + + + + + + + + + + + + + | (set! *api-process-request-count* (- *api-process-request-count* 1)) ;; (serialize payload) (api:unregister-thread (current-thread)) payload)) (else (assert #f "FATAL: failed to deserialize indat "indat)))))) (define *last-refresh-of-dbs* 0) (define *db-starts-running* #f) (define (api:dispatch-request dbstruct cmd run-id params) |
︙ |
Modified archive.scm from [e07377cf5e] to [e156e4a1c8].
︙ | |||
357 358 359 360 361 362 363 | 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | - + | (print-prefix "Running: ") (archive-info (archive:allocate-new-archive-block blockid-cache *toppath* tsname min-space target-patt run-patt "megatest-db")) (archive-dir (if archive-info (cdr archive-info) #f)) (archive-id (if archive-info (car archive-info) -1)) (home-host (server:choose-server *toppath* 'homehost)) (archive-time (seconds->std-time-str (current-seconds))) (archive-staging-db (conc *toppath* "/.db-snapshot/archive_" archive-time)) |
︙ |
Modified common.scm from [516effd7ae] to [0854266963].
︙ | |||
19 20 21 22 23 24 25 26 27 28 29 30 31 32 | 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | + | ;;====================================================================== (declare (unit common)) (declare (uses commonmod)) (declare (uses rmtmod)) (declare (uses debugprint)) (declare (uses mtargs)) (use srfi-1 data-structures posix regex-case (prefix base64 base64:) format dot-locking csv-xml z3 udp ;; sql-de-lite hostinfo md5 message-digest typed-records directory-utils stack matchable regex posix (srfi 18) extras ;; tcp (prefix nanomsg nmsg:) (prefix sqlite3 sqlite3:) |
︙ | |||
245 246 247 248 249 250 251 | 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 | - + | ;;====================================================================== (define *common:this-exe-fullpath* (common:get-this-exe-fullpath)) (define *common:this-exe-dir* (pathname-directory *common:this-exe-fullpath*)) (define *common:this-exe-name* (pathname-strip-directory *common:this-exe-fullpath*)) (define (common:get-sync-lock-filepath) |
︙ | |||
1531 1532 1533 1534 1535 1536 1537 | 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 | - + | ;;====================================================================== ;; lazy-safe get file mod time. on any error (file not existing etc.) return 0 ;; (define (common:lazy-modification-time fpath) (handle-exceptions exn (begin |
︙ | |||
2278 2279 2280 2281 2282 2283 2284 | 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 | - + | ;; returns: ok/not dbspace required-space ;; (define (common:check-db-dir-space) (let* ((required (string->number ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks. (or (configf:lookup *configdat* "setup" "dbdir-space-required") "1000000"))) |
︙ |
Modified commonmod.scm from [7e88abb9dd] to [5c1deb5d33].
︙ | |||
158 159 160 161 162 163 164 165 166 167 168 169 170 171 | 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 | + + + + + + + | (if valstr (val->alist valstr) '()))) ;; should it return empty list or #f to indicate not set? (define (get-section cfgdat section) (hash-table-ref/default cfgdat section '())) (define (common:make-tmpdir-name areapath tmpadj) (let* ((area (pathname-file areapath)) (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb"))) (unless (directory-exists? dname) (create-directory dname #t)) dname)) ;; dot-locking egg seems not to work, using this for now ;; if lock is older than expire-time then remove it and try again ;; to get the lock ;; (define (common:simple-file-lock fname #!key (expire-time 300)) (let* ((lock-exists (file-exists? fname)) |
︙ |
Modified dashboard-tests.scm from [d3d14d0eb8] to [63a55f86f7].
︙ | |||
461 462 463 464 465 466 467 | 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 | - + | dlog)) ;;====================================================================== ;; ;;====================================================================== (define (dashboard-tests:examine-test run-id test-id) ;; run-id run-key origtest) |
︙ |
Modified dashboard.scm from [d064a48d13] to [92015a98e3].
︙ | |||
400 401 402 403 404 405 406 | 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 | - - + + | (define (dboard:tabdat-make-data) (let ((dat (make-dboard:tabdat))) (dboard:setup-tabdat dat) (dboard:setup-num-rows dat) dat)) (define (dboard:setup-tabdat tabdat) |
︙ | |||
926 927 928 929 930 931 932 | 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 | - + - + | (if (null? all-test-ids) (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id) (hash-table-set! (dboard:tabdat-allruns-by-id tabdat) run-id run-struct)) (if (or (null? tal) (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update (begin (when (> elapsed-time 2) |
︙ |
Modified db.scm from [3332ad9a8b] to [0a367c507f].
︙ | |||
131 132 133 134 135 136 137 | 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 | - + | (debug:print-error 0 *default-log-port* " query " stmt " failed, params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn) (print-call-chain (current-error-port)) default))) (apply sqlite3:first-result db stmt params))) (define (db:setup do-sync) (assert *toppath* "FATAL: db:setup called before launch:setup has been run.") |
︙ | |||
265 266 267 268 269 270 271 | 265 266 267 268 269 270 271 272 273 274 275 276 277 278 | - - - - - - - | (debug:print-error 0 *default-log-port* " params: " params ", error: " ((condition-property-accessor 'exn 'message) exn) ", arguments: " ((condition-property-accessor 'exn 'arguments) exn) ", location: " ((condition-property-accessor 'exn 'location) exn) )) |
︙ | |||
465 466 467 468 469 470 471 | 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 | - + | (max (get-mtime fname) (get-mtime wal-file) (get-mtime shm-file)))) ;; (define (db:all-db-sync dbstruct) ;; (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db)) ;; (data-synced 0) ;; count of changed records |
︙ | |||
554 555 556 557 558 559 560 | 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 | - + | ;; 'closeall - close all opened dbs ;; 'schema - attempt to apply schema changes ;; run-ids: '(1 2 3 ...) or #f (for all) ;; (define (db:multi-db-sync dbstruct . options) (let* (;; (dbdat (db:open-db dbstruct #f dbfile:db-init-proc)) (data-synced 0) ;; count of changed records |
︙ | |||
1252 1253 1254 1255 1256 1257 1258 | 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 | - + | ;;====================================================================== ;; no-sync.db - small bits of data to be shared between servers ;;====================================================================== (define (db:get-dbsync-path) (case (rmt:transport-mode) |
︙ | |||
1578 1579 1580 1581 1582 1583 1584 | 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 | - + | res)) ;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using cachedb db) ??? ;; ;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!! (define (db:get-changed-run-ids since-time) |
︙ | |||
4356 4357 4358 4359 4360 4361 4362 | 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 | - + | (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db") #f )))) ;; sync for filesystem local db writes ;; (define (db:run-lock-and-sync no-sync-db) |
︙ | |||
4412 4413 4414 4415 4416 4417 4418 | 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 | - + | (thread-sleep! 0.05) ;; delay for startup (let ((legacy-sync (common:run-sync?)) (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300)) (debug-mode (debug:debug-mode 1)) (last-time (current-seconds)) ;; last time through the sync loop (no-sync-db (db:open-no-sync-db)) (sync-duration 0) ;; run time of the sync in milliseconds |
︙ | |||
4520 4521 4522 4523 4524 4525 4526 | 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 | - + | ;; (for-each (lambda (subdb) (let* (;;(dbstruct (db:setup)) (mtdb (dbr:subdb-mtdb subdb)) (mtpath (db:dbdat-get-path mtdb)) |
︙ |
Modified dbfile.scm from [172c69b638] to [6840895eab].
︙ | |||
240 241 242 243 244 245 246 | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | + - + | #t ) #f ) ) (define (dbfile:make-tmpdir-name areapath tmpadj) (let* ((area (pathname-file areapath)) |
︙ | |||
485 486 487 488 489 490 491 | 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 | - + | ;; opens and returns handle and nothing else ;; ;; NOTE: this is already protected by mutex *no-sync-db-mutex* ;; (define (dbfile:raw-open-no-sync-db dbpath) (if (not (file-exists? dbpath)) (create-directory dbpath #t)) |
︙ | |||
578 579 580 581 582 583 584 585 586 587 588 589 590 591 | 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 | + | (define (dbfile:register-process nsdb host port pid starttime endtime status purpose dbname mtversion) (sqlite3:execute nsdb "INSERT INTO processes (host,port,pid,starttime,endtime,status,purpose,dbname,mtversion) VALUES (?,?,?,?,?,?,?,?,?);" host port pid starttime endtime status purpose dbname mtversion)) (define (dbfile:set-process-status nsdb host pid newstatus) (sqlite3:execute nsdb "UPDATE processes SET status=? WHERE host=? AND pid=?;" newstatus host pid)) ;; get list of process records to examine for suitability of connecting to (define (dbfile:get-process-options nsdb purpose dbname) (sqlite3:fold-row ;; host port pid starttime status mtversion (lambda (res . row) (cons row res)) '() nsdb |
︙ | |||
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 | 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 | + + + + - + + + | ;; transaction protected lock acquisition ;; either: ;; fails returns (#f lock-creation-time identifier) ;; succeeds (returns (#t lock-creation-time identifier) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock-with-id db keyname identifier) (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: db: " db " keyname: " keyname " identifier: " identifier) (sqlite3:with-transaction db (lambda () (condition-case (let* ((curr-val (db:no-sync-get/default db keyname #f))) (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: curr-val: " curr-val) (if curr-val (match (db:extract-time-identifier curr-val) ;; result->timestamp, identifier ((timestamp . ident) (cons (equal? ident identifier) timestamp)) (else (debug:print 2 *default-log-port* "db:no-sync-get-lock-with-id: malformed lock") |
︙ |
Modified dbmod.scm from [7ac026a501] to [00037bb607].
︙ | |||
198 199 200 201 202 203 204 | 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | - + | (tmpadj "") ;; add to tmp path (syncdir 'todisk)) ;; todisk is used when caching in /tmp and writing data back to MTRAH (let* ((dbstruct (or dbstruct-in (make-dbr:dbstruct areapath: areapath))) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (dbpath (dbmod:get-dbdir dbstruct)) ;; directory where all the .db files are kept (dbfullname (conc dbpath"/"dbfname)) ;; (dbmod:run-id->full-dbfname dbstruct run-id)) (dbexists (file-exists? dbfullname)) |
︙ |
Modified megatest.scm from [d24777178a] to [af8974dd23].
︙ | |||
966 967 968 969 970 971 972 | 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 | - + | (let* (;; (run-id (args:get-arg "-run-id")) (dbfname (args:get-arg "-db")) (tl (launch:setup)) (keys (keys:config-get-fields *configdat*))) (case (rmt:transport-mode) ((tcp) (let* ((timeout (server:expiration-timeout))) |
︙ | |||
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 | 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 | + + + + | ) ) sfiles ) ) ) dbfiles ) ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id. (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db")) (delete-file (conc *toppath* "/.mtdb/no-sync.db")) ) (set! *didsomething* #t) (exit) ) ) ;;====================================================================== |
︙ | |||
2130 2131 2132 2133 2134 2135 2136 | 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 | - + - + | (begin (debug:print-info 1 *default-log-port* "Missing required argument -source <archive path>") (exit 1))) (if (common:file-exists? (conc *toppath* "/megatest.db")) (begin (debug:print-info 1 *default-log-port* "File " (conc *toppath* "/megatest.db") " already exists. Please remove it before trying to replicate db") (exit 1))) |
︙ |
Modified rmt.scm from [1cb17321d0] to [b6600b2d34].
︙ | |||
142 143 144 145 146 147 148 | 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | - + | (cons 'none 0)) (loop (car tal)(cdr tal) newmax-cmd currmax))))))) (mutex-unlock! *db-stats-mutex*) res)) (define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5)) (let* ((qry-is-write (not (member cmd api:read-only-queries))) |
︙ |
Modified tasks.scm from [4adbc308eb] to [93c938d59a].
︙ | |||
82 83 84 85 86 87 88 | 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | - + | (debug:print 5 *default-log-port* " exn=" (condition->list exn)) (thread-sleep! 1) (tasks:open-db numretries (- numretries 1))) (begin (print-call-chain (current-error-port)) (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn)) (debug:print 5 *default-log-port* " exn=" (condition->list exn)))) |
︙ |
Modified tcp-transportmod.scm from [a1fcad65c5] to [98befc0e3b].
︙ | |||
130 131 132 133 134 135 136 137 138 139 140 141 142 | 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 | + + - + - + - + - + + - + + - + + - + - + - + + + + | ;; (max-connections 4096) ;; do all the busy work of finding and setting up conn for ;; connecting to a server ;; (define (tt:client-connect-to-server ttdat dbfname run-id testsuite) (assert (tt:valid-run-id run-id dbfname) "FATAL: invalid run-id "run-id) (debug:print-info 2 *default-log-port* "tt:client-connect-to-server " dbfname " " run-id) (let* ((conn (hash-table-ref/default (tt-conns ttdat) dbfname #f)) (server-start-proc (lambda () (tt:server-process-run (tt-areapath ttdat) testsuite ;; (dbfile:testsuite-name) (common:find-local-megatest) dbfname ;; run-id |
︙ | |||
221 222 223 224 225 226 227 228 229 230 231 232 233 234 | 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 | + | (try-again))))) ;; client side handler ;; ;;(tt:handler #<tt> get-keys #f () 2 #f "/home/matt/data/megatest/ext-tests" #f "main.db" "ext-tests" "/home/matt/data/megatest/bin/.22.04/../megatest") ;; (define (tt:handler ttdat cmd run-id params attemptnum area-dat areapath readonly-mode dbfname testsuite mtexe) (debug:print 2 *default-log-port* "tt:handler cmd: " cmd " run-id: " run-id " attemptnum: " attemptnum) ;; NOTE: areapath is passed in and in tt struct. We'll use passed in value for now. (let* ((conn (tt:client-connect-to-server ttdat dbfname run-id testsuite))) ;; (hash-table-ref/default (tt-conns ttdat) dbfname #f))) (if conn ;; have connection, call the server (let* ((res (tt:send-receive ttdat conn cmd run-id params))) ;; res is (status errmsg result meta) ; (debug:print 0 *default-log-port* "conn:" conn " res: " res) |
︙ | |||
283 284 285 286 287 288 289 | 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 | - + | ;; start server - addressed in client-connect-to-server ;; delay - addressed in client-connect-to-server ;; try again (thread-sleep! 0.25) ;; dunno, I think this needs to be here (tt:handler ttdat cmd run-id params (+ attemptnum 1) area-dat areapath readonly-mode dbfname testsuite mtexe)) )))) (begin ;; no server file, delay and try again |
︙ | |||
462 463 464 465 466 467 468 469 470 471 472 473 474 475 | 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 | + | ;; This is the routine called in megatest.scm to start a server ;; ;; Server viability is checked in keep-running. Blindly start and run here. ;; (define (tt:start-server areapath run-id dbfname-in handler keys) (assert areapath "FATAL: areapath not provided for tt:start-server") ;; is there already a server for this dbfile? Then exit. (debug:print 2 *default-log-port* "tt:start-server: " dbfname-in) (let* ((ttdat (make-tt areapath: areapath)) (dbfname (or dbfname-in (dbmod:run-id->dbfname run-id))) (servers (tt:find-server areapath dbfname))) ;; should use tt:get-current-server-info instead (if (> (length servers) 4) (begin (debug:print 0 *default-log-port* "INFO: found server(s) already running for db "dbfname", "(string-intersperse servers ",")" Exiting.") (exit)) |
︙ | |||
753 754 755 756 757 758 759 | 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 | - + + + - + | bad-dat))))))))) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; |
︙ | |||
786 787 788 789 790 791 792 | 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 | - + | " -server - ";; (or target-host "-") " -m testsuite:"testsuite " -db "dbfname ;; (dbmod:run-id->dbfname run-id) " " profile-mode (conc " >> " logfile " 2>&1 &")))) ;; we want the remote server to start in *toppath* so push there ;; (push-directory areapath) ;; use cd in the command line instead |
︙ |
Modified utils/mt_xterm from [5e40a3e5f1] to [27e4db9521].
︙ | |||
18 19 20 21 22 23 24 | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | - - - - + + + + + - - - | # along with Megatest. If not, see <http://www.gnu.org/licenses/>. MT_TMPDISPLAY=$DISPLAY MT_TMPUSER=$USER MT_HOME=$HOME tmpfile=`mktemp` |
︙ |