Overview
Comment: | Multi-servers working. Needs polish but the machine didn't overload on > 10 parallel runs of sixtyfivek |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.81-multi-server |
Files: | files | file ages | folders |
SHA1: |
16d75bb8f135501ae0823e8a601122d2 |
User & Date: | matt on 2024-07-03 19:06:41 |
Other Links: | branch diff | manifest | tags |
Context
2024-07-03
| ||
19:08 | Missing change check-in: 25cae1c008 user: mrwellan tags: v1.81-multi-server | |
19:06 | Multi-servers working. Needs polish but the machine didn't overload on > 10 parallel runs of sixtyfivek check-in: 16d75bb8f1 user: matt tags: v1.81-multi-server | |
2024-07-02
| ||
13:55 | Start on increasing allowed number of servers for a db file check-in: 66e4d2383a user: mrwellan tags: v1.81-multi-server | |
Changes
Modified api.scm from [bf55d9f0a8] to [03e4361c1a].
︙ | |||
242 243 244 245 246 247 248 | 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 | + + + + + + + - + + + | (else (assert ok "FATAL: database file and run-id not aligned."))))) (ttdat *server-info*) (server-state (tt-state ttdat)) (maxthreads 20) ;; make this a parameter? (status (cond ((> numthreads maxthreads) (let* ((testsuite (common:get-testsuite-name)) (mtexe (common:find-local-megatest)) (proc (lambda () ;; we are overloaded, try to start another server (debug:print 0 *default-log-port* "Too many threads running, starting another server") (tt:server-process-run *toppath* testsuite mtexe run-id)))) (set! *server-start-requests* (cons proc *server-start-requests*))) |
︙ | |||
275 276 277 278 279 280 281 | 284 285 286 287 288 289 290 291 292 293 294 295 296 297 | - - | (else (assert #f "FATAL: failed to deserialize indat "indat)))))) ;; (set! *api-process-request-count* (- *api-process-request-count* 1)) ;; (serialize payload) (api:unregister-thread (current-thread)) result))) |
︙ |
Modified common.scm from [1039cd02f7] to [098f0c5374].
︙ | |||
399 400 401 402 403 404 405 | 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 | - + + + - - - + + - - - + + - - | (define (common:version-changed?) (not (equal? (common:get-last-run-version) (common:version-signature)))) ;; From 1.70 to 1.80, db's are compatible. |
︙ |
Modified tcp-transportmod.scm from [f5588f1f60] to [a9eba46ae1].
︙ | |||
206 207 208 209 210 211 212 | 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 | + - + | (debug:print-info 2 *default-log-port* "already connected to a server for " dbfname) conn) ;; we are already connected to the server ;; no conn (let* ((sdats (tt:get-server-info-sorted ttdat dbfname)) (sdat (if (null? sdats) #f (let ((indx (max (random (- (length sdats) 1)) 0))) |
︙ | |||
330 331 332 333 334 335 336 337 338 339 340 341 342 343 | 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 | + + + | (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is busy, cmd is "cmd", will try again in "dly" seconds. This is attempt "(- attemptnum 1)) (debug:print 0 *default-log-port* errmsg) (thread-sleep! dly) (tt:handler ttdat cmd run-id params (+ attemptnum 1) readonly-mode dbfname testsuite mtexe server-start-proc))) ((loaded) (debug:print 0 *default-log-port* "WARNING: server for "dbfname" is loaded, slowing queries.") (tt:backoff-incr (tt-conn-host conn)(tt-conn-port conn)) ;; this would be a good place to force reconnection and connect to a different server result) ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) readonly-mode dbfname testsuite mtexe)) (else result))) (else ;; did not receive properly formated result (if (not res) ;; tt:send-receive telling us that communication failed (let* ((host (tt-conn-host conn)) (port (tt-conn-port conn)) |
︙ | |||
380 381 382 383 384 385 386 | 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 | - + - + | ;; (tt:handler ttdat cmd run-id params (+ attemptnum 1) readonly-mode dbfname testsuite mtexe) ))))) (begin (thread-sleep! 1) ;; no conn yet set up, give it a rest and try again (tt:handler ttdat cmd run-id params attemptnum readonly-mode dbfname testsuite mtexe server-start-proc))))) ;; gets server info and appends path to server file |
︙ | |||
516 517 518 519 520 521 522 523 524 525 526 527 528 529 | 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 | + + | ;;====================================================================== ;; server ;;====================================================================== (define (tt:sync-dbs ttdat) #f) (define *server-start-requests* '()) ;; start the listener and start responding to requests ;; ;; NOTE: organise by dbfname, not run-id so we don't need ;; to pull in more modules ;; ;; This is the routine called in megatest.scm to start a server. NOTE: sequence is different for main.db vs. X.db |
︙ | |||
571 572 573 574 575 576 577 | 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 | - - - - - - - - + + + + + + + + + + + - + | (same-host (or (not prime-host) ;; i.e. this is the first host (equal? prime-host host))) (keep-srv (and good-ping same-host))) (if keep-srv (loop (cdr servrs) host (cons servdat result)) |
︙ | |||
648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 | 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 | + + - + | nosyncdbpath (lambda (nsdb) (dbfile:insert-or-update-process nsdb *procinf*))) (debug:print 0 *default-log-port* "Exiting now.") (exit)))))) (define (tt:keep-running ttdat dbfname dbstruct) (thread-sleep! 1) ;; at this point the server is running and responding to calls, we just monitor ;; for db calls and exit if there are none. ;; if I am not in the first 3 servers, exit (let* ((start-time (current-seconds))) (let loop () (let* ((servers (tt:get-server-info-sorted ttdat dbfname)) (home-host (if (null? servers) #f (caar servers))) (my-index (list-index (lambda (x) (equal? (list-ref x 6) (tt-servinf-file ttdat))) servers)) (ok (cond ((not my-index) (debug:print 0 *default-log-port* "WARNING: Apparently I don't exist.") |
︙ | |||
699 700 701 702 703 704 705 | 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 | + + + + + + - + + + | (let* ((sinfo-file (tt-servinf-file ttdat))) ;; (debug:print 0 *default-log-port* "INFO: touching "sinfo-file) (set! (file-modification-time sinfo-file) (current-seconds)) ((dbr:dbstruct-sync-proc dbstruct) last-update) (dbr:dbstruct-last-update-set! dbstruct curr-secs)))) (if (< (- (current-seconds) (tt-last-access ttdat)) (tt-server-timeout-param)) ;; process any requests to start a new server due to load on this one (let* ((requests *server-start-requests*)) (set! *server-start-requests* '()) (if (> (length requests) 0) (debug:print-info 0 *default-log-port* "Processing "(length requests)" server start requests")) (for-each (lambda (proc) |
︙ | |||
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 | 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 | + + + - + + - - - + + + | (define *last-server-start* (make-hash-table)) (define (tt:too-recent-server-start dbfname) (let* ((last-run-time (hash-table-ref/default *last-server-start* dbfname #f))) (and last-run-time (< (- (current-seconds) last-run-time) 5)))) (define *last-server-start-request-time* 0) ;; Given an area path, start a server process ### NOTE ### > file 2>&1 ;; if the target-host is set ;; try running on that host ;; incidental: rotate logs in logs/ dir. ;; (define (tt:server-process-run areapath testsuite mtexe run-id #!key (profile-mode "")) ;; areapath is *toppath* for a given testsuite area (assert areapath "FATAL: tt:server-process-run called without areapath defined.") (assert testsuite "FATAL: tt:server-process-run called without testsuite defined.") (assert mtexe "FATAL: tt:server-process-run called without mtexe defined.") ;; mtest -server - -m testsuite:ext-tests -db 6.db (let* ((dbfname (dbmod:run-id->dbfname run-id))) (if (or (< (- (current-seconds) *last-server-start-request-time*) 5) ;; attempted start less than 5 sec ago |
︙ |