Overview
Comment: | Fixed some problems with load management and wrong var used in pushing db handles back onto the dbstack. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 |
Files: | files | file ages | folders |
SHA1: |
753236add67849dd9443f386fc75b1ad |
User & Date: | matt on 2017-08-06 00:58:04 |
Other Links: | branch diff | manifest | tags |
Context
2017-08-06
| ||
19:21 | Fixed case where starting test-control-panel was delayed in starting due to writing out a .testconfig file (required use of no-sync lock which required use of server check-in: b686cea533 user: matt tags: v1.64 | |
00:58 | Fixed some problems with load management and wrong var used in pushing db handles back onto the dbstack. check-in: 753236add6 user: matt tags: v1.64 | |
2017-08-04
| ||
00:05 | Fixed possible memory growth problem where used dbstructs were not being pushed back on the stack check-in: 6cd5eb771a user: matt tags: v1.64 | |
Changes
Modified common.scm from [f31b14bafc] to [071499421b].
︙ | ︙ | |||
1517 1518 1519 1520 1521 1522 1523 | (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload numcpus)) (loadjmp (- first next))) (cond ((and (> first adjload) (> count 0)) | | | | | | | 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 | (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload numcpus)) (loadjmp (- first next))) (cond ((and (> first adjload) (> count 0)) (debug:print-info 0 *default-log-port* "server start delayed " waitdelay " seconds due to load " first " exceeding max of " adjload " on server " (or remote-host (get-host-name)) " (normalized load-limit: " maxload ") " (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host)) ((and (> loadjmp numcpus) (> count 0)) (debug:print-info 0 *default-log-port* "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg "")) (thread-sleep! waitdelay) (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg remote-host: remote-host))))) (define (common:wait-for-homehost-load maxload msg) (let* ((hh-dat (if (common:on-homehost?) ;; if we are on the homehost then pass in #f so the calls are local. #f (common:get-homehost))) (hh (if hh-dat (car hh-dat) #f)) (numcpus (common:get-num-cpus hh))) (common:wait-for-normalized-load maxload msg hh))) (define (common:get-num-cpus remote-host) (let ((proc (lambda () (let loop ((numcpu 0) (inl (read-line))) (if (eof-object? inl) numcpu (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl) (+ numcpu 1) numcpu) (read-line))))))) (if remote-host (with-input-from-pipe (conc "ssh " remote-host " cat /proc/cpuinfo") proc) (with-input-from-file "/proc/cpuinfo" proc)))) ;; wait for normalized cpu load to drop below maxload ;; (define (common:wait-for-normalized-load maxload msg remote-host) (let ((num-cpus (common:get-num-cpus remote-host))) (common:wait-for-cpuload maxload num-cpus 15 msg: msg remote-host: remote-host))) (define (get-uname . params) (let* ((uname-res (process:cmd-run->list (conc "uname " (if (null? params) "-a" (car params))))) (uname #f)) (if (null? (car uname-res)) |
︙ | ︙ |
Modified db.scm from [2dd7277f18] to [144a083df6].
︙ | ︙ | |||
1043 1044 1045 1046 1047 1048 1049 | (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) options) data-synced)) (define (db:tmp->megatest.db-sync dbstruct last-update) (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) (tmpdb (db:get-db dbstruct)) | | | | > | 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 | (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)) options) data-synced)) (define (db:tmp->megatest.db-sync dbstruct last-update) (let* ((mtdb (dbr:dbstruct-mtdb dbstruct)) (tmpdb (db:get-db dbstruct)) (refndb (dbr:dbstruct-refndb dbstruct)) (res (db:sync-tables (db:sync-all-tables-list dbstruct) last-update tmpdb refndb mtdb))) (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb) res)) ;;;; run-ids ;; if #f use *db-local-sync* : or 'local-sync-flags ;; if #t use timestamps : or 'timestamps ;; ;; NB// no-sync-db is the db handle, not a flag! ;; |
︙ | ︙ | |||
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 | db (conc "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND last_df > ?;") dneeded)) blocks)) ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space ;; (define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db | > | 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 | db (conc "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND last_df > ?;") dneeded)) (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) blocks)) ;; returns id of the record, register a disk allocated to archiving and record it's last known ;; available space ;; (define (db:archive-register-disk dbstruct bdisk-name bdisk-path df) (let* ((dbdat (db:get-db dbstruct)) ;; archive tables are in main.db |
︙ | ︙ | |||
1432 1433 1434 1435 1436 1437 1438 | WHERE archive_disk_id=? AND disk_path=?;" bdisk-id archive-path du)) res) (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) VALUES (?,?,?);" bdisk-id archive-path (or du 0)) | | > > | 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 | WHERE archive_disk_id=? AND disk_path=?;" bdisk-id archive-path du)) res) (begin (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du) VALUES (?,?,?);" bdisk-id archive-path (or du 0)) (db:archive-register-block-name dbstruct bdisk-id archive-path du: du))) (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) res)) ;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id ;; (define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id) (db:with-db dbstruct |
︙ | ︙ | |||
4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 | (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? outputfile (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") | > | 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 | (if (string-match (regexp "^[/~]+.*") outputfile) ;; full path? outputfile (begin (debug:print 0 *default-log-port* "WARNING: path given, " outputfile " is relative, prefixing with current directory") (conc (current-directory) "/" outputfile))) results) ;; brutal clean up (stack-push! (dbr:dbstruct-dbstack dbstruct) dbdat) (system "rm -rf tempdir"))) ;; (db:extract-ods-file db "outputfile.ods" '(("sysname" "%")("fsname" "%")("datapath" "%")) "%") |
Modified server.scm from [cebb70c145] to [45bfab59ba].
︙ | ︙ | |||
128 129 130 131 132 133 134 | (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) | | | 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host)) (not (equal? curr-ip target-host))) (begin (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile) (setenv "TARGETHOST" target-host))) (setenv "TARGETHOST_LOGF" logfile) (common:wait-for-normalized-load load-limit " delaying server start due to load" target-host) ;; do not try starting servers on an already overloaded machine, just wait forever (system (conc "nbfake " cmdln)) (unsetenv "TARGETHOST_LOGF") (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST")) (thread-join! log-rotate) (pop-directory))) ;; given a path to a server log return: host port startseconds |
︙ | ︙ | |||
501 502 503 504 505 506 507 | (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync " sync-done=" sync-done " sync-period=" sync-period) (if (and (> sync-period 5) (common:low-noise-print 30 "sync-period")) | | | 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 | (not sync-in-progress) (not recently-synced)))) (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop. need-sync="need-sync" sync-in-progress=" sync-in-progress " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync " sync-done=" sync-done " sync-period=" sync-period) (if (and (> sync-period 5) (common:low-noise-print 30 "sync-period")) (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds.")) ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced)) ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync) (if will-sync (set! *db-sync-in-progress* #t)) (mutex-unlock! *db-multi-sync-mutex*) (if will-sync (let (;; (max-sync-duration (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK! (sync-start (current-milliseconds))) |
︙ | ︙ |