Overview
Comment: | Added server killing, cleaning out junk records. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.60 | v1.6001_beta2 |
Files: | files | file ages | folders |
SHA1: | 71bd40f62724cbb700452132ad1b96fe |
User & Date: | mrwellan on 2014-09-11 11:44:36 |
Other Links: | branch diff | manifest | tags |
Context
2014-09-11
| ||
13:03 | Experimental tweaks to address stuck server start issue check-in: 7c83ed2d8b user: mrwellan tags: v1.60 | |
11:44 | Added server killing, cleaning out junk records. check-in: 71bd40f627 user: mrwellan tags: v1.60, v1.6001_beta2 | |
10:13 | Added -import-megatest.db to help. check-in: b931cba810 user: mrwellan tags: v1.60, v1.6001_beta | |
Changes
Modified db.scm from [f6b0a09960] to [922a7b73d4].
︙ | ︙ | |||
764 765 766 767 768 769 770 | ;; 1. Look at test records either deleted or part of deleted run: ;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; | | | < < | | 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 | ;; 1. Look at test records either deleted or part of deleted run: ;; a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown' ;; b. If test dir gone, delete the test record ;; 2. Look at run records ;; a. If have tests that are not deleted, set state='unknown' ;; b. .... ;; (define (db:clean-up db) (debug:print 0 "WARNING: db clean up not ported to v1.60, cleanup action will be on megatest.db") (let* (;; (db (db:get-db dbstruct #f)) (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);")) (statements (map (lambda (stmt) (sqlite3:prepare db stmt)) (list ;; delete all tests that belong to runs that are 'deleted' "DELETE FROM tests WHERE run_id in (SELECT id FROM runs WHERE state='deleted');" |
︙ | ︙ | |||
797 798 799 800 801 802 803 | count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) | | | 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 | count-stmt) (map sqlite3:execute statements) (sqlite3:for-each-row (lambda (tot) (debug:print-info 0 "Records count after clean: " tot)) count-stmt))) (map sqlite3:finalize! statements) (sqlite3:finalize! count-stmt) ;; (db:find-and-mark-incomplete db) (sqlite3:execute db "VACUUM;"))) ;;====================================================================== ;; M E T A G E T A N D S E T V A R S ;;====================================================================== ;; returns number if string->number is successful, string otherwise |
︙ | ︙ | |||
1632 1633 1634 1635 1636 1637 1638 | (define (db:replace-test-records dbstruct run-id testrecs) (db:with-db dbstruct run-id #t (lambda (db) (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ");")) (qry (sqlite3:prepare db qrystr))) (debug:print 0 "INFO: migrating test records for run with id " run-id) | > > > | | | | | | 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 | (define (db:replace-test-records dbstruct run-id testrecs) (db:with-db dbstruct run-id #t (lambda (db) (let* ((qmarks (string-intersperse (make-list (length db:test-record-fields) "?") ",")) (qrystr (conc "INSERT OR REPLACE INTO tests (" db:test-record-qry-selector ") VALUES (" qmarks ");")) (qry (sqlite3:prepare db qrystr))) (debug:print 0 "INFO: migrating test records for run with id " run-id) (sqlite3:with-transaction db (lambda () (for-each (lambda (rec) ;; (debug:print 0 "INFO: Inserting values: " (string-intersperse (map conc (vector->list rec)) ",") "\n") (apply sqlite3:execute qry (vector->list rec))) testrecs))) (sqlite3:finalize! qry))))) ;; map a test-id into the proper range ;; (define (db:adj-test-id mtdb min-test-id test-id) (if (>= test-id min-test-id) test-id |
︙ | ︙ |
Modified megatest.scm from [04d9533be8] to [ae9ba80a5d].
︙ | ︙ | |||
1291 1292 1293 1294 1295 1296 1297 | ;; ;; ;; redo me (list "uname" "rundir" "final_logf" "comment")) ;; ;; ;; redo me (set! *didsomething* #t))) (if (args:get-arg "-import-megatest.db") (let* ((toppath (launch:setup-for-run)) (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) (mtdb (if toppath (db:open-megatest-db))) | | > > > > > > > > > > > > > > | 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 | ;; ;; ;; redo me (list "uname" "rundir" "final_logf" "comment")) ;; ;; ;; redo me (set! *didsomething* #t))) (if (args:get-arg "-import-megatest.db") (let* ((toppath (launch:setup-for-run)) (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f)) (mtdb (if toppath (db:open-megatest-db))) (run-ids (if toppath (db:get-all-run-ids mtdb))) (mdb (tasks:open-db)) (servers (tasks:get-all-servers mdb))) ;; kill servers (for-each (lambda (server) (tasks:server-delete-record mdb (vector-ref server 0) "dbmigration") (tasks:kill-server (vector-ref server 2)(vector-ref server 1))) servers) (sqlite3:finalize! mdb) ;; clear out junk records ;; (db:clean-up mtdb) ;; adjust test-ids to fit into proper range ;; (db:prep-megatest.db-for-migration mtdb) ;; sync runs, test_meta etc. ;; |
︙ | ︙ |
Modified tasks.scm from [929251744a] to [393b62edba].
︙ | ︙ | |||
293 294 295 296 297 298 299 | "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) | | > > | | < | > | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | "SELECT id FROM servers WHERE run_id=? AND (state = 'running' OR (state = 'dbprep' AND (strftime('%s','now') - start_time) < 60));" run-id) res)) (define (tasks:get-all-servers mdb) (let ((res '())) (sqlite3:for-each-row (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) ;; 0 1 2 3 4 5 6 7 8 9 10 11 12 (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res))) mdb "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;") res)) ;; no elegance here ... ;; (define (tasks:kill-server hostname pid) (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname) (setenv "TARGETHOST" hostname) (system (conc "nbfake kill " pid))) ;; (if status ;; #t means alive ;; (begin ;; (if (equal? 
hostname (get-host-name)) ;; (handle-exceptions ;; exn ;; (debug:print-info 0 "server may or may not be dead, check for megatest -server running as pid " pid "\n" ;; " EXCEPTION: " ((condition-property-accessor 'exn 'message) exn)) ;; (debug:print 1 "Sending signal/term to " pid " on " hostname) ;; (process-signal pid signal/term) ;; (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill ;; ;;(process-signal pid signal/kill) ;; ) ;; local machine, send sig term ;; (begin ;; ;;(debug:print-info 1 "Stopping remote servers not yet supported.")))) ;; (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide") ;; (let ((serverdat (list hostname port))) ;; (hash-table-set! *runremote* run-id (http-transport:client-connect hostname port)) ;; (cdb:kill-server serverdat pid))))) ;; remote machine, try telling server to commit suicide ;; (begin ;; (if status ;; (if (equal? hostname (get-host-name)) ;; (begin ;; (debug:print-info 1 "Sending signal/term to " pid " on " hostname) ;; (process-signal pid signal/term) ;; local machine, send sig term ;; (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill ;; (process-signal pid signal/kill)) ;; (debug:print 0 "WARNING: Can't kill frozen server on remote host " hostname)))))) ;;====================================================================== ;; Tasks and Task monitors ;;====================================================================== |
︙ | ︙ |