Overview
Comment: | Added an alternative to faux-lock that is simpler and uses a transaction. Added locking to test launch process. This may address the copy collisions we have seen. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.64 |
Files: | files | file ages | folders |
SHA1: |
6ea70b977cdc9de384f2e0d125403bb7 |
User & Date: | matt on 2017-07-09 21:33:40 |
Other Links: | branch diff | manifest | tags |
Context
2017-07-10
| ||
10:47 | Bump version to v1.6424 check-in: 7249f37836 user: mrwellan tags: v1.64 | |
2017-07-09
| ||
21:33 | Added an alternative to faux-lock that is simpler and uses a transaction. Added locking to test launch process. This may address the copy collisions we have seen. check-in: 6ea70b977c user: matt tags: v1.64 | |
2017-07-07
| ||
16:57 | Added delte-run option check-in: 1c79e4a834 user: ritikaag tags: v1.64 | |
Changes
Modified api.scm from [1f6842e15f] to [c4438e36a1].
︙ | ︙ | |||
212 213 214 215 216 217 218 219 220 221 222 223 224 225 | ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) ((tasks-get-last) (apply tasks:get-last dbstruct params)) ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) | > | 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 | ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params)) ((tasks-get-last) (apply tasks:get-last dbstruct params)) ;; NO SYNC DB ((no-sync-set) (apply db:no-sync-set *no-sync-db* params)) ((no-sync-get/default) (apply db:no-sync-get/default *no-sync-db* params)) ((no-sync-del!) (apply db:no-sync-del! *no-sync-db* params)) ((no-sync-get-lock) (apply db:no-sync-get-lock *no-sync-db* params)) ;; ARCHIVES ;; ((archive-get-allocations) ((archive-register-disk) (apply db:archive-register-disk dbstruct params)) ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params)) ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey)) |
︙ | ︙ |
Modified common.scm from [af8ef99e5c] to [4881cf65d4].
︙ | ︙ | |||
2069 2070 2071 2072 2073 2074 2075 | (string-intersperse (map (lambda (x) (number->string x 16)) (map string->number (string-split instr))) "/")) | > > > > > > | | 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 | (string-intersperse (map (lambda (x) (number->string x 16)) (map string->number (string-split instr))) "/")) ;;====================================================================== ;; L O C K I N G M E C H A N I S M S ;;====================================================================== ;; faux-lock is deprecated. Please use simple-lock below ;; (define (common:faux-lock keyname #!key (wait-time 8)(allow-lock-steal #t)) (if (rmt:no-sync-get/default keyname #f) ;; do not be tempted to compare to pid. locking is a one-shot action, if already locked for this pid it doesn't actually count (if (> wait-time 0) (begin (thread-sleep! 1) (if (eq? wait-time 1) ;; only one second left, steal the lock (begin (debug:print-info 0 *default-log-port* "stealing lock for " keyname) |
︙ | ︙ | |||
2091 2092 2093 2094 2095 2096 2097 | (define (common:faux-unlock keyname #!key (force #f)) (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) (begin (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) #t) #f)) | > > > > | > > > > | 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 | (define (common:faux-unlock keyname #!key (force #f)) (if (or force (equal? (conc (current-process-id)) (conc (rmt:no-sync-get/default keyname #f)))) (begin (if (rmt:no-sync-get/default keyname #f) (rmt:no-sync-del! keyname)) #t) #f)) ;; simple lock. improve and converge on this one. ;; (define (common:simple-lock keyname) (rmt:no-sync-get-lock keyname)) ;;====================================================================== ;; ;;====================================================================== (define (common:in-running-test?) (and (args:get-arg "-execute") (get-environment-variable "MT_CMDINFO"))) (define (common:get-color-from-status status) (cond ((equal? status "PASS") "green") ((equal? status "FAIL") "red") |
︙ | ︙ |
Modified db.scm from [43a8d2d12e] to [05a6ad4f54].
︙ | ︙ | |||
1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 | (if newres newres res)) res))) (define (db:no-sync-close-db db) (db:safely-close-sqlite3-db db)) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? | > > > > > > > > > > > > > > > > > > > | 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 | (if newres newres res)) res))) (define (db:no-sync-close-db db) (db:safely-close-sqlite3-db db)) ;; transaction protected lock aquisition ;; either: ;; fails returns (#f . lock-creation-time) ;; succeeds (returns (#t . lock-creation-time) ;; use (db:no-sync-del! db keyname) to release the lock ;; (define (db:no-sync-get-lock db keyname) (sqlite3:with-transaction (db:no-sync-db db) (lambda () (handle-exceptions exn (let ((lock-time (current-seconds))) (sqlite3:execute db "INSERT INTO no_sync_metadat (var,val) VALUES(?,?);" keyname lock-time) `(#t . ,lock-time)) `(#f . ,(sqlite3:first-result db "SELECT val FROM no_sync_metadat WHERE var=?;" keyname)))))) ;; use a global for some primitive caching, it is just silly to ;; re-read the db over and over again for the keys since they never ;; change ;; why get the keys from the db? why not get from the *configdat* ;; using keys:config-get-fields? |
︙ | ︙ |
Modified launch.scm from [1d514c355f] to [bc68bfb44c].
︙ | ︙ | |||
1236 1237 1238 1239 1240 1241 1242 | ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex | > > > > > > > > > > > > > | | 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 | ;; 3. create link from run dir to megatest runs area ;; 4. remotely run the test on allocated host ;; - could be ssh to host from hosts table (update regularly with load) ;; - could be netbatch ;; (launch-test db (cadr status) test-conf)) (define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params) (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex (let* ((lock-key (conc "test-" test-id)) (got-lock (let loop ((lock (rmt:no-sync-get-lock lock-key)) (expire-time (+ (current-seconds) 15))) ;; give up on getting the lock and steal it after 15 seconds (if (car lock) #t (if (> (current-seconds) expire-time) (begin (debug:print-info 0 *default-log-port* "Timed out waiting for a lock to launch test " keyvals " " runname " " test-name " " test-path) (rmt:no-sync-del! lock-key) ;; destroy the lock (loop (rmt:no-sync-get-lock lock-key) expire-time)) ;; (begin (thread-sleep! 1) (loop (rmt:no-sync-get-lock lock-key) expire-time)))))) (item-path (item-list->path itemdat)) (contour #f)) ;; NOT READY FOR THIS (args:get-arg "-contour"))) (let loop ((delta (- (current-seconds) *last-launch*)) (launch-delay (configf:lookup-number *configdat* "setup" "launch-delay" default: 1))) (if (> launch-delay delta) (begin (if (common:low-noise-print 1200 "test launch delay") ;; every two hours or so remind the user about launch delay. (debug:print-info 0 *default-log-port* "NOTE: test launches are delayed by " launch-delay " seconds. See megatest.config launch-delay setting to adjust.")) ;; launch of " test-name " for " (- launch-delay delta) " seconds")) |
︙ | ︙ | |||
1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 | cmdstr (conc cmdstr " >> mt_launch.log 2>&1 &"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) (if (list? launch-results) (apply print launch-results) | > | 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 | cmdstr (conc cmdstr " >> mt_launch.log 2>&1 &"))) (car fullcmd)) (if useshell '() (cdr fullcmd))))) (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork. (rmt:no-sync-del! lock-key) ;; release the lock for starting this test (if (not launchwait) ;; give the OS a little time to allow the process to start (thread-sleep! 0.01)) (with-output-to-file "mt_launch.log" (lambda () (print "LAUNCHCMD: " (string-intersperse fullcmd " ")) (if (list? launch-results) (apply print launch-results) |
︙ | ︙ |
Modified rmt.scm from [ec4c6ff50c] to [bc431c6c0b].
︙ | ︙ | |||
865 866 867 868 869 870 871 872 873 874 875 876 877 878 | (define (rmt:no-sync-get/default var default) (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded) (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) | > > > | 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 | (define (rmt:no-sync-get/default var default) (rmt:send-receive 'no-sync-get/default #f `(,var ,default))) (define (rmt:no-sync-del! var) (rmt:send-receive 'no-sync-del! #f `(,var))) (define (rmt:no-sync-get-lock keyname) (rmt:send-receive 'no-sync-get-lock #f `(,keyname))) ;;====================================================================== ;; A R C H I V E S ;;====================================================================== (define (rmt:archive-get-allocations testname itempath dneeded) (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded))) |
︙ | ︙ |