Overview
Comment: | Experimental fix for test panel crash on test delete |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | refactor-api |
Files: | files | file ages | folders |
SHA1: |
043797f2d2938a36de621c38bd54269d |
User & Date: | matt on 2013-07-28 10:04:26 |
Other Links: | branch diff | manifest | tags |
Context
2013-07-28
| ||
14:57 | Partial implementation of /api check-in: 41cad6ae52 user: matt tags: refactor-api | |
10:04 | Experimental fix for test panel crash on test delete check-in: 043797f2d2 user: matt tags: refactor-api | |
2013-07-26
| ||
09:47 | Adding forgotten file check-in: 987788b967 user: matt tags: dev | |
Changes
Modified runs.scm from [1b46e2afce] to [b71546766a].
︙ | ︙ | |||
906 907 908 909 910 911 912 913 914 915 916 917 918 919 | (define (get-dir-up-n dir . params) (let ((dparts (string-split dir "/")) (count (if (null? params) 1 (car params)))) (conc "/" (string-intersperse (take dparts (- (length dparts) count)) "/")))) ;; Remove runs ;; fields are passing in through ;; action: ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? | > > > > > > > > > > > > > > > > > > > > > > > > | 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 | (define (get-dir-up-n dir . params) (let ((dparts (string-split dir "/")) (count (if (null? params) 1 (car params)))) (conc "/" (string-intersperse (take dparts (- (length dparts) count)) "/")))) (define (runs:recursive-delete-with-error-msg real-dir) (if (> (system (conc "rm -rf " real-dir)) 0) (debug:print 0 "ERROR: There was a problem removing " real-dir " with rm -f"))) (define (runs:safe-delete-test-dir real-dir) ;; first delete all sub-directories (directory-fold (lambda (f x) (let ((fullname (conc real-dir "/" f))) (if (directory? fullname)(runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) 0 real-dir) ;; then files other than *testdat.db* (directory-fold (lambda (f x) (let ((fullname (conc real-dir "/" f))) (if (not (string-search (regexp "testdat.db") f)) (runs:recursive-delete-with-error-msg fullname))) (+ 1 x)) 0 real-dir) ;; then the entire directory (runs:recursive-delete-with-error-msg real-dir)) ;; Remove runs ;; fields are passing in through ;; action: ;; 'remove-runs ;; 'set-state-status ;; ;; NB// should pass in keys? |
︙ | ︙ | |||
987 988 989 990 991 992 993 | (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) | > > | | | < | 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 | (test-fulln (db:test-get-fullname new-test-dat))) (case action ((remove-runs) (debug:print-info 0 "test: " test-name " itest-state: " test-state) (if (member test-state (list "RUNNING" "LAUNCHED" "REMOTEHOSTSTART" "KILLREQ")) (begin (if (not (hash-table-ref/default test-retry-time test-fulln #f)) (begin ;; want to set to REMOVING BUT CANNOT do it here? (hash-table-set! test-retry-time test-fulln (current-seconds)))) (if (> (- (current-seconds)(hash-table-ref test-retry-time test-fulln)) allow-run-time) ;; This test is not in a correct state for cleaning up. Let's try some graceful shutdown steps first ;; Set the test to "KILLREQ" and wait five seconds then try again. Repeat up to five times then give ;; up and blow it away. (begin (debug:print 0 "WARNING: could not gracefully remove test " test-fulln ", tried to kill it to no avail. Forcing state to FAILEDKILL and continuing") (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "FAILEDKILL" "n/a" #f) (thread-sleep! 1)) (begin (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "KILLREQ" "n/a" #f) (thread-sleep! 1))) ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ... (if (null? tal) (loop new-test-dat tal) (loop (car tal)(append tal (list new-test-dat))))) (begin (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) "REMOVING" "LOCKED" #f) (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir) (if (and real-dir (> (string-length real-dir) 5) (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc. (begin ;; let* ((realpath (resolve-pathname run-dir))) (debug:print-info 1 "Recursively removing " real-dir) (if (file-exists? real-dir) (runs:safe-delete-test-dir real-dir) (debug:print 0 "WARNING: test dir " real-dir " appears to not exist or is not readable"))) (if real-dir (debug:print 0 "WARNING: directory " real-dir " does not exist") (debug:print 0 "WARNING: no real directory corrosponding to link " run-dir ", nothing done"))) (if (symbolic-link? run-dir) (begin (debug:print-info 1 "Removing symlink " run-dir) |
︙ | ︙ | |||
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 | exn (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) | > > | 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 | exn (debug:print 0 "ERROR: Failed to remove directory " run-dir ((condition-property-accessor 'exn 'message) exn) ", attempting to continue") (delete-directory run-dir))) (if run-dir (debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink") (debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted.")) )) ;; Only delete the records *after* removing the directory. If things fail we have a record (cdb:remote-run db:delete-test-records db #f (db:test-get-id test)) (if (not (null? tal)) (loop (car tal)(cdr tal)))))) ((set-state-status) (debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status)) (cdb:remote-run db:test-set-state-status-by-id db (db:test-get-id test) (car state-status)(cadr state-status) #f) (if (not (null? tal)) (loop (car tal)(cdr tal)))) |
︙ | ︙ |