Megatest

Changes On Branch 75c8dc47137e160c
Login

Changes In Branch test-specific-db Through [75c8dc4713] Excluding Merge-Ins

This is equivalent to a diff from e6642c0f2d to 75c8dc4713

2012-10-07
12:14
merged to trunk while sitting next to a creek in Prescott wilderness check-in: 30895dfd86 user: user tags: trunk, test4-clean
2012-09-18
17:31
Partially broken implementation of steps data move to testrundat check-in: baf93f49cf user: mrwellan tags: test-specific-db
02:49
Competed initial implementation of testrundat check-in: 75c8dc4713 user: matt tags: test-specific-db
2012-09-17
18:02
Partial implementation of testrundat check-in: a8e4d577bc user: mrwellan tags: test-specific-db
2012-09-14
14:18
Brought up to date with latest from trunk check-in: 191987e384 user: mrwellan tags: test-specific-db
14:17
Merged in fix for symlink crash check-in: e6642c0f2d user: mrwellan tags: trunk
14:16
Merged in v1.4403 changes to trunk check-in: e0a7e24c94 user: mrwellan tags: trunk
13:43
Added check for symbolic link to creation logic check-in: 3c167a3ae4 user: mrwellan tags: v1.44

Modified db.scm from [c61472a5e1] to [7749e22ea8].

153
154
155
156
157
158
159





























































160
161
162
163
164
165
166
                                status TEXT DEFAULT 'n/a',
                                type TEXT DEFAULT '',
                              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));")
    ;; Must do this *after* running patch db !! No more. 
    (db:set-var db "MEGATEST_VERSION" megatest-version)
    ))






























































;;======================================================================
;; TODO:
;;   put deltas into an assoc list with version numbers
;;   apply all from last to current
;;======================================================================
(define (patch-db db)
  (handle-exceptions







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
                                status TEXT DEFAULT 'n/a',
                                type TEXT DEFAULT '',
                              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));")
    ;; Must do this *after* running patch db !! No more. 
    (db:set-var db "MEGATEST_VERSION" megatest-version)
    ))

;; Create the sqlite db for the individual test(s)
(define (open-test-db testpath) 
  (let* ((dbpath    (conc testpath "/testdat.db"))
	 (dbexists  (file-exists? dbpath))
	 (db        (sqlite3:open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout (if (args:get-arg "-override-timeout")
					   (string->number (args:get-arg "-override-timeout"))
					   36000))))
    (debug:print 4 "INFO: test dbpath=" dbpath)
    (sqlite3:set-busy-handler! db handler)
    (if (not dbexists)
	(begin
	  (sqlite3:execute db "PRAGMA synchronous = FULL;")
	  (debug:print 0 "Initialized test database " dbpath)
	  (db:testdb-initialize db)))
    (sqlite3:execute db "PRAGMA synchronous = 0;")
    db))

(define (db:testdb-initialize db)
  (for-each
   (lambda (sqlcmd)
     (sqlite3:execute db sqlcmd))
   (list "CREATE TABLE IF NOT EXISTS test_rundat (
              id INTEGER PRIMARY KEY,
              update_time TIMESTAMP,
              cpuload INTEGER DEFAULT -1,
              diskfree INTEGER DEFAULT -1,
              diskusage INTGER DEFAULT -1);"
	  "CREATE TABLE IF NOT EXISTS test_data (
              id INTEGER PRIMARY KEY,
              test_id INTEGER,
              category TEXT DEFAULT '',
              variable TEXT,
	      value REAL,
	      expected REAL,
	      tol REAL,
              units TEXT,
              comment TEXT DEFAULT '',
              status TEXT DEFAULT 'n/a',
              type TEXT DEFAULT '',
              CONSTRAINT test_data_constraint UNIQUE (test_id,category,variable));"
           "CREATE TABLE IF NOT EXISTS test_steps (
              id INTEGER PRIMARY KEY,
              test_id INTEGER, 
              stepname TEXT, 
              state TEXT DEFAULT 'NOT_STARTED', 
              status TEXT DEFAULT 'n/a',
              event_time TIMESTAMP,
              comment TEXT DEFAULT '',
              logfile TEXT DEFAULT '',
              CONSTRAINT test_steps_constraint UNIQUE (test_id,stepname,state));"
	   ;; test_meta can be used for handing commands to the test
	   ;; e.g. KILLREQ
	   ;;      the ackstate is set to 1 once the command has been completed
	   "CREATE TABLE IF NOT EXISTS test_meta (
              id INTEGER PRIMARY KEY,
              var TEXT,
              val TEXT,
              ackstate INTEGER DEFAULT 0,
              CONSTRAINT metadat_constraint UNIQUE (var));")))

;;======================================================================
;; TODO:
;;   put deltas into an assoc list with version numbers
;;   apply all from last to current
;;======================================================================
(define (patch-db db)
  (handle-exceptions
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
     qrystr)
    res))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS
;;======================================================================

(define (db:updater db)
  (let loop ((start-time (current-time)))
    (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
    (db:write-cached-data db)
    (loop start-time)))
    
(define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
  (mutex-lock! *incoming-mutex*)
  (set! *incoming-data* (cons (vector 'meta-info
				      (current-seconds)
				      (list cpuload
					    diskfree
					    minutes
					    test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
			      *incoming-data*))
  (mutex-unlock! *incoming-mutex*)
  (if *cache-on*
      (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info")
      (db:write-cached-data db)))

(define (db:write-cached-data db)
  (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))
    (if (> (length data) 0)
	(debug:print 4 "Writing cached data " data))
    (mutex-lock! *incoming-mutex*)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (for-each (lambda (entry)
		   (case (vector-ref entry 0)
		     ((meta-info)
		      (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
		     ((step-status)
		      (apply sqlite3:execute step-stmt (vector-ref entry 2)))
		     (else
		      (debug:print 0 "ERROR: Queued entry not recognised " entry))))
		 data)))
    (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
    (sqlite3:finalize! step-stmt)
    (set! *incoming-data* '())
    (mutex-unlock! *incoming-mutex*)))

(define (db:roll-up-pass-fail-counts db run-id test-name item-path status)
  (if (and (not (equal? item-path ""))
	   (or (equal? status "PASS")
	       (equal? status "WARN")
	       (equal? status "FAIL")
	       (equal? status "WAIVED")







|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
     qrystr)
    res))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS
;;======================================================================

;; (define (db:updater db)
;;   (let loop ((start-time (current-time)))
;;     (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
;;     (db:write-cached-data db)
;;     (loop start-time)))
;;     
;; (define (db:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
;;   (mutex-lock! *incoming-mutex*)
;;   (set! *incoming-data* (cons (vector 'meta-info
;; 				      (current-seconds)
;; 				      (list cpuload
;; 					    diskfree
;; 					    minutes
;; 					    test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
;; 			      *incoming-data*))
;;   (mutex-unlock! *incoming-mutex*)
;;   (if *cache-on*
;;       (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write as part of test-update-meta-info")
;;       (db:write-cached-data db)))
;; 
;; (define (db:write-cached-data db)
;;   (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
;; 	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
;; 	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))
;;     (if (> (length data) 0)
;; 	(debug:print 4 "Writing cached data " data))
;;     (mutex-lock! *incoming-mutex*)
;;     (sqlite3:with-transaction 
;;      db
;;      (lambda ()
;;        (for-each (lambda (entry)
;; 		   (case (vector-ref entry 0)
;; 		     ((meta-info)
;; 		      (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
;; 		     ((step-status)
;; 		      (apply sqlite3:execute step-stmt (vector-ref entry 2)))
;; 		     (else
;; 		      (debug:print 0 "ERROR: Queued entry not recognised " entry))))
;; 		 data)))
;;     (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
;;     (sqlite3:finalize! step-stmt)
;;     (set! *incoming-data* '())
;;     (mutex-unlock! *incoming-mutex*)))

(define (db:roll-up-pass-fail-counts db run-id test-name item-path status)
  (if (and (not (equal? item-path ""))
	   (or (equal? status "PASS")
	       (equal? status "WARN")
	       (equal? status "FAIL")
	       (equal? status "WAIVED")
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
    (mutex-lock! *incoming-mutex*)
    (set! *incoming-data* (cons (vector 'step-status
					(current-seconds)
					;; FIXME - this should not update the logfile unless it is specified.
					(list test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile "")))
				*incoming-data*))
    (mutex-unlock! *incoming-mutex*)
    (if (not *cache-on*)(db:write-cached-data db))
    #t))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

;; runspatt is a comma delimited list of run patterns







|







1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
    (mutex-lock! *incoming-mutex*)
    (set! *incoming-data* (cons (vector 'step-status
					(current-seconds)
					;; FIXME - this should not update the logfile unless it is specified.
					(list test-id teststep-name state-in status-in (current-seconds) (if comment comment "") (if logfile logfile "")))
				*incoming-data*))
    (mutex-unlock! *incoming-mutex*)
    ;; (if (not *cache-on*)(db:write-cached-data db))
    #t))

;;======================================================================
;; Extract ods file from the db
;;======================================================================

;; runspatt is a comma delimited list of run patterns

Added docs/plan.txt version [b6f3c7c220].



























>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13

Move test specific db to test dir
=================================

. Create teststats.db
. Redirect test run stats to teststats.db
. Redirect test steps data to teststats.db
. Redirect test_data to teststats.db
. Direct dboard to get stats from teststats.db
. Redirect kill requests to teststats.db
. Kill requests need to kill all processes in the tree
. Roll up overall stats to megatest.db every five minutes or when test done
. Add any necessary tests

Modified launch.scm from [228baa4ee9] to [95108f25d5].

62
63
64
65
66
67
68

69
70
71
72
73
74
75
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (db        #f)

	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))







>







62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
	       (megatest  (assoc/default 'megatest  cmdinfo))
	       (mt-bindir-path (assoc/default 'mt-bindir-path cmdinfo))
	       (fullrunscript (if runscript (conc testpath "/" runscript) #f))
	       (db        #f)
	       (tdb       #f)
	       (rollup-status 0))
	  
	  (debug:print 2 "Exectuing " test-name " (id: " test-id ") on " (get-host-name))
	  ;; apply pre-overrides before other variables. The pre-override vars must not
	  ;; clobbers things from the official sources such as megatest.config and runconfigs.config
	  (if (string? set-vars)
	      (let ((varpairs (string-split set-vars ",")))
89
90
91
92
93
94
95


96
97
98
99
100
101
102
103
104








105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
	  (setenv "MT_MEGATEST"  megatest)
	  (setenv "MT_TARGET"    target)
	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  (change-directory top-path)
	  (if (not (setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, exiting") 


		(exit 1)))
	  (change-directory *toppath*)
	  ;; now can find our db
	  (set! db (open-db))
	  (if (not (args:get-arg "-server"))
	      (server:client-setup db))
	  ;; (set! *cache-on* #t)
	  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 








	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  ;; from here on out we will open and close the db
	  ;; on every access to reduce the probablitiy of 
	  ;; contention or stuck access on nfs.
	  (sqlite3:finalize! db)

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)







>
>









>
>
>
>
>
>
>
>






|










<
<
<
<
<







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132





133
134
135
136
137
138
139
	  (setenv "MT_MEGATEST"  megatest)
	  (setenv "MT_TARGET"    target)
	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  (change-directory top-path)
	  (if (not (setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, exiting") 
		;; (sqlite3:finalize! db)
		;; (sqlite3:finalize! tdb)
		(exit 1)))
	  (change-directory *toppath*)
	  ;; now can find our db
	  (set! db (open-db))
	  (if (not (args:get-arg "-server"))
	      (server:client-setup db))
	  ;; (set! *cache-on* #t)
	  (set-megatest-env-vars db run-id) ;; these may be needed by the launching process
	  (change-directory work-area) 

	  ;; Open up the test specific database
	  (set! tdb (open-test-db work-area))
	  (on-exit (lambda ()
		     (debug:print 0 "Finalizing both tdb and db!!!")
		     (sqlite3:finalize! tdb)
		     (sqlite3:finalize! db)))

	  (set-run-config-vars db run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars db run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (test-set-meta-info db tdb run-id test-name itemdat)
	  (test-set-status! db test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  





	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
252
253
254
255
256
257
258
259
260
261
262

263
264
265
266
267

268
269
270
271
272
273
274
275
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     (let* ((db       (open-db))
					    (cpuload  (get-cpu-load))
					    (diskfree (get-df (current-directory)))
					    (tmpfree  (get-df "/tmp")))

				       (if (not (args:get-arg "-server"))
					   (server:client-setup db))
				       (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db run-id test-name itemdat))

				       (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")







|
|
|
|
>
|
|
|
|

>
|







258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (let loop ((minutes   (calc-minutes)))
				     ;; (let* (;; (db       (open-db))
					    ;; (cpuload  (get-cpu-load))
					    ;; (diskfree (get-df (current-directory)))
					    ;; (tmpfree  (get-df "/tmp")))
				     (begin
				       ;; (if (not (args:get-arg "-server"))
				       ;;	   (server:client-setup db))
				       ;; (if (not cpuload)  (begin (debug:print 0 "WARNING: CPULOAD not found.")  (set! cpuload "n/a")))
				       ;; (if (not diskfree) (begin (debug:print 0 "WARNING: DISKFREE not found.") (set! diskfree "n/a")))
				       (set! kill-job? (test-get-kill-request db run-id test-name itemdat))
				       (test-set-meta-info db tdb run-id test-name itemdat minutes: minutes)
				       ;; (rdb:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     (let* ((pid (vector-ref exit-info 0)))
					       (if (number? pid)
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job (attempt # " kill-tries ")")
287
288
289
290
291
292
293

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)

						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (set! db (open-db))
	    (if (not (args:get-arg "-server"))
		(server:client-setup db))
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (rdb:get-test-info db run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (test-set-status! db test-id 
				      (if kill-job? "KILLED" "COMPLETED")







>



|









|
|
|







295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (test-set-status! db test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! db)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    ;; (set! db (open-db))
	    ;; (if (not (args:get-arg "-server"))
	    ;;	(server:client-setup db))
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (rdb:get-test-info db run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (test-set-status! db test-id 
				      (if kill-job? "KILLED" "COMPLETED")
339
340
341
342
343
344
345

346
347
348
349
350
351
352
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (sqlite3:finalize! db)

	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 







>







348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
	    (debug:print 2 "Output from running " fullrunscript ", pid " (vector-ref exit-info 0) " in work area " 
			 work-area ":\n====\n exit code " (vector-ref exit-info 2) "\n" "====\n")
	    (sqlite3:finalize! db)
	    (sqlite3:finalize! tdb)
	    (if (not (vector-ref exit-info 1))
		(exit 4)))))))

;; set up the very basics needed for doing anything here.
(define (setup-for-run)
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 

Modified runs.scm from [25e65a825b] to [735502950d].

335
336
337
338
339
340
341


342
343
344
345
346
347
348
349
	      (let* ((have-resources  (runs:can-run-more-tests db test-record)) ;; look at the test jobgroup and tot jobs running
		     (prereqs-not-met (db:get-prereqs-not-met db run-id waitons item-path mode: testmode))
		     (fails           (calc-fails prereqs-not-met))
		     (non-completed   (calc-not-completed prereqs-not-met)))
		(debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " 
			     (string-intersperse 
			      (map (lambda (t)


				     (conc (db:test-get-state t)"/"(db:test-get-status t)))
				   prereqs-not-met) ", ") " fails: " fails)
		;; Don't know at this time if the test have been launched at some time in the past
		;; i.e. is this a re-launch?
		(cond
		 ((and have-resources
		       (or (null? prereqs-not-met)
			   (and (eq? testmode 'toplevel)







>
>
|







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
	      (let* ((have-resources  (runs:can-run-more-tests db test-record)) ;; look at the test jobgroup and tot jobs running
		     (prereqs-not-met (db:get-prereqs-not-met db run-id waitons item-path mode: testmode))
		     (fails           (calc-fails prereqs-not-met))
		     (non-completed   (calc-not-completed prereqs-not-met)))
		(debug:print 8 "INFO: have-resources: " have-resources " prereqs-not-met: " 
			     (string-intersperse 
			      (map (lambda (t)
				     (if (string? t)
					 (conc " WARNING: t is a string=" t )
					 (conc (db:test-get-state t)"/"(db:test-get-status t))))
				   prereqs-not-met) ", ") " fails: " fails)
		;; Don't know at this time if the test have been launched at some time in the past
		;; i.e. is this a re-launch?
		(cond
		 ((and have-resources
		       (or (null? prereqs-not-met)
			   (and (eq? testmode 'toplevel)

Modified tests.scm from [ce4320f87b] to [e990011da7].

399
400
401
402
403
404
405









406

407
408
409
410
411


412
413
414
415
416
417
418
419










420
421
422
423
424
425
426
;; teststep-set-status! used to be here

(define (test-get-kill-request db run-id test-name itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (testdat   (db:get-test-info db run-id test-name item-path)))
    (equal? (test:get-state testdat) "KILLREQ")))










(define (test-set-meta-info db run-id testname itemdat)

  (let ((item-path (item-list->path itemdat))
	(cpuload  (get-cpu-load))
	(hostname (get-host-name))
	(diskfree (get-df (current-directory)))
	(uname    (get-uname "-srvpio")))


    (sqlite3:execute db "UPDATE tests SET host=?,cpuload=?,diskfree=?,uname=? WHERE run_id=? AND testname=? AND item_path=?;"
		  hostname
		  cpuload
		  diskfree
		  uname
		  run-id
		  testname
		  item-path)))











;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)







>
>
>
>
>
>
>
>
>
|
>
|
|
<
|
<
>
>
|
<
|
|
<
|
|
|
>
>
>
>
>
>
>
>
>
>







399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

419

420
421
422

423
424

425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
;; teststep-set-status! used to be here

(define (test-get-kill-request db run-id test-name itemdat)
  (let* ((item-path (item-list->path itemdat))
	 (testdat   (db:get-test-info db run-id test-name item-path)))
    (equal? (test:get-state testdat) "KILLREQ")))

(define (test:tdb-get-rundat-count tdb)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (count)
       (set! res count))
     tdb
     "SELECT count(id) FROM test_rundat;")
    res))

(define (test-set-meta-info db tdb run-id testname itemdat #!key (minutes #f))
  (let* ((num-records (test:tdb-get-rundat-count tdb))
	 (item-path   (item-list->path itemdat))
	 (cpuload  (get-cpu-load))

	 (diskfree (get-df (current-directory))))

    (if (eq? (modulo num-records 10) 0) ;; every ten records update central
	(begin
	  (sqlite3:execute db "UPDATE tests SET cpuload=?,diskfree=? WHERE run_id=? AND testname=? AND item_path=?;"

			   cpuload
			   diskfree

			   run-id
			   testname
			   item-path)
	  (if (eq? num-records 0)
	      (begin
		(sqlite3:execute db "UPDATE tests SET uname=?,host=? WHERE run_id=? AND testname=? AND item_path=?;"
		  	             (get-uname "-srvpio") (get-host-name) run-id testname item-path)
                (if minutes 
                    (sqlite3:execute db "UPDATE tests SET minutes=? WHERE run_id=? AND testname=? AND item_path=?;"
                                        minutes run-id testname item-path))))))
    (sqlite3:execute tdb "INSERT INTO test_rundat (cpuload,diskfree) VALUES (?,?);"
		     cpuload diskfree)))
	  

;;======================================================================
;; A R C H I V I N G
;;======================================================================

(define (test:archive db test-id)
  #f)