Megatest

Check-in [58b53eb970]
Login
Overview
Comment:More conversion to rpc
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | test-specific-db
Files: files | file ages | folders
SHA1: 58b53eb970abf32589928298e3ac93bc86c4a5e2
User & Date: mrwellan on 2012-10-02 18:16:40
Other Links: branch diff | manifest | tags
Context
2012-10-02
22:24
bumped version check-in: 029ed471cf user: matt tags: test-specific-db, v1.4609
18:16
More conversion to rpc check-in: 58b53eb970 user: mrwellan tags: test-specific-db
2012-10-01
23:11
Added checking on -debug for non-numbers check-in: 1ad597218e user: matt tags: test-specific-db, v1.4608
Changes

Modified db.scm from [50c1ee9122] to [bc17cfd6cb].

1015
1016
1017
1018
1019
1020
1021
1022
1023
1024

1025
1026
1027

1028

1029
1030
1031
1032
1033
1034
1035
1036
1037

(define (db:updater)
  (let loop ((start-time (current-time)))
    (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
    (db:write-cached-data)
    (loop start-time)))
    
(define (cdb:test-set-state-status test-id status state)
  (debug:print 4 "INFO: Adding status/state to queue: " status "/" state)
  (mutex-lock! *incoming-mutex*)

  (set! *incoming-data* (cons (vector 'state-status
				      (current-seconds)
				      (list state

					    status 

					    test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
			      *incoming-data*))
  (mutex-unlock! *incoming-mutex*)
  (if *cache-on*
      (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write")
      (db:write-cached-data)))

;; (define (remote:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
;;   (mutex-lock! *incoming-mutex*)







|


>
|
|
|
>
|
>
|
|







1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040

(define (db:updater)
  (let loop ((start-time (current-time)))
    (thread-sleep! 0.5) ;; move save time around to minimize regular collisions?
    (db:write-cached-data)
    (loop start-time)))
    
(define (cdb:test-set-status-state test-id status state #!key (msg #f))
  (debug:print 4 "INFO: Adding status/state to queue: " status "/" state)
  (mutex-lock! *incoming-mutex*)
  (if msg
      (set! *incoming-data* (cons (vector 'state-status-msg
					  (current-seconds)
					  (list state status msg test-id))
				  *incoming-data*))
      (set! *incoming-data* (cons (vector 'state-status
					  (current-seconds)
					  (list state status test-id)) ;; run-id test-name item-path minutes cpuload diskfree tmpfree) 
				  *incoming-data*)))
  (mutex-unlock! *incoming-mutex*)
  (if *cache-on*
      (debug:print 6 "INFO: *cache-on* is " *cache-on* ", skipping cache write")
      (db:write-cached-data)))

;; (define (remote:test-update-meta-info db test-id minutes cpuload diskfree tmpfree)
;;   (mutex-lock! *incoming-mutex*)
1050
1051
1052
1053
1054
1055
1056
1057
1058

1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078


1079
1080
1081
1082
1083
1084

1085
1086
1087
1088
1089
1090
1091
;; The queue is a list of vectors where the zeroth slot indicates the type of query to
;; apply and the second slot is the time of the query and the third entry is a list of 
;; values to be applied
;;
(define (db:write-cached-data)
  (open-run-close
   (lambda (db . params)
     (let ((meta-stmt         (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
	   (state-status-stmt (sqlite3:prepare db "UPDATE tests SET state=?,status=? WHERE id=?;"))

	   (step-stmt         (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
	   (data              #f))
       (mutex-lock! *incoming-mutex*)
       (set! data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))
       (set! *incoming-data* '())
       (mutex-unlock! *incoming-mutex*)
       (if (> (length data) 0)
	   (debug:print 4 "INFO: Writing cached data " data))
       (sqlite3:with-transaction 
	db
	(lambda ()
	  (for-each (lambda (entry)
		      (debug:print 4 "INFO: flushing " entry " to db")
		      (case (vector-ref entry 0)
			((meta-info)
			 (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
			((step-status)
			 (apply sqlite3:execute step-stmt (vector-ref entry 2)))
			((state-status)
			 (apply sqlite3:execute state-status-stmt (vector-ref entry 2)))


			(else
			 (debug:print 0 "ERROR: Queued entry not recognised " entry))))
		    data)))
       (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
       (sqlite3:finalize! step-stmt)
       (sqlite3:finalize! state-status-stmt)

       ))
   #f))

;; (define (db:write-cached-data db)
;;   (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
;; 	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
;; 	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))







|
|
>
|
|


















>
>






>







1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
;; The queue is a list of vectors where the zeroth slot indicates the type of query to
;; apply and the second slot is the time of the query and the third entry is a list of 
;; values to be applied
;;
(define (db:write-cached-data)
  (open-run-close
   (lambda (db . params)
     (let ((meta-stmt             (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
	   (state-status-stmt     (sqlite3:prepare db "UPDATE tests SET state=?,status=? WHERE id=?;"))
	   (state-status-msg-stmt (sqlite3:prepare db "UPDATE tests SET state=?,status=?,comment=? WHERE id=?;"))
	   (step-stmt             (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
	   (data                  #f))
       (mutex-lock! *incoming-mutex*)
       (set! data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1)))))
       (set! *incoming-data* '())
       (mutex-unlock! *incoming-mutex*)
       (if (> (length data) 0)
	   (debug:print 4 "INFO: Writing cached data " data))
       (sqlite3:with-transaction 
	db
	(lambda ()
	  (for-each (lambda (entry)
		      (debug:print 4 "INFO: flushing " entry " to db")
		      (case (vector-ref entry 0)
			((meta-info)
			 (apply sqlite3:execute meta-stmt (vector-ref entry 2)))
			((step-status)
			 (apply sqlite3:execute step-stmt (vector-ref entry 2)))
			((state-status)
			 (apply sqlite3:execute state-status-stmt (vector-ref entry 2)))
			((state-status-msg)
			 (apply sqlite3:execute state-status-msg-stmt (vector-ref entry 2)))
			(else
			 (debug:print 0 "ERROR: Queued entry not recognised " entry))))
		    data)))
       (sqlite3:finalize! meta-stmt) ;; sqlite is the bottleneck, clear the statements asap?
       (sqlite3:finalize! step-stmt)
       (sqlite3:finalize! state-status-stmt)
       (sqlite3:finalize! state-status-msg-stmt)
       ))
   #f))

;; (define (db:write-cached-data db)
;;   (let ((meta-stmt (sqlite3:prepare db "UPDATE tests SET cpuload=?,diskfree=?,run_duration=?,state='RUNNING' WHERE id=? AND state NOT IN ('COMPLETED','KILLREQ','KILLED');"))
;; 	(step-stmt (sqlite3:prepare db "INSERT OR REPLACE into test_steps (test_id,stepname,state,status,event_time,comment,logfile) VALUES(?,?,?,?,?,?,?);")) ;; strftime('%s','now')#f)
;; 	(data (sort *incoming-data* (lambda (a b)(< (vector-ref a 1)(vector-ref b 1))))))
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
	(sqlite3:execute 
	 db
	 "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'),
                 pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED'))
             WHERE run_id=? AND testname=? AND item_path='';"
	 run-id test-name run-id test-name run-id test-name)
        (thread-sleep! 0.1) ;; give other processes a chance here
	(if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING
	    (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" "RUNNING" run-id test-name)
	    (sqlite3:execute
	     db
	     "UPDATE tests
                       SET state=CASE WHEN (SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 
                          'RUNNING'







|







1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
	(sqlite3:execute 
	 db
	 "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND status='FAIL'),
                 pass_count=(SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND (status='PASS' OR status='WARN' OR status='WAIVED'))
             WHERE run_id=? AND testname=? AND item_path='';"
	 run-id test-name run-id test-name run-id test-name)
        ;; (thread-sleep! 0.1) ;; give other processes a chance here, no, better to be done ASAP?
	(if (equal? status "RUNNING") ;; running takes priority over all other states, force the test state to RUNNING
	    (sqlite3:execute db "UPDATE tests SET state=? WHERE run_id=? AND testname=? AND item_path='';" "RUNNING" run-id test-name)
	    (sqlite3:execute
	     db
	     "UPDATE tests
                       SET state=CASE WHEN (SELECT count(id) FROM tests WHERE run_id=? AND testname=? AND item_path != '' AND state in ('RUNNING','NOT_STARTED')) > 0 THEN 
                          'RUNNING'

Modified launch.scm from [372d06f4be] to [312826fb4b].

38
39
40
41
42
43
44







45
46
47
48
49
50
51
52
53
54
55
56
57
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))








(define (launch:execute encoded-cmd)
  (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
                        ;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))







>
>
>
>
>
>
>





|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
;; where the {VAR=first,second,third ...} is optional.

;; given an exit code and whether or not logpro was used calculate OK/BAD
;; return #t if we are ok, #f otherwise
(define (steprun-good? logpro exitcode)
  (or (eq? exitcode 0)
      (and logpro (eq? exitcode 2))))

;; if handed a string, process it, else look for MT_CMDINFO
(define (launch:get-cmdinfo-assoc-list #!key (encoded-cmd #f))
  (let ((enccmd (if encoded-cmd encoded-cmd (getenv "MT_CMDINFO"))))
    (if enccdm
	(read (open-input-string (base64:base64-decode enccmd)))
	'())))

(define (launch:execute encoded-cmd)
  (let* ((cmdinfo   (read (open-input-string (base64:base64-decode encoded-cmd)))))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (open-run-close tests:test-set-status! #f test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
				 ;;    (run-n-wait fullrunscript)))
				 (open-run-close tests:test-set-status! #f test-id "RUNNING" "n/a" #f #f)
				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)







|

















|







112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
	  (open-run-close set-run-config-vars #f run-id)
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (open-run-close set-megatest-env-vars #f run-id)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  (open-run-close test-set-meta-info #f test-id run-id test-name itemdat 0)
	  (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  
	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (vector #t #t #t))
		 (job-thread   #f)
		 (runit        (lambda ()
				 ;; (let-values
				 ;;  (((pid exit-status exit-code)
				 ;;    (run-n-wait fullrunscript)))
				 (tests:test-set-status! test-id "RUNNING" "n/a" #f #f)
				 ;; if there is a runscript do it first
				 (if fullrunscript
				     (let ((pid (process-run fullrunscript)))
				       (let loop ((i 0))
					 (let-values
					  (((pid-val exit-status exit-code) (process-wait pid #t)))
					  (mutex-lock! m)
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
                                                        ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						        (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo itemdat #f logfna))
						     (if logpro-used
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))







|
|







203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
								   (if (eq? pid-val 0)
								       (begin
									 (thread-sleep! 2)
									 (processloop (+ i 1))))
								   ))
                                                     (let ((exinfo (vector-ref exit-info 2))
                                                           (logfna (if logpro-used (conc stepname ".html") "")))
						       ;; testing if procedures called in a remote call cause problems (ans: no or so I suspect)
						       (open-run-close db:teststep-set-status! #f test-id stepname "end" exinfo itemdat #f logfna))
						     (if logpro-used
							 (open-run-close db:test-set-log! #f test-id (conc stepname ".html")))
						     ;; set the test final status
						     (let* ((this-step-status (cond
									       ((and (eq? (vector-ref exit-info 2) 2) logpro-used) 'warn)
									       ((eq? (vector-ref exit-info 2) 0)                   'pass)
									       (else 'fail)))
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "WARN" 
									    (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									    #f))
							 ((pass)
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (open-run-close tests:test-set-status! #f test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
					     (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 







|
|
|

|


|




|







228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
						       (debug:print 4 "Exit value received: " (vector-ref exit-info 2) " logpro-used: " logpro-used 
								    " this-step-status: " this-step-status " overall-status: " overall-status 
								    " next-status: " next-status " rollup-status: " rollup-status)
						       (case next-status
							 ((warn)
							  (set! rollup-status 2)
							  ;; NB// test-set-status! does rdb calls under the hood
							  (tests:test-set-status! test-id "RUNNING" "WARN" 
									  (if (eq? this-step-status 'warn) "Logpro warning found" #f)
									  #f))
							 ((pass)
							  (tests:test-set-status! test-id "RUNNING" "PASS" #f #f))
							 (else ;; 'fail
							  (set! rollup-status 1) ;; force fail
							  (tests:test-set-status! test-id "RUNNING" "FAIL" (conc "Failed at step " stepname) #f)
							  ))))
						   (if (and (steprun-good? logpro-used (vector-ref exit-info 2))
							    (not (null? tal)))
						       (loop (car tal) (cdr tal) stepname)))
						 (debug:print 4 "WARNING: a prior step failed, stopping at " ezstep))))))))
		 (monitorjob   (lambda ()
				 (let* ((start-seconds (current-seconds))
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (open-run-close tests:test-set-status! #f test-id "KILLED"  "FAIL"
								       (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (open-run-close tests:test-set-status! #f test-id 
				      (if kill-job? "KILLED" "COMPLETED")
				      (cond
				       ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
				       ((eq? rollup-status 0)
					;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
					(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				       ((eq? rollup-status 1) "FAIL")
				       ((eq? rollup-status 2)
					;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
					(if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				       (else "FAIL"))
				      (args:get-arg "-m") #f)))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))







|
|



















|
|
|
|
|
|
|
|
|
|
|
|
|







276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
								(begin
								  (debug:print 0 "Killing " (cadr parts) "; kill -9  " p-id)
								  (system (conc "kill -9 " p-id))))))
							(car processes))
						       (system (conc "kill -9 " pid))))
						   (begin
						     (debug:print 0 "WARNING: Request received to kill job but problem with process, attempting to kill manager process")
						     (tests:test-set-status! test-id "KILLED"  "FAIL"
								     (args:get-arg "-m") #f)
						     (sqlite3:finalize! tdb)
						     (exit 1))))
					     (set! kill-tries (+ 1 kill-tries))
					     (mutex-unlock! m)))
				       ;; (sqlite3:finalize! db)
				       (thread-sleep! (+ 10 (random 10))) ;; add some jitter to the call home time to spread out the db accesses
				       (loop (calc-minutes)))))))
		 (th1          (make-thread monitorjob))
		 (th2          (make-thread runit)))
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (mutex-lock! m)
	    (let* ((item-path (item-list->path itemdat))
		   (testinfo  (open-run-close db:get-test-info-by-id #f test-id))) ;; )) ;; run-id test-name item-path)))
	      (if (not (equal? (db:test-get-state testinfo) "COMPLETED"))
		  (begin
		    (debug:print 2 "Test NOT logged as COMPLETED, (state=" (db:test-get-state testinfo) "), updating result, rollup-status is " rollup-status)
		    (tests:test-set-status! test-id 
				    (if kill-job? "KILLED" "COMPLETED")
				    (cond
				     ((not (vector-ref exit-info 1)) "FAIL") ;; job failed to run
				     ((eq? rollup-status 0)
				      ;; if the current status is AUTO the defer to the calculated value (i.e. leave this AUTO)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO" "PASS"))
				     ((eq? rollup-status 1) "FAIL")
				     ((eq? rollup-status 2)
				      ;; if the current status is AUTO the defer to the calculated value but qualify (i.e. make this AUTO-WARN)
				      (if (equal? (db:test-get-status testinfo) "AUTO") "AUTO-WARN" "WARN"))
				     (else "FAIL"))
				    (args:get-arg "-m") #f)))
	      ;; for automated creation of the rollup html file this is a good place...
	      (if (not (equal? item-path ""))
		  (open-run-close tests:summarize-items #f run-id test-name #f)) ;; don't force - just update if no
	      )
	    (mutex-unlock! m)
	    ;; (exec-results (cmd-run->list fullrunscript)) ;;  (list ">" (conc test-name "-run.log"))))
	    ;; (success      exec-results)) ;; (eq? (cadr exec-results) 0)))
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path "/" testtop-base))
	 (test-path    (conc disk-path "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (let ((rd (config-lookup *configdat* "setup" "linktree")))
		     (if rd rd (conc *toppath* "/runs"))))

	 (lnkbase  (conc linktree "/" target "/" runname))
	 (lnkpath  (conc lnkbase "/" testname))
	 (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))

    ;; Update the rundir path in the test record for all
    (db:test-set-rundir! db run-id testname item-path lnkpathf)







|







404
405
406
407
408
409
410
411
412
413
414
415
416
417
418

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path "/" testtop-base))
	 (test-path    (conc disk-path "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (let ((rd (config-lookup *configdat* "setup" "linktree")))
		      (if rd rd (conc *toppath* "/runs"))))

	 (lnkbase  (conc linktree "/" target "/" runname))
	 (lnkpath  (conc lnkbase "/" testname))
	 (lnkpathf (conc lnkpath (if not-iterated "" "/") item-path)))

    ;; Update the rundir path in the test record for all
    (db:test-set-rundir! db run-id testname item-path lnkpathf)
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
	 (list "MT_RUN_AREA_HOME" *toppath*)
	 (list "MT_TEST_NAME" test-name)
	 ;; (list "MT_ITEM_INFO" (conc itemdat)) 
	 (list "MT_RUNNAME"   runname)
	 ;; (list "MT_TARGET"    mt_target)
	 ))
  (let* ((useshell   (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript  (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	 (memory     (config-lookup test-conf   "requirements" "memory"))
	 (hosts      (config-lookup *configdat* "jobtools"     "workhosts"))







|
|
|
|
|
|







496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test db run-id runname test-conf keyvallst test-name test-path itemdat params)
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
    ;; (list "MT_TARGET"    mt_target)
    ))
  (let* ((useshell   (config-lookup *configdat* "jobtools"     "useshell"))
	 (launcher   (config-lookup *configdat* "jobtools"     "launcher"))
	 (runscript  (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps    (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace  (config-lookup test-conf   "requirements" "diskspace"))
	 (memory     (config-lookup test-conf   "requirements" "memory"))
	 (hosts      (config-lookup *configdat* "jobtools"     "workhosts"))
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (open-run-close tests:test-set-status! db test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))







|







571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
							  (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
							  (list 'runname   runname)
							  (list 'mt-bindir-path mt-bindir-path))))))) ;; (string-intersperse keyvallst " "))))
    ;; clean out step records from previous run if they exist
    (debug:print 4 "INFO: FIXMEEEEE!!!! This can be removed some day, perhaps move all test records to the test db?")
    (open-run-close db:delete-test-step-records db test-id)
    (change-directory work-area) ;; so that log files from the launch process don't clutter the test dir
    (tests:test-set-status! test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))

Modified megatest.scm from [9e9d0ce49a] to [b0572d4f5a].

714
715
716
717
718
719
720
721

722
723
724
725
726
727
728
729
730
731
		(if (and (args:get-arg "-test-status")
			 (or (not state)
			     (not status)))
		    (begin
		      (debug:print 0 "ERROR: You must specify :state and :status with every call to -test-status\n" help)
		      ;; (sqlite3:finalize! db)
		      (exit 6)))
		(let ((msg (args:get-arg "-m")))

		  ;; Convert to rpc
		  ;; (rdb:open-run-close 'tests:test-set-status! #f test-id state newstatus msg otherdata))))
		  (tests:test-set-status! db test-id state newstatus msg otherdata))))
	  (if db (sqlite3:finalize! db))
	  (set! *didsomething* #t))))

;;======================================================================
;; Various helper commands can go below here
;;======================================================================








|
>
|
<
|







714
715
716
717
718
719
720
721
722
723

724
725
726
727
728
729
730
731
		(if (and (args:get-arg "-test-status")
			 (or (not state)
			     (not status)))
		    (begin
		      (debug:print 0 "ERROR: You must specify :state and :status with every call to -test-status\n" help)
		      ;; (sqlite3:finalize! db)
		      (exit 6)))
		(let* ((msg    (args:get-arg "-m"))
		       (numoth (length (hash-table-keys otherdata))))
		  ;; Convert to rpc inside the tests:test-set-status! call, not here

		  (tests:test-set-status! test-id state newstatus msg otherdata))))
	  (if db (sqlite3:finalize! db))
	  (set! *didsomething* #t))))

;;======================================================================
;; Various helper commands can go below here
;;======================================================================

Modified runs.scm from [5de4941805] to [13613d1781].

653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (open-run-close tests:test-set-status! db test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat)))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================








|







653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
	 (debug:print 1 "NOTE: " new-test-name " is already running or was explictly killed, use -force to launch it."))
	((LAUNCHED REMOTEHOSTSTART RUNNING)  
	 (if (> (- (current-seconds)(+ (db:test-get-event_time testdat)
				       (db:test-get-run_duration testdat)))
		600) ;; i.e. no update for more than 600 seconds
	     (begin
	       (debug:print 0 "WARNING: Test " test-name " appears to be dead. Forcing it to state INCOMPLETE and status STUCK/DEAD")
	       (tests:test-set-status! test-id "INCOMPLETE" "STUCK/DEAD" "Test is stuck or dead" #f))
	     (debug:print 2 "NOTE: " test-name " is already running")))
	(else       (debug:print 0 "ERROR: Failed to launch test " new-test-name ". Unrecognised state " (test:get-state testdat)))))))

;;======================================================================
;; END OF NEW STUFF
;;======================================================================

Modified server.scm from [57e8a67dda] to [a7dc2ad12d].

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
	   'rdb:open-run-close 
	   (lambda (procname . remargs)
	     (debug:print 4 "INFO: rdb:open-run-close " procname " " remargs)
	     (set! *last-db-access* (current-seconds))
	     (apply open-run-close (eval procname) remargs)))
	  
	  (rpc:publish-procedure!
	   'cdb:test-set-state-status
	   (lambda (test-id status state)
	     (debug:print 4 "INFO: cdb:test-set-state-status " procname " " remargs)
	     (apply cdb:test-set-state-status remargs)))

	  ;;======================================================================
	  ;; end of publish-procedure section
	  ;;======================================================================

	  (set! *rpc:listener* rpc:listener)
	  (on-exit (lambda ()







|

|
|







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
	   'rdb:open-run-close 
	   (lambda (procname . remargs)
	     (debug:print 4 "INFO: rdb:open-run-close " procname " " remargs)
	     (set! *last-db-access* (current-seconds))
	     (apply open-run-close (eval procname) remargs)))
	  
	  (rpc:publish-procedure!
	   'cdb:test-set-status-state
	   (lambda (test-id status state)
	     (debug:print 4 "INFO: cdb:test-set-status-state " procname " " remargs)
	     (apply cdb:test-set-status-state remargs)))

	  ;;======================================================================
	  ;; end of publish-procedure section
	  ;;======================================================================

	  (set! *rpc:listener* rpc:listener)
	  (on-exit (lambda ()

Modified tests.scm from [35c75741ff] to [b7a51a2a27].

109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
			   (hash-table-set! tests-hash full-testname testdat))))
		   results)
		  (if (null? tal)
		      (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests
		      (loop (car tal)(cdr tal))))))))))

;; Do not rpc this one, do the underlying calls!!!
(define (tests:test-set-status! db test-id state status comment dat)

  (let* ((real-status status)
	 (otherdat    (if dat dat (make-hash-table)))
	 (testdat     (open-run-close db:get-test-info-by-id db test-id))
	 (run-id      (db:test-get-run_id testdat))
	 (test-name   (db:test-get-testname   testdat))
	 (item-path   (db:test-get-item-path testdat))
	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
	 ;; was WAIVED if this test is FAIL
	 (waived   (if (equal? status "FAIL")
		       (let ((prev-test (test:get-previous-test-run-record db run-id test-name item-path)))
			 (if prev-test ;; true if we found a previous test in this run series
			     (let ((prev-status (db:test-get-status   prev-test))
				   (prev-state  (db:test-get-state    prev-test))
				   (prev-comment (db:test-get-comment prev-test)))
			       (debug:print 4 "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
			       (if (and (equal? prev-state  "COMPLETED")
					(equal? prev-status "WAIVED"))
				   prev-comment ;; waived is either the comment or #f
				   #f))
			     #f))
		       #f)))
    (if waived (set! real-status "WAIVED"))
    (debug:print 4 "real-status " real-status ", waived " waived ", status " status)

    ;; update the primary record IF state AND status are defined
    (if (and state status)
	;; (rdb:open-run-close 'cdb:test-set-state-status #f test-id real-status state)) ;; this one works
	(cdb:test-set-state-status test-id real-status state))
    
    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
    ;; run area, do not rpc it (yet)
    (if (and test-id state status (equal? status "AUTO")) 
	(open-run-close db:test-data-rollup db test-id status))

    ;; add metadata (need to do this way to avoid SQL injection issues)







|
>
|








|

















|







109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
			   (hash-table-set! tests-hash full-testname testdat))))
		   results)
		  (if (null? tal)
		      (map cdr (hash-table->alist tests-hash)) ;; return a list of the most recent tests
		      (loop (car tal)(cdr tal))))))))))

;; Do not rpc this one, do the underlying calls!!!
(define (tests:test-set-status! test-id state status comment dat)
  (let* ((db          #f)
	 (real-status status)
	 (otherdat    (if dat dat (make-hash-table)))
	 (testdat     (open-run-close db:get-test-info-by-id db test-id))
	 (run-id      (db:test-get-run_id testdat))
	 (test-name   (db:test-get-testname   testdat))
	 (item-path   (db:test-get-item-path testdat))
	 ;; before proceeding we must find out if the previous test (where all keys matched except runname)
	 ;; was WAIVED if this test is FAIL
	 (waived   (if (equal? status "FAIL")
		       (let ((prev-test (open-run-close test:get-previous-test-run-record db run-id test-name item-path)))
			 (if prev-test ;; true if we found a previous test in this run series
			     (let ((prev-status (db:test-get-status   prev-test))
				   (prev-state  (db:test-get-state    prev-test))
				   (prev-comment (db:test-get-comment prev-test)))
			       (debug:print 4 "prev-status " prev-status ", prev-state " prev-state ", prev-comment " prev-comment)
			       (if (and (equal? prev-state  "COMPLETED")
					(equal? prev-status "WAIVED"))
				   prev-comment ;; waived is either the comment or #f
				   #f))
			     #f))
		       #f)))
    (if waived (set! real-status "WAIVED"))
    (debug:print 4 "real-status " real-status ", waived " waived ", status " status)

    ;; update the primary record IF state AND status are defined
    (if (and state status)
	;; (rdb:open-run-close 'cdb:test-set-state-status #f test-id real-status state)) ;; this one works
	(cdb:test-set-status-state test-id real-status state))
    
    ;; if status is "AUTO" then call rollup (note, this one modifies data in test
    ;; run area, do not rpc it (yet)
    (if (and test-id state status (equal? status "AUTO")) 
	(open-run-close db:test-data-rollup db test-id status))

    ;; add metadata (need to do this way to avoid SQL injection issues)
200
201
202
203
204
205
206

207
208
209
210
211
212
213
(define (tests:summarize-items db run-id test-name force)
  ;; if not force then only update the record if one of these is true:
  ;;   1. logf is "log/final.log
  ;;   2. logf is same as outputfilename
  (let ((outputfilename (conc "megatest-rollup-" test-name ".html"))
	(orig-dir       (current-directory))
	(logf           #f))

    (sqlite3:for-each-row 
     (lambda (path final_logf)
       (set! logf final_logf)
       (if (directory? path)
	   (begin
	     (print "Found path: " path)
	     (change-directory path))







>







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
(define (tests:summarize-items db run-id test-name force)
  ;; if not force then only update the record if one of these is true:
  ;;   1. logf is "log/final.log
  ;;   2. logf is same as outputfilename
  (let ((outputfilename (conc "megatest-rollup-" test-name ".html"))
	(orig-dir       (current-directory))
	(logf           #f))
    ;; This query finds the path and changes the directory to it for the test
    (sqlite3:for-each-row 
     (lambda (path final_logf)
       (set! logf final_logf)
       (if (directory? path)
	   (begin
	     (print "Found path: " path)
	     (change-directory path))
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467

(define (test:archive db test-id)
  #f)

(define (test:archive-tests db keynames target)
  #f)

;;======================================================================
;; R P C
;;======================================================================

(define (rtests:register-test db run-id test-name item-path)
  (if *runremote*
      (let ((host (vector-ref *runremote* 0))
	    (port (vector-ref *runremote* 1)))
	((rpc:procedure 'rtests:register-test host port) run-id test-name item-path))
      (tests:register-test db run-id test-name item-path)))

(define (rtests:test-set-status!  db test-id state status comment dat)
  (if *runremote*
      (let ((host (vector-ref *runremote* 0))
	    (port (vector-ref *runremote* 1)))
	((rpc:procedure 'rtests:test-set-status! host port) test-id state status comment dat))
      (tests:test-set-status! db test-id state status comment dat)))

(define (rtests:test-set-toplog! db run-id test-name logf)
  (if *runremote*
      (let ((host (vector-ref *runremote* 0))
            (port (vector-ref *runremote* 1)))
        ((rpc:procedure 'rtests:test-set-toplog! host port) run-id test-name logf))
      (tests:test-set-toplog! db run-id test-name logf)))








<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
438
439
440
441
442
443
444


























(define (test:archive db test-id)
  #f)

(define (test:archive-tests db keynames target)
  #f)