Megatest

Changes On Branch 6bdeae43f397c1a2
Login

Changes In Branch v1.63-09c-candidate Through [6bdeae43f3] Excluding Merge-Ins

This is equivalent to a diff from a024d9e60f to 6bdeae43f3

2017-03-07
14:43
various fixes check-in: 6fc6b23e48 user: bjbarcla tags: v1.63
14:12
restored env var name MT_TESTSUITENAME which had inadvertently changed to MT_TESTSUITE_NAME check-in: f121875378 user: bjbarcla tags: v1.63-09c-candidate
09:46
merged in fixes for COMPLETE/COMPLETED state wrong name, client-setup crash bug check-in: 6bdeae43f3 user: bjbarcla tags: v1.63-09c-candidate
09:44
merged in code to show detail of failure if dbopen fails check-in: 8639a32fde user: bjbarcla tags: v1.63-09c-candidate
09:39
Create new branch named "v1.63-09c-candidate" check-in: 848bb4fc58 user: bjbarcla tags: v1.63-09c-candidate
05:41
Check that dir is writeable before creating test-summary.html Closed-Leaf check-in: 1da6632403 user: matt tags: test-summary-log-writeable
05:17
Added comprehensive check for corrupted data received from server to fix occasional stack trace from defunct server causing data corruption. Closed-Leaf check-in: 5a9a0708b8 user: matt tags: bad-data-defense
2017-03-06
12:24
fixed bug where COMPLETED was mispelled to COMPLETE check-in: 2592f737ad user: bjbarcla tags: v1.63-randy-1405717332
2017-03-03
22:47
Added *user-hash-data* - a global that can be used in -repl and #{scheme ...} calls by the end user check-in: a024d9e60f user: matt tags: v1.63
2017-03-02
12:19
fixed -list-targets delay issue check-in: b56656e03c user: bjbarcla tags: v1.63, 1.6309b

Modified client.scm from [5d4087763d] to [d740aa52d4].

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
		    (if (and start-res
			     ping-res)
			(begin
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered







|







98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
		    (if (and start-res
			     ping-res)
			(begin
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))
			  (remote-conndat-set! runremote #f)  ;; (hash-table-delete! runremote run-id)
			  (thread-sleep! 1)
			  (client:setup-http areapath remaining-tries: (- remaining-tries 1))
			  )))
		  (begin    ;; no server registered

Modified db.scm from [8fbbea621c] to [bc6dc61276].

215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236







237
238
239
240
241






242
243
244
245
246
247
248
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(let (;; (lock    (obtain-dot-lock fname 1 5 10))
	      (db      (sqlite3:open-database fname)))
	  (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
	  ;; (db:set-sync db)
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (if (not file-exists)
              (begin
		(if (and (configf:lookup *configdat* "setup" "use-wal")
                         (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
		    (sqlite3:execute db "PRAGMA journal_mode=WAL;")
		    (print "Creating " fname " in NON-WAL mode."))
		(initproc db)))
	  ;; (release-dot-lock fname)
          ;;(mutex-unlock! *db-open-mutex*)
	  db)







	(begin
	  (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
	  (let ((db (sqlite3:open-database fname)))
            ;;(mutex-unlock! *db-open-mutex*)
            db))))) ;; )












;; ;; This routine creates the db. It is only called if the db is not already opened







|
|
|
<
|
|
|
|
|
|
|
|
<
<
|
>
>
>
>
>
>
>
|
|
|
|
|
>
>
>
>
>
>







215
216
217
218
219
220
221
222
223
224

225
226
227
228
229
230
231
232


233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    ;;(mutex-lock! *db-open-mutex*) ;; tried this mutex, not clear it helped.
    (if file-write ;; dir-writable
	(condition-case
	    (let ((db      (sqlite3:open-database fname)))
	      (sqlite3:set-busy-handler! db (make-busy-timeout 136000))

	      (sqlite3:execute db "PRAGMA synchronous = 0;")
	      (if (not file-exists)
		  (begin
		    (if (and (configf:lookup *configdat* "setup" "use-wal")
			     (string-match "^/tmp/.*" fname)) ;; this is a file in /tmp
			(sqlite3:execute db "PRAGMA journal_mode=WAL;")
			(print "Creating " fname " in NON-WAL mode."))
		    (initproc db)))


	      db)
	  (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
	  (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
	  (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
	  (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
	  (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))

	(condition-case
	    (begin
	      (debug:print 2 *default-log-port* "WARNING: opening db in non-writable dir " fname)
	      (let ((db (sqlite3:open-database fname)))
		;;(mutex-unlock! *db-open-mutex*)
		db))
	  (exn (io-error)  (debug:print 0 *default-log-port* "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again."))
	  (exn (corrupt)   (debug:print 0 *default-log-port* "ERROR: database " fname " is corrupt. Repair it to proceed."))
	  (exn (busy)      (debug:print 0 *default-log-port* "ERROR: database " fname " is locked. Try copying to another location, remove original and copy back."))
	  (exn (permission)(debug:print 0 *default-log-port* "ERROR: database " fname " has some permissions problem."))
	  (exn () (debug:print 0 *default-log-port* "ERROR: Unknown error with database " fname " message: " ((condition-property-accessor 'exn 'message) exn))))
	)))






;; ;; This routine creates the db. It is only called if the db is not already opened
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete"))
                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 







|







1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
              (min-incompleted-ids (map car incompleted)) ;; do 'em all
              (all-ids             (append min-incompleted-ids (map car oldlaunched))))
         (if (> (length all-ids) 0)
             (begin
               (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
               (for-each
                (lambda (test-id)
                  (db:test-set-state-status dbstruct run-id test-id "COMPLETED" "DEAD" "Test failed to complete")) ;; fix for one aspect of Randy's ticket 1405717332
                all-ids))))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 

Modified rmt.scm from [ebacc63386] to [c6804ca810].

253
254
255
256
257
258
259
260










261
262
263
264
265
266
267
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (db:setup))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))










			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (and read-only qry-is-write)
        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)







|
>
>
>
>
>
>
>
>
>
>







253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((qry-is-write   (not (member cmd api:read-only-queries)))
	 (db-file-path   (db:dbfile-path)) ;;  0))
	 (dbstruct-local (db:setup))  ;; make-dbr:dbstruct path:  dbdir local: #t)))
	 (read-only      (not (file-write-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (if (not (and read-only qry-is-write))
			     (let ((v (api:execute-requests dbstruct-local (vector (symbol->string cmd) params))))
			       (handle-exceptions ;; there has been a long history of receiving strange errors from values returned by the client when things go wrong..
				exn               ;;  This is an attempt to detect that situation and recover gracefully
				(begin
				  (debug:print0 *default-log-port* "ERROR: bad data from server " v " message: "  ((condition-property-accessor 'exn 'message) exn))
				  (vector #t '())) ;; should always get a vector but if something goes wrong return a dummy
				(if (and (vector? v)
					 (> (vector-length v) 1))
				    (let ((newvec (vector (vector-ref v 0)(vector-ref v 1))))
				      newvec)           ;; by copying the vector while inside the error handler we should force the detection of a corrupted record
				    (vector #t '()))))  ;; we could also check that the returned types are valid
			     (vector #t '())))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (and read-only qry-is-write)
        (debug:print 0 *default-log-port* "ERROR: attempt to write to read-only database ignored. cmd=" cmd))
    (if (not success)
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn







|







285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
/		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn

Modified tests.scm from [e19b1bb1d2] to [31439bf084].

1025
1026
1027
1028
1029
1030
1031





1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
		       (string<? (conc time-a)(conc time-b)))))))))


;; summarize test in to a file test-summary.html in the test directory
;;
(define (tests:summarize-test run-id test-id)
  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))





	 (steps-dat (rmt:get-steps-for-test run-id test-id))
	 (test-name (db:test-get-testname test-dat))
	 (item-path (db:test-get-item-path test-dat))
	 (full-name (db:test-make-full-name test-name item-path))
	 (oup       (open-output-file (conc (db:test-get-rundir test-dat) "/test-summary.html")))
	 (status    (db:test-get-status   test-dat))
	 (color     (common:get-color-from-status status))
	 (logf      (db:test-get-final_logf test-dat))
	 (steps-dat (tests:get-compressed-steps run-id test-id)))
    ;; (dcommon:get-compressed-steps #f 1 30045)
    ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))

    (s:output-new
     oup
     (s:html
      (s:title "Summary for " full-name)
      (s:body 
       (s:h2 "Summary for " full-name)
       (s:table 'cellspacing "0" 'border "1"
	(s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
	      (s:td "test id")  (s:td (db:test-get-id       test-dat)))
	(s:tr (s:td "testname") (s:td test-name)
	      (s:td "itempath") (s:td item-path))
	(s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
	      (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
	(s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
				       (db:test-get-event_time test-dat)))
	      (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
       (s:h3 "Log files")
       (s:table
	'cellspacing "0" 'border "1"
	(s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
       (s:table
	'cellspacing "0" 'border "1"
	(s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
	(map (lambda (step-dat)
	       (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
		     (s:td (tdb:steps-table-get-start    step-dat))
		     (s:td (tdb:steps-table-get-end      step-dat))
		     (s:td (tdb:steps-table-get-status   step-dat))
		     (s:td (tdb:steps-table-get-runtime  step-dat))
		     (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
			     (s:a 'href step-log step-log)))))
	     steps-dat))
	)))
    (close-output-port oup)))
	  
	  
;; MUST BE CALLED local!
;;
(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))







>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
		       (string<? (conc time-a)(conc time-b)))))))))


;; summarize test in to a file test-summary.html in the test directory
;;
(define (tests:summarize-test run-id test-id)
  (let* ((test-dat  (rmt:get-test-info-by-id run-id test-id))
	 (out-dir   (db:test-get-rundir test-dat))
	 (out-file  (conc out-dir "/test-summary.html")))
    ;; first verify we are able to write the output file
    (if (not (file-write-access? out-dir))
	(debug:print 0 *default-log-port* "ERROR: cannot write test-summary.html to " out-dir)
	(let* (;; (steps-dat (rmt:get-steps-for-test run-id test-id))
	       (test-name (db:test-get-testname test-dat))
	       (item-path (db:test-get-item-path test-dat))
	       (full-name (db:test-make-full-name test-name item-path))
	       (oup       (open-output-file out-file))
	       (status    (db:test-get-status   test-dat))
	       (color     (common:get-color-from-status status))
	       (logf      (db:test-get-final_logf test-dat))
	       (steps-dat (tests:get-compressed-steps run-id test-id)))
	  ;; (dcommon:get-compressed-steps #f 1 30045)
	  ;; (#("wasting_time" "23:36:13" "23:36:21" "0" "8.0s" "wasting_time.log"))
	  
	  (s:output-new
	   oup
	   (s:html
	    (s:title "Summary for " full-name)
	    (s:body 
	     (s:h2 "Summary for " full-name)
	     (s:table 'cellspacing "0" 'border "1"
		      (s:tr (s:td "run id")   (s:td (db:test-get-run_id   test-dat))
			    (s:td "test id")  (s:td (db:test-get-id       test-dat)))
		      (s:tr (s:td "testname") (s:td test-name)
			    (s:td "itempath") (s:td item-path))
		      (s:tr (s:td "state")    (s:td (db:test-get-state    test-dat))
			    (s:td "status")   (s:td (s:a 'href logf (s:font 'color color status))))
		      (s:tr (s:td "TestDate") (s:td (seconds->work-week/day-time 
						     (db:test-get-event_time test-dat)))
			    (s:td "Duration") (s:td (seconds->hr-min-sec (db:test-get-run_duration test-dat)))))
	     (s:h3 "Log files")
	     (s:table
	      'cellspacing "0" 'border "1"
	      (s:tr (s:td "Final log")(s:td (s:a 'href logf logf))))
	     (s:table
	      'cellspacing "0" 'border "1"
	      (s:tr (s:td "Step Name")(s:td "Start")(s:td "End")(s:td "Status")(s:td "Duration")(s:td "Log File"))
	      (map (lambda (step-dat)
		     (s:tr (s:td (tdb:steps-table-get-stepname step-dat))
			   (s:td (tdb:steps-table-get-start    step-dat))
			   (s:td (tdb:steps-table-get-end      step-dat))
			   (s:td (tdb:steps-table-get-status   step-dat))
			   (s:td (tdb:steps-table-get-runtime  step-dat))
			   (s:td (let ((step-log (tdb:steps-table-get-log-file step-dat)))
				   (s:a 'href step-log step-log)))))
		   steps-dat))
	     )))
	  (close-output-port oup)))))
	  
	  
;; MUST BE CALLED local!
;;
(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))