Megatest

Check-in [b589024130]
Login
Overview
Comment:Improved exit handling slightly
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: b5890241301c7990a2e41e635888c802de2cabe2
User & Date: matt on 2015-05-26 22:29:52
Other Links: branch diff | manifest | tags
Context
2015-05-26
22:43
Improved signal handling little more. Handle sigterm. check-in: 64b3ca10d0 user: matt tags: v1.60
22:29
Improved exit handling slightly check-in: b589024130 user: matt tags: v1.60
2015-05-21
23:29
Fixed toplevel mode tests not running check-in: 3d3e19cd4d user: matt tags: v1.60
Changes

Modified client.scm from [c1867a27a6] to [ae90ed41bd].

212
213
214
215
216
217
218

219
220
221
222
223
224
225
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226







+







(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler
(define (client:signal-handler signum)
  (signal-mask! signum)
  (set! *time-to-exit* #t)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     "") ;; do nothing for now (was flush out last call if applicable)
			   "eat response"))
	 (th2 (make-thread (lambda ()

Modified common.scm from [163b8623d2] to [408ac4db73].

241
242
243
244
245
246
247

248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267































268
269
270

271
272
273
274
275
276


277
278
279
280
281
282
283
241
242
243
244
245
246
247
248




















249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281

282
283
284
285
286


287
288
289
290
291
292
293
294
295







+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


-
+




-
-
+
+







       (pathname-file *toppath*)))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (std-exit-procedure)
  (set! *time-to-exit* #t)
  (debug:print-info 2 "starting exit process, finalizing databases.")
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (and (not (null? run-ids))
	     (configf:lookup *configdat* "setup" "megatest-db"))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *inmemdb*     (db:close-all *inmemdb*))
  (if (and *megatest-db*
	   (sqlite3:database? *megatest-db*))
      (begin
	(sqlite3:interrupt! *megatest-db*)
	(sqlite3:finalize! *megatest-db* #t)
	(set! *megatest-db* #f)))
  (if *task-db*     (let ((db (cdr *task-db*)))
		      (if (sqlite3:database? db)
			  (begin
			    (sqlite3:interrupt! db)
			    (sqlite3:finalize! db #t)
			    (vector-set! *task-db* 0 #f))))))
  (debug:print-info 0 "starting exit process, finalizing databases.")
  (if (debug:debug-mode 18)
      (rmt:print-db-stats))
  (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
			    (let ((run-ids (hash-table-keys *db-local-sync*)))
			      (if (and (not (null? run-ids))
				       (configf:lookup *configdat* "setup" "megatest-db"))
				  (db:multi-db-sync run-ids 'new2old)))
			    (if *dbstruct-db* (db:close-all *dbstruct-db*))
			    (if *inmemdb*     (db:close-all *inmemdb*))
			    (if (and *megatest-db*
				     (sqlite3:database? *megatest-db*))
				(begin
				  (sqlite3:interrupt! *megatest-db*)
				  (sqlite3:finalize! *megatest-db* #t)
				  (set! *megatest-db* #f)))
			    (if *task-db*     (let ((db (cdr *task-db*)))
						(if (sqlite3:database? db)
						    (begin
						      (sqlite3:interrupt! db)
						      (sqlite3:finalize! db #t)
						      (vector-set! *task-db* 0 #f)))))) "Cleanup db exit thread"))
	(th2 (make-thread (lambda ()
			    (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			    (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff
			    (debug:print 0 "       Done.")
			    (exit 4))
			  "exit on ^C timer")))
    (thread-start! th2)
    (thread-start! th1)
    (thread-join! th2)))

(define (std-signal-handler signum)
  (signal-mask! signum)
  ;; (signal-mask! signum)
  (debug:print 0 "ERROR: Received signal " signum " exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))

(set-signal-handler! signal/int std-signal-handler)
(set-signal-handler! signal/term std-signal-handler)
(set-signal-handler! signal/int std-signal-handler)  ;; ^C
;; (set-signal-handler! signal/term std-signal-handler)

;;======================================================================
;; Misc utils
;;======================================================================

;; Convert strings like "5s 2h 3m" => 60x60x2 + 3x60 + 5
(define (common:hms-string->seconds tstr)

Modified db.scm from [272f710720] to [5f7db1b5aa].

504
505
506
507
508
509
510
511


512
513
514
515
516
517
518
504
505
506
507
508
509
510

511
512
513
514
515
516
517
518
519







-
+
+







     (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
     (debug:print 0 " src db:  " (db:dbdat-get-path fromdb))
     (for-each (lambda (dbdat)
		 (debug:print 0 " dbpath:  " (db:dbdat-get-path dbdat)))
	       (cons todb slave-dbs))
     (if *server-run* ;; we are inside a server, throw a sync-failed error
	 (signal (make-composite-condition
		 (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context.")))))
		 (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context.")))
	 0)) ;; return zero for num synced

	 ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die.
	 ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
	 ;; (portlogger:open-run-close portlogger:set-port port "released")
	 ;; (exit 1)))
   (cond
    ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1)
564
565
566
567
568
569
570
571

572
573
574
575
576
577
578
565
566
567
568
569
570
571

572
573
574
575
576
577
578
579







-
+







	     (db:dbdat-get-db fromdb)
	     full-sel)
	    
	    ;; tack on remaining records in fromdat
	    (if (not (null? fromdat))
		(set! fromdats (cons fromdat fromdats)))

	    (debug:print-info 2 "found " totrecords " records to sync")
	    (debug:print-info 4 "found " totrecords " records to sync")

	    ;; read the target table
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (hash-table-set! todat a (apply vector a b)))
	     (db:dbdat-get-db todb)
	     full-sel)

Modified http-transport.scm from [bbdcef7659] to [9f2d0f6fb0].

420
421
422
423
424
425
426
427

428
429
430
431
432
433
434
420
421
422
423
424
425
426

427
428
429
430
431
432
433
434







-
+







			     (loop count server-state (+ bad-sync-count 1)))))
	     ((exn)
	      (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
	      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
	      (exit)))
	    (set! sync-time  (- (current-milliseconds) start-time))
	    (set! rem-time (quotient (- 4000 sync-time) 1000))
	    (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time)
	    (debug:print 4 "SYNC: time= " sync-time ", rem-time=" rem-time)
	    
	    (if (and (<= rem-time 4)
		     (> rem-time 0))
		(thread-sleep! rem-time)
		(thread-sleep! 4))) ;; fallback for if the math is changed ...

	  ;;

Modified megatest-version.scm from [efcc00779e] to [01cc069134].

1
2
3
4
5
6

7
1
2
3
4
5

6
7





-
+

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6013)
(define megatest-version 1.6014)

Modified megatest.scm from [2b7acd1b9e] to [9c1bf3c037].

305
306
307
308
309
310
311






312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351









































352
353
354
355
356
357
358
305
306
307
308
309
310
311
312
313
314
315
316
317








































318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365







+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(define *watchdog*
  (make-thread 
   (lambda ()
     (thread-sleep! 0.05) ;; delay for startup
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db"))
	   (debug-mode  (debug:debug-mode 1))
	   (last-time   (current-seconds)))
       (if (or (args:get-arg "-runtests")
	       (args:get-arg "-server")
	       (args:get-arg "-set-run-status")
	       (args:get-arg "-remove-runs")
	       (args:get-arg "-get-run-status")
	       )
       (let loop ()
	 ;; sync for filesystem local db writes
	 ;;
	 (let ((start-time      (current-seconds))
	       (servers-started (make-hash-table)))
	   (for-each 
	    (lambda (run-id)
	      (mutex-lock! *db-multi-sync-mutex*)
	      (if (and legacy-sync 
		       (hash-table-ref/default *db-local-sync* run-id #f))
		  ;; (if (> (- start-time last-write) 5) ;; every five seconds
		  (begin ;; let ((sync-time (- (current-seconds) start-time)))
		    (db:multi-db-sync (list run-id) 'new2old)
		    (if (common:low-noise-print 30 "sync new to old")
			(let ((sync-time (- (current-seconds) start-time)))
			  (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds")))
		    ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run
		    ;;     (begin
		    ;;       (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id)
		    ;;       (server:kind-run run-id)))))
		    (hash-table-delete! *db-local-sync* run-id)))
	      (mutex-unlock! *db-multi-sync-mutex*))
	    (hash-table-keys *db-local-sync*))
	   (if (and debug-mode
		    (> (- start-time last-time) 60))
	       (begin
		 (set! last-time start-time)
		 (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	 
	 ;; keep going unless time to exit
	 ;;
	 (if (not *time-to-exit*)
	     (let delay-loop ((count 0))
	       (if (and (not *time-to-exit*)
			(< count 11)) ;; aprox 5-6 seconds
		   (begin
		     (thread-sleep! 1)
		     (delay-loop (+ count 1))))
	       (loop))))))
   "Watchdog thread"))
	   (let loop ()
	     ;; sync for filesystem local db writes
	     ;;
	     (let ((start-time      (current-seconds))
		   (servers-started (make-hash-table)))
	       (for-each 
		(lambda (run-id)
		  (mutex-lock! *db-multi-sync-mutex*)
		  (if (and legacy-sync 
			   (hash-table-ref/default *db-local-sync* run-id #f))
		      ;; (if (> (- start-time last-write) 5) ;; every five seconds
		      (begin ;; let ((sync-time (- (current-seconds) start-time)))
			(db:multi-db-sync (list run-id) 'new2old)
			(if (common:low-noise-print 30 "sync new to old")
			    (let ((sync-time (- (current-seconds) start-time)))
			      (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds")))
			;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run
			;;     (begin
			;;       (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id)
			;;       (server:kind-run run-id)))))
			(hash-table-delete! *db-local-sync* run-id)))
		  (mutex-unlock! *db-multi-sync-mutex*))
		(hash-table-keys *db-local-sync*))
	       (if (and debug-mode
			(> (- start-time last-time) 60))
		   (begin
		     (set! last-time start-time)
		     (debug:print-info 4 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	     
	     ;; keep going unless time to exit
	     ;;
	     (if (not *time-to-exit*)
		 (let delay-loop ((count 0))
		   (if (and (not *time-to-exit*)
			    (< count 11)) ;; aprox 5-6 seconds
		       (begin
			 (thread-sleep! 1)
			 (delay-loop (+ count 1))))
		   (loop))))
	   (debug:print-info 0 "Exiting watchdog timer")))
     "Watchdog thread")))

(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)

Modified runs.scm from [c0fe0ada27] to [1623285039].

223
224
225
226
227
228
229

230

231
232
233
234











235
236
237
238
239
240
241
223
224
225
226
227
228
229
230
231
232




233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250







+

+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+







	 (tdbdat             (tasks:open-db)))

    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
			   (set! *time-to-exit* #t)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((th1 (make-thread (lambda ()
			   (let ((tdbdat (tasks:open-db)))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))
						     (let ((tdbdat (tasks:open-db)))
						       (rmt:tasks-set-state-given-param-key task-key "killed"))
						     (print "Killed by signal " signum ". Exiting")
						     (exit))))
				 (th2 (make-thread (lambda ()
						     (thread-sleep! 3)
						     (debug:print 0 "Done")
						     (exit 4)))))
			     (thread-start! th2)
			     (thread-start! th1)
			     (thread-join! th2))))

    ;; register this run in monitor.db
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)