Megatest

Check-in [dc2468ee23]
Login
Overview
Comment:Re-enabled remote server stop. Addressing issues found by test1
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | refactor
Files: files | file ages | folders
SHA1: dc2468ee231f46c1d2eafaca090c8a59f3ea7f7a
User & Date: matt on 2013-05-06 00:04:01
Other Links: branch diff | manifest | tags
Context
2013-05-06
00:44
Clean up related to test1 and refactoring check-in: 5c6d9dac45 user: matt tags: refactor
00:04
Re-enabled remote server stop. Addressing issues found by test1 check-in: dc2468ee23 user: matt tags: refactor
2013-05-05
22:32
Second pass on refactoring keys handling. Created runs:create-run-record which will eventually unify all actions check-in: f4e1a69886 user: matt tags: refactor
Changes

Modified common.scm from [3109b21887] to [585cd1b3a7].

51
52
53
54
55
56
57


58
59
60
61
62
63
64
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)



(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id







>
>







51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
(define *received-response* #f)
(define *default-numtries*  10)
(define *server-run*        #t)


(define *target*            (make-hash-table)) ;; cache the target here; target is keyval1/keyval2/.../keyvalN
(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id

Modified db.scm from [7d1a9f4b1b] to [17c4d5f356].

1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358

(define (cdb:tests-register-test serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))

(define (cdb:flush-queue serverdat)
  (cdb:client-call serverdat 'flush #f *default-numtries*))

(define (cdb:kill-server serverdat)
  (cdb:client-call serverdat 'killserver #t *default-numtries*))

(define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status))

(define (cdb:get-test-info serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info #f run-id test-name item-path))








|
|







1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358

(define (cdb:tests-register-test serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'register-test #t *default-numtries* run-id test-name item-path))

(define (cdb:flush-queue serverdat)
  (cdb:client-call serverdat 'flush #f *default-numtries*))

(define (cdb:kill-server serverdat pid)
  (cdb:client-call serverdat 'killserver #t *default-numtries* pid))

(define (cdb:roll-up-pass-fail-counts serverdat run-id test-name item-path status)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:roll-up-pass-fail-counts #f run-id test-name item-path status))

(define (cdb:get-test-info serverdat run-id test-name item-path)
  (cdb:client-call serverdat 'immediate #f *default-numtries* open-run-close db:get-test-info #f run-id test-name item-path))

1582
1583
1584
1585
1586
1587
1588



1589

1590
1591

1592

1593

1594
1595
1596
1597
1598
1599
1600
1601
			   (server:reply return-address qry-sig #f (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*)))))))
		((flush sync)
		 (server:reply return-address qry-sig #t 1)) ;; (length data)))
		((set-verbosity)
		 (set! *verbosity* (car params))
		 (server:reply return-address qry-sig #t '(#t *verbosity*)))
		((killserver)



		 (debug:print 0 "WARNING: Server going down in 15 seconds by user request!")

		 (open-run-close tasks:server-deregister tasks:open-db 
				 (car *runremote*)

				 pullport: (cadr *runremote*))

		 (thread-start! (make-thread (lambda ()(thread-sleep! 15)(exit))))

		 (server:reply return-address qry-sig #t '(#t "exit process started")))
		(else ;; not a command, i.e. is a query
		 (debug:print 0 "ERROR: Unrecognised query/command " stmt-key)
		 (server:reply return-address qry-sig #f 'failed)))))
	   (else
	    (debug:print-info 11 "Executing " stmt-key " for " params)
	    (apply sqlite3:execute (hash-table-ref queries stmt-key) params)
	    (server:reply return-address qry-sig #t #t)))))))







>
>
>
|
>
|
<
>
|
>
|
>
|







1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594

1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
			   (server:reply return-address qry-sig #f (list #f (conc "Login failed due to mismatch paths: " calling-path ", " *toppath*)))))))
		((flush sync)
		 (server:reply return-address qry-sig #t 1)) ;; (length data)))
		((set-verbosity)
		 (set! *verbosity* (car params))
		 (server:reply return-address qry-sig #t '(#t *verbosity*)))
		((killserver)
		 (let ((hostname (car  *runremote*))
		       (port     (cadr *runremote*))
		       (pid      (car params)))
		   (debug:print 0 "WARNING: Server on " hostname ":" port " going down by user request!")
		   (debug:print-info 1 "current pid=" (current-process-id))
		   (open-run-close tasks:server-deregister tasks:open-db 

				   hostname
				   port: port)
		   (set! *server-run* #f)
		   (thread-sleep! 3)
		   (process-signal pid signal/kill)
		   (server:reply return-address qry-sig #t '(#t "exit process started"))))
		(else ;; not a command, i.e. is a query
		 (debug:print 0 "ERROR: Unrecognised query/command " stmt-key)
		 (server:reply return-address qry-sig #f 'failed)))))
	   (else
	    (debug:print-info 11 "Executing " stmt-key " for " params)
	    (apply sqlite3:execute (hash-table-ref queries stmt-key) params)
	    (server:reply return-address qry-sig #t #t)))))))

Modified http-transport.scm from [fe6673ff15] to [5696601f09].

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
(include "db_records.scm")

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define  *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server







|







32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
(include "db_records.scm")

(define (http-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "http://" (car hostport) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

;; Call this to start the actual server
271
272
273
274
275
276
277

278
279
280
281
282
283
284
285
286
287
288
        (tasks:server-update-heartbeat tdb spid)
      
        ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
	;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)

        (if (> (+ last-access server-timeout)
               (current-seconds))
            (begin
              (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              ;; need to delete only *my* server entry (future use)
              (set! *time-to-exit* #t)
              (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
              (thread-sleep! 1)







>
|
|

|







271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
        (tasks:server-update-heartbeat tdb spid)
      
        ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
	;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
        (if (and *server-run*
		 (> (+ last-access server-timeout)
		    (current-seconds)))
            (begin
              (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              ;; need to delete only *my* server entry (future use)
              (set! *time-to-exit* #t)
              (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
              (thread-sleep! 1)

Modified runs.scm from [b1cb11f80b] to [1164e5fe02].

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))

;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;
(define (runs:create-run-record)
  (let* ((mconfig      (if *configdat* 
		           *configdat*
		           (if (setup-for-run)
		               *configdat*
		               (begin
		                 (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
		                 (exit 1)))))
	  (runrec      (runs:runrec-make-record))







|







68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  (let* ((testname (db:test-get-testname   test))
	 (itempath (db:test-get-item-path test)))
    (conc testname (if (equal? itempath "") "" (conc "(" itempath ")")))))

;; This is the *new* methodology. One record to inform them and in the chaos, organise them.
;;
(define (runs:create-run-record)
  (let* ((mconfig      (if *configdat*
		           *configdat*
		           (if (setup-for-run)
		               *configdat*
		               (begin
		                 (debug:print 0 "ERROR: Called setup in a non-megatest area, exiting")
		                 (exit 1)))))
	  (runrec      (runs:runrec-make-record))

Modified tasks.scm from [9d3053bc48] to [6678d1eaa1].

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
   interface
   port
   pubport
   transport
   ))

;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'markdead))
  (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
  (if pid
      (case action
	((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
	(else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
      (if port
	  (case action
	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE  hostname=? AND port=?;" hostname port))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE hostname=? AND port=?;" hostname port)))
	  (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))

(define (tasks:server-deregister-self mdb hostname)
  (tasks:server-deregister mdb hostname pid: (current-process-id)))

;; need a simple call for robustly removing records given host and port
(define (tasks:server-delete mdb hostname port)







|







|
|







101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
   interface
   port
   pubport
   transport
   ))

;; NB// two servers with same pid on different hosts will be removed from the list if pid: is used!
(define (tasks:server-deregister mdb hostname #!key (port #f)(pid #f)(action 'delete))
  (debug:print-info 11 "server-deregister " hostname ", port " port ", pid " pid)
  (if pid
      (case action
	((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE pid=?;" pid))
	(else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE pid=?;" pid)))
      (if port
	  (case action
	    ((delete)(sqlite3:execute mdb "DELETE FROM servers WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port))
	    (else    (sqlite3:execute mdb "UPDATE servers SET state='dead' WHERE (interface=? or hostname=?) AND port=?;" hostname hostname port)))
	  (debug:print 0 "ERROR: tasks:server-deregister called with neither pid nor port specified"))))

(define (tasks:server-deregister-self mdb hostname)
  (tasks:server-deregister mdb hostname pid: (current-process-id)))

;; need a simple call for robustly removing records given host and port
(define (tasks:server-delete mdb hostname port)
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
       (begin
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 0 "Heart beat update of server id=" server-id)
  (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))

;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
(define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
  (let* ((server-id  (if server-id 
			 server-id
			 (tasks:server-get-server-id mdb hostname iface port pid)))







|







139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
       (begin
	 (debug:print 0 "ERROR: tasks:server-get-server-id needs (hostname and pid) OR (iface and port) OR (hostname and port)")
	 "SELECT id FROM servers WHERE pid=-999;")))
     (if hostname hostname iface)(if pid pid port))
    res))

(define (tasks:server-update-heartbeat mdb server-id)
  (debug:print-info 1 "Heart beat update of server id=" server-id)
  (sqlite3:execute mdb "UPDATE servers SET heartbeat=strftime('%s','now') WHERE id=?;" server-id))

;; alive servers keep the heartbeat field upto date with seconds every 6 or so seconds
(define (tasks:server-alive? mdb server-id #!key (iface #f)(hostname #f)(port #f)(pid #f))
  (let* ((server-id  (if server-id 
			 server-id
			 (tasks:server-get-server-id mdb hostname iface port pid)))
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 1 "Sending signal/term to " pid " on " hostname)
	     (process-signal pid signal/term)
	     (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
	     ;;(process-signal pid signal/kill)
	     ) ;; local machine, send sig term
	    (begin
		(debug:print-info 1 "Stopping remote servers not yet supported."))))
	;;      (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
	;;      (let ((serverdat (list hostname port)))
	;;	(case (string->symbol transport)
	;;	  ((http)(http-transport:client-connect hostname port))
	;;	  (else  (debug:print "ERROR: remote stopping servers of type " transport " not supported yet")))
	;;	(cdb:kill-server serverdat)))))    ;; remote machine, try telling server to commit suicide
      (begin
	(if status 
	    (if (equal? hostname (get-host-name))
		(begin
		  (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
		  (process-signal pid signal/term)  ;; local machine, send sig term
		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill







|
|
|
|
|
|
|







248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
			       "  EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 1 "Sending signal/term to " pid " on " hostname)
	     (process-signal pid signal/term)
	     (thread-sleep! 5) ;; give it five seconds to die peacefully then do a brutal kill
	     ;;(process-signal pid signal/kill)
	     ) ;; local machine, send sig term
	    (begin
	      ;;(debug:print-info 1 "Stopping remote servers not yet supported."))))
	      (debug:print-info 1 "Telling alive server on " hostname ":" port " to commit servercide")
	      (let ((serverdat (list hostname port)))
	      	(case (if (string? transport) (string->symbol transport) transport)
	      	  ((http)(http-transport:client-connect hostname port))
	      	  (else  (debug:print "ERROR: remote stopping servers of type " transport " not supported yet")))
	      	(cdb:kill-server serverdat pid)))))    ;; remote machine, try telling server to commit suicide
      (begin
	(if status 
	    (if (equal? hostname (get-host-name))
		(begin
		  (debug:print-info 1 "Sending signal/term to " pid " on " hostname)
		  (process-signal pid signal/term)  ;; local machine, send sig term
		  (thread-sleep! 5)                 ;; give it five seconds to die peacefully then do a brutal kill

Modified tests/tests.scm from [8c3cd1cc08] to [340a51275a].

79
80
81
82
83
84
85
86
87
88
89
90
91
92








93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108

109


110
111
112
113
114
115
116

(test "setup for run" #t (begin (setup-for-run)
				(string? (getenv "MT_RUN_AREA_HOME"))))

(test "server-register, get-best-server" #t (let ((res #f))
					      (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
					      (set! res (open-run-close tasks:get-best-server tasks:open-db))
					      (number? (cadddr res))))

(test "de-register server" #t (let ((res #f))
				(open-run-close tasks:server-deregister tasks:open-db "bob" pullport: 1234)
				(list? (open-run-close tasks:get-best-server tasks:open-db))))

(define hostinfo #f)








(test "get-best-server" #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
			     (set! hostinfo dat) ;; host ip pullport pubport
			     (and (string? (car dat))
				  (number? (caddr dat)))))

(test #f #t (let ((zmq-socket (server:client-connect
			       (cadr hostinfo)
			       (caddr hostinfo)
			       ;; (cadddr hostinfo)
			       )))
	      (set! *runremote* zmq-socket)
	      (string? (car *runremote*))))

(test #f #t (let ((res (server:client-login *runremote*)))
	      (car res)))


(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))



(exit 1)

;;======================================================================
;; C O N F I G   F I L E S 
;;======================================================================








|

|
|
|

|
>
>
>
>
>
>
>
>

|
|
|

<
<
<
<
<
<
<
|
|


>
|
>
>







79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105







106
107
108
109
110
111
112
113
114
115
116
117
118
119
120

(test "setup for run" #t (begin (setup-for-run)
				(string? (getenv "MT_RUN_AREA_HOME"))))

(test "server-register, get-best-server" #t (let ((res #f))
					      (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
					      (set! res (open-run-close tasks:get-best-server tasks:open-db))
					      (number? (vector-ref res 3))))

(test "de-register server" #f (let ((res #f))
				(open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234)
				(open-run-close tasks:get-best-server tasks:open-db)))

(define server-pid #f)
(test "launch server" #t (let ((pid (process-fork (lambda ()
						    ;; (daemon:ize)
						    (server:launch 'http)))))
			   (set! server-pid pid)
			   (print "pid=" server-pid)
			   (number? pid)))

(thread-sleep! 3) ;; need to wait for server to start. Yes, a better way is needed.
(test "get-best-server" #t (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
			     (set! *runremote* (list (vector-ref dat 1)(vector-ref dat 2))) ;; host ip pullport pubport
			     (and (string? (car  *runremote*))
			     	  (number? (cadr *runremote*)))))








(test #f #t (car (cdb:login *runremote* *toppath* *my-client-signature*)))
(test #f #t (let ((res (client:login *runremote*)))
	      (car res)))

(test "server stop" #f (let ((hostname (car  *runremote*))
			     (port     (cadr *runremote*)))
			 (tasks:kill-server #t hostname port server-pid 'http)
			 (open-run-close tasks:get-best-server tasks:open-db)))

(exit 1)

;;======================================================================
;; C O N F I G   F I L E S 
;;======================================================================