Megatest

Check-in [29dd9489e5]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6584-nanomsg
Files: files | file ages | folders
SHA1: 29dd9489e514b3f8cd1bee011d0f3496a4030d0f
User & Date: matt on 2021-06-09 09:02:04
Other Links: branch diff | manifest | tags
Context
2021-06-12
04:25
wip check-in: c47b41a610 user: matt tags: v1.6584-nanomsg
2021-06-09
09:02
wip check-in: 29dd9489e5 user: matt tags: v1.6584-nanomsg
2021-06-07
08:59
Adjusted receive for new usage (was copied from mtut.scm check-in: f70db69e66 user: matt tags: v1.6584-nanomsg
Changes

Modified commonmod.scm from [f4c84442dd] to [d20588a683].

3647
3648
3649
3650
3651
3652
3653
3654

3655
3656
3657
3658
3659
3660
3661
3647
3648
3649
3650
3651
3652
3653

3654
3655
3656
3657
3658
3659
3660
3661







-
+







;; timeout is hms string: 1h 5m 3s, default is 10 minutes
;;
(define (server:expiration-timeout)
  (let ((tmo (configf:lookup *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (common:hms-string->seconds tmo)) ;; BUG: hms-string->seconds is broken, if given "10" returns 0. Also, it doesn't belong in this logic unless the string->number is changed below
        (* 3600 (string->number tmo))
	600))) ;; default is ten minutes
	60))) ;; default is one minute

(define (runs:get-mt-env-alist run-id runname target testname itempath)
  ;;(bb-check-path msg: "runs:set-megatest-env-vars entry")
  `(("MT_TEST_NAME"     . ,testname)
    
    ("MT_ITEMPATH"      . ,itempath)

Modified docs/manual/server.dot from [3e029f5fe5] to [ec783673b9].

15
16
17
18
19
20
21




22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43






















44
45

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72




























73
74
75
76
77
78
15
16
17
18
19
20
21
22
23
24
25






















26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


48



























49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82







+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+






//     You should have received a copy of the GNU General Public License
//     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.
digraph G {

    subgraph cluster_1 {
        node [style=filled,shape=box];

	rmt:send-receive -> "init-*remote* if needed" -> rmt:general-open-connection ->
	  rmt:send-receive-real;
	

	check_available_queue       -> remove_entries_over_10s_old;
	remove_entries_over_10s_old -> set_available [label="num_avail < 3"];
	remove_entries_over_10s_old -> exit [label="num_avail > 2"];

	set_available               -> delay_2s;
	delay_2s          -> check_place_in_queue;

	check_place_in_queue        -> "http:transport-launch" [label="at head"];
	check_place_in_queue        -> exit [label="not at head"];

	"client:login"              -> "server:shutdown" [label="login failed"];
	"server:shutdown"           -> exit;	

	subgraph cluster_2 {
		"http:transport-launch"       -> "http:transport-run";
		"http:transport-launch"       -> "http:transport-keep-running";

		"http:transport-keep-running" -> "tests running?";
		"tests running?"              -> "client:login" [label=yes];
		"tests running?"              -> "server:shutdown" [label=no];
		"client:login"                -> delay_5s [label="login ok"];
		delay_5s                      -> "http:transport-keep-running";
//	check_available_queue       -> remove_entries_over_10s_old;
//	remove_entries_over_10s_old -> set_available [label="num_avail < 3"];
//	remove_entries_over_10s_old -> exit [label="num_avail > 2"];
//
//	set_available               -> delay_2s;
//	delay_2s          -> check_place_in_queue;
//
//	check_place_in_queue        -> "http:transport-launch" [label="at head"];
//	check_place_in_queue        -> exit [label="not at head"];
//
//	"client:login"              -> "server:shutdown" [label="login failed"];
//	"server:shutdown"           -> exit;	
//
//	subgraph cluster_2 {
//		"http:transport-launch"       -> "http:transport-run";
//		"http:transport-launch"       -> "http:transport-keep-running";
//
//		"http:transport-keep-running" -> "tests running?";
//		"tests running?"              -> "client:login" [label=yes];
//		"tests running?"              -> "server:shutdown" [label=no];
//		"client:login"                -> delay_5s [label="login ok"];
//		delay_5s                      -> "http:transport-keep-running";
	}

//	}
	// start_server -> "server_running?";
	// "server_running?" -> set_available [label="no"];
	// "server_running?" -> delay_2s [label="yes"];
	// delay_2s -> "still_running?";
	// "still_running?" -> ping_server [label=yes];
	// "still_running?" -> set_available [label=no];
	// ping_server -> exit [label=alive];
	// ping_server -> remove_server_record [label=dead];
	// remove_server_record -> set_available;
	// set_available -> avail_delay [label="delay 3s"];
	// avail_delay -> "first_in_queue?";
	// 
	// "first_in_queue?" -> set_running [label=yes];
	// set_running -> get_next_port -> handle_requests;
	// "first_in_queue?" -> "dead_entry_in_queue?" [label=no];
	// "dead_entry_in_queue?" -> "server_running?" [label=no];
	// "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes];
	// remove_dead_entries -> "server_running?";
	// 
	// handle_requests -> start_shutdown [label="no traffic\nno running tests"];
	// handle_requests -> shutdown_request;
	// start_shutdown -> shutdown_delay;
	// shutdown_request -> shutdown_delay;
	// shutdown_delay -> exit;
	
        label = "server:launch";
        color=brown;
//
//	// start_server -> "server_running?";
//	// "server_running?" -> set_available [label="no"];
//	// "server_running?" -> delay_2s [label="yes"];
//	// delay_2s -> "still_running?";
//	// "still_running?" -> ping_server [label=yes];
//	// "still_running?" -> set_available [label=no];
//	// ping_server -> exit [label=alive];
//	// ping_server -> remove_server_record [label=dead];
//	// remove_server_record -> set_available;
//	// set_available -> avail_delay [label="delay 3s"];
//	// avail_delay -> "first_in_queue?";
//	// 
//	// "first_in_queue?" -> set_running [label=yes];
//	// set_running -> get_next_port -> handle_requests;
//	// "first_in_queue?" -> "dead_entry_in_queue?" [label=no];
//	// "dead_entry_in_queue?" -> "server_running?" [label=no];
//	// "dead_entry_in_queue?" -> "remove_dead_entries" [label=yes];
//	// remove_dead_entries -> "server_running?";
//	// 
//	// handle_requests -> start_shutdown [label="no traffic\nno running tests"];
//	// handle_requests -> shutdown_request;
//	// start_shutdown -> shutdown_delay;
//	// shutdown_request -> shutdown_delay;
//	// shutdown_delay -> exit;
//	
//        label = "server:launch";
//        color=brown;
    }

//     client_start_server -> start_server;
//     handle_requests -> read_write;
//     read_write -> handle_requests;
}

Modified rmtmod.scm from [cd84ed1c84] to [d9cd48e79d].

243
244
245
246
247
248
249
250


251
252


253
254
255
256
257
258
259
243
244
245
246
247
248
249

250
251
252

253
254
255
256
257
258
259
260
261







-
+
+

-
+
+







		#t)
	      (start-main-srv)))
	(start-main-srv))))

;; NB// remote is a rmt:remote struct
;;
(define (rmt:general-open-connection remote apath dbname #!key (num-tries 5))
  (let ((mdbname (db:run-id->dbname #f)))
  (let* ((mdbname (db:run-id->dbname #f))
	 (mconn   (rmt:get-conn remote apath mdbname)))
    (cond
     ((not (rmt:get-conn remote apath mdbname)) ;; no channel open to main? 
     ((or (not mconn) ;; no channel open to main?
	  (< (rmt:conn-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease
      (rmt:open-main-connection remote apath)
      (thread-sleep! 2)
      (rmt:general-open-connection remote apath mdbname))
     ((not (rmt:get-conn remote apath dbname))                 ;; no channel open to dbname?     
      (let* ((res (rmt:send-receive-real remote apath mdbname 'get-server `(,apath ,dbname))))
	(case res
	  ((server-started)
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343

344
345
346
347
348
349
350
295
296
297
298
299
300
301




302
303
304
305
306
307
308
309







310





311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

329
330
331
332
333
334
335
336







-
-
-
-








-
-
-
-
-
-
-

-
-
-
-
-


















-
+







  
;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future
;;
(define (rmt:send-receive-real remote apath dbname cmd params)
  (let* ((conn (rmt:get-conn remote apath dbname)))
    (assert conn "FATAL: rmt:send-receive-real called without the needed channels opened")
    (pp (rmt:conn->alist conn))
    ;; (rmt:send-receive-setup conn)
;;     (let-values (((i o) (tcp-connect (rmt:conn-ipaddr conn)
;; 				     (rmt:conn-port conn))))
    (let* ((key     #f)
	   (host    (rmt:conn-ipaddr conn))
	   (port    (rmt:conn-port   conn))
	   (payload `((cmd    . ,cmd)
		      (key    . ,(rmt:conn-srvkey conn))
		      (params . ,params)))
	   (res      (open-send-receive-nn (conc host":"port)
					   (sexpr->string payload))))
      ;; begin
	;; 		 (write payload o) ;; (rmt:conn-outport conn))
	;; 		 (with-input-from-port
	;; 		     i ;; (rmt:conn-inport conn)
	;; 		   read))))
	;; (close-input-port i)
	;; (close-output-port o)
      (string->sexpr res))))
;;  (if (string? res)
;; 	  (string->sexpr res)
;; 	  res))))



;; db is at apath/.db/dbname, rid is an intermediary solution and will be removed
;; sometime in the future.
;;
;; Purpose - call the main.db server and request a server be started
;; for the given area path and dbname
;;
;; (define (rmt:send-receive-server-start remote apath dbname)
;;   (let* ((conn (rmt:get-conn remote apath dbname)))
;;     (assert conn "FATAL: Unable to connect to db "apath"/"dbname)
;;     #;(let* ((res      (with-input-from-request
;; 		      (rmt:conn->uri conn "api") 
;; 		      `((params . (,apath ,dbname)))
;; 		      read-string)))
;;       (string->sexpr res))))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* "DB Stats, "(seconds->year-week/day-time (current-seconds))"\n=====================")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
    (for-each (lambda (cmd)
		(let ((cmd-dat (hash-table-ref *db-stats* cmd)))
		  (debug:print 18 *default-log-port* (format #f fmtstr cmd (vector-ref cmd-dat 0) (vector-ref cmd-dat 1) (/ (vector-ref cmd-dat 1)(vector-ref cmd-dat 0))))))
	      (sort (hash-table-keys *db-stats*)
		    (lambda (a b)
		      (> (vector-ref (hash-table-ref *db-stats* a) 0)
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472

1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494



















1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512

1513
1514
1515
1516
1517
1518
1519
1448
1449
1450
1451
1452
1453
1454




1455
1456
1457
1458
1459
1460

















1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496

1497
1498
1499
1500
1501
1502
1503
1504







-
-
-
-
+





-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

















-
+







			 (bdat-time-to-exit-set! *bdat* #t)
			 #t))))
    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
    (if (and no-hurry (debug:debug-mode 18))
	(rmt:print-db-stats))
    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
                              (if *server-info*
				  (let ((pkt-file (conc (get-pkts-dir *toppath*)
							"/" (servdat-uuid *server-info*)
							".pkt"))
					(dbfile   (servdat-dbfile *server-info*)))
				  (let ((dbfile   (servdat-dbfile *server-info*)))
				    (if dbfile
					(begin

					  ;; do a final sync here
					  
					(if (string-match ".*/main.db$" dbfile)
					    (begin
					      (debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
					      (delete-file* pkt-file)
					      (debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
					      (db:with-lock-db (servdat-dbfile *server-info*)
							       (lambda (dbh dbfile)
								 (db:release-lock dbh dbfile))))
					    (let* ((sdat *server-info*)) ;; we have a run-id server
					      (rmt:send-receive-real *rmt:remote* *toppath*
								     (db:run-id->dbname #f)
								     'deregister-server
								     `(,(servdat-uuid sdat)
								       ,(current-process-id)
								       ,(servdat-host sdat)   ;; iface
								       ,(servdat-port sdat)))))))))
			      (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
					  (if (string-match ".*/main.db$" dbfile)
					      (let ((pkt-file (conc (get-pkts-dir *toppath*)
							"/" (servdat-uuid *server-info*)
							".pkt")))
						(debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
						(delete-file* pkt-file)
						(debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
						(db:with-lock-db (servdat-dbfile *server-info*)
								 (lambda (dbh dbfile)
								   (db:release-lock dbh dbfile))))
					      (let* ((sdat *server-info*)) ;; we have a run-id server
						(rmt:send-receive-real *rmt:remote* *toppath*
								       (db:run-id->dbname #f)
								       'deregister-server
								       `(,(servdat-uuid sdat)
									 ,(current-process-id)
									 ,(servdat-host sdat)   ;; iface
									 ,(servdat-port sdat)))))))))
			      ;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if (bdat-task-db *bdat*)    ;; TODO: Check that this is correct for task db
				  (let ((db (cdr (bdat-task-db *bdat*))))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  (bdat-task-db-set! *bdat* #f)))))
                              #;(http-client#close-idle-connections!)
                              (if (not (eq? *default-log-port* (current-error-port)))
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin
                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
                                  (begin
      				  (thread-sleep! 2)))
				    (thread-sleep! 2)))
      			      (debug:print 4 *default-log-port* " ... done")
      			      )
			    "clean exit")))
      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1)
      )
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974









1975
1976
1977
1978
1979
1980
1981
1945
1946
1947
1948
1949
1950
1951








1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967







-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+







		    (exit))
		  (loop start-time
			(equal? sdat last-sdat)
			sdat))))))))

(define (rmt:register-server remote apath iface port server-key dbname)
  (rmt:open-main-connection remote apath) ;; we need a channel to main.db
  (rmt:send-receive-real remote apath ;; params: host port servkey pid ipaddr dbpath
			 (db:run-id->dbname #f) 'register-server `(,iface
								   ,port
								   ,server-key
								   ,(current-process-id)
								   ,iface
								   ,apath
								   ,dbname)))
  (rmt:send-receive-real remote apath      ;; params: host port servkey pid ipaddr dbpath
			 (db:run-id->dbname #f)
			 'register-server `(,iface
					    ,port
					    ,server-key
					    ,(current-process-id)
					    ,iface
					    ,apath
					    ,dbname)))

(define (http-transport:wait-for-stable-interface #!optional (num-tries-allowed 100))
  ;; wait until *server-info* stops changing
  (let* ((stime (current-seconds)))
    (let loop ((last-host  #f)
	       (last-port  #f)
	       (tries 0))
2034
2035
2036
2037
2038
2039
2040
2041


2042
2043
2044
2045
2046
2047
2048
2020
2021
2022
2023
2024
2025
2026

2027
2028
2029
2030
2031
2032
2033
2034
2035







-
+
+







    ;; this is our forever loop
    (let* ((iface             (servdat-host *server-info*))
	   (port              (servdat-port *server-info*)))
      (let loop ((count         0)
		 (bad-sync-count 0)
		 (start-time     (current-milliseconds)))
	
	(if (not is-main)
	(if (and (not is-main)
		 (common:low-noise-print 60 "servdat-status"))
	    (debug:print-info 0 *default-log-port* "servdat-status is " (servdat-status *server-info*)))

	;; set up the database handle
	(mutex-lock! *heartbeat-mutex*)
	(if (not *dbstruct-db*) ;; no db opened yet, open the db and register with main if appropriate
	    (let ((watchdog (bdat-watchdog *bdat*)))		 
	      (debug:print 0 *default-log-port* "SERVER: dbprep")