Megatest

Changes On Branch b3a83a0cec1ff430
Login

Changes In Branch try-nanomsg Through [b3a83a0cec] Excluding Merge-Ins

This is equivalent to a diff from ec50f4ac00 to b3a83a0cec

2014-11-30
09:53
Merged fix for get-tests-for-run(s) check-in: c8184e551e user: matt tags: v1.60
08:04
Archiving check-in: 5ab4109044 user: matt tags: archiving
2014-11-29
22:44
Re-enable closing connections if open longer than the server timeout and no accesses Closed-Leaf check-in: cec4ee3511 user: matt tags: try-nanomsg
21:50
Added a little time spread on client starting servers - try to avoid startup storms check-in: b3a83a0cec user: matt tags: try-nanomsg
20:33
http transport with try-nanomsg changes completed first pass Validation 100% check-in: b260c4c5d9 user: matt tags: try-nanomsg
2014-11-26
08:59
Merged v1.60 changes into try-nanomsg check-in: 0b3a6d8aa9 user: matt tags: try-nanomsg
2014-11-25
21:10
Add big delay and take a break when system is clearly overloaded. check-in: ec50f4ac00 user: matt tags: v1.60
16:39
Many tweaks to improve reliability under stress check-in: 0b6b35ab5b user: mrwellan tags: v1.60

Modified Makefile from [64fd867d54] to [4886dc652e].

1
2
3
4
5
6
7
8
9

10
11
12
13
14
15
16
1
2
3
4
5
6
7
8

9
10
11
12
13
14
15
16








-
+







# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   http-transport.scm filedb.scm \
	   http-transport.scm nmsg-transport.scm filedb.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm lock-queue.scm sdb.scm \
	   rmt.scm api.scm tdb.scm portlogger.scm

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \

Modified api.scm from [a688529701] to [080a657599].

47
48
49
50
51
52
53


54
55
56
57
58
59
60
61
62























63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86























87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106



















107
108
109


110
111
112
113
114




115
116
117
118
119
120
121
122
123
124
125












126
127
128
129
130




131
132
133
134
135
136
137
138
139
140
141
142
143
144



145
146
147
148
149
150
151
152
153
154

155
47
48
49
50
51
52
53
54
55









56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79























80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103



















104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123


124
125
126




127
128
129
130
131










132
133
134
135
136
137
138
139
140
141
142
143
144




145
146
147
148


149
150
151
152
153
154
155
156
157
158


159
160
161
162
163
164
165
166
167
168
169
170

171
172







+
+
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
+
+

-
-
-
-
+
+
+
+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
+
+
+
+
-
-










-
-
+
+
+









-
+

    get-steps-data
    login
    testmeta-get-record))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct cmd params)
  (case (string->symbol cmd)
    ;; SERVERS
    ((start-server)                 (apply server:kind-run params))
    ((kill-server)                  (set! *server-run* #f))

    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain)))
     (print-call-chain (current-error-port))
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (if (not (vector? dat))                    ;; it is an error to not receive a vector
       (vector #f #f "remote must be called with a vector")       
       (vector                                   ;; return a vector + the returned data structure
	#t 
	(let ((cmd    (vector-ref dat 0))
	      (params (vector-ref dat 1)))
	  (case (if (symbol? cmd)
		    cmd
		    (string->symbol cmd))
	    ;; SERVERS
	    ((start-server)                 (apply server:kind-run params))
	    ((kill-server)                  (set! *server-run* #f))
	    
	    ;; KEYS
	    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
	    ((get-keys)                     (db:get-keys dbstruct))

    ;; TESTS
    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
    ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
	    ;; TESTS
	    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
	    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
	    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
	    ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
	    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
	    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
	    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
	    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
	    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
	    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
	    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
	    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
	    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
	    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
	    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
	    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
	    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
	    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
	    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
	    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
	    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
	    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))

    ;; RUNS
    ((get-run-info)                 (apply db:get-run-info dbstruct params))
    ((get-run-status)               (apply db:get-run-status dbstruct params))
    ((set-run-status)               (apply db:set-run-status dbstruct params))
    ((register-run)                 (apply db:register-run dbstruct params))
    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
    ((get-test-id)                  (apply db:get-test-id dbstruct params))
    ((get-tests-for-runs-mindata)   (apply db:get-tests-for-runs-mindata dbstruct params))
    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
    ((delete-run)                   (apply db:delete-run dbstruct params))
    ((get-runs)                     (apply db:get-runs dbstruct params))
    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
    ((find-and-mark-incomplete)     (apply db:find-and-mark-incomplete dbstruct params))
	    ;; RUNS
	    ((get-run-info)                 (apply db:get-run-info dbstruct params))
	    ((get-run-status)               (apply db:get-run-status dbstruct params))
	    ((set-run-status)               (apply db:set-run-status dbstruct params))
	    ((register-run)                 (apply db:register-run dbstruct params))
	    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
	    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
	    ((get-test-id)                  (apply db:get-test-id dbstruct params))
	    ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
	    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
	    ((delete-run)                   (apply db:delete-run dbstruct params))
	    ((get-runs)                     (apply db:get-runs dbstruct params))
	    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
	    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
	    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
	    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
	    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
	    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
	    ((find-and-mark-incomplete)     (apply db:find-and-mark-incomplete dbstruct params))

    ;; STEPS
    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
	    ;; STEPS
	    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

    ;; TEST DATA
    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
    ((csv->test-data)               (apply db:csv->test-data dbstruct params))
    ((get-steps-data)               (apply db:get-steps-data dbstruct params))
	    ;; TEST DATA
	    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
	    ((csv->test-data)               (apply db:csv->test-data dbstruct params))
	    ((get-steps-data)               (apply db:get-steps-data dbstruct params))

    ;; MISC
    ((login)                        (apply db:login dbstruct params))
    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
    ((sdb-qry)                      (apply sdb:qry params))
	    ;; MISC
	    ((login)                        (apply db:login dbstruct params))
	    ((general-call)                 (let ((stmtname   (car params))
						  (run-id     (cadr params))
						  (realparams (cddr params)))
					      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
							  (lambda (db)
							    (db:general-call db stmtname realparams)))))
	    ((sync-inmem->db)               (let ((run-id (car params)))
					      (db:sync-touched dbstruct run-id force-sync: #t)))
	    ((sdb-qry)                      (apply sdb:qry params))
	    ((ping)                         (current-process-id))

    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
	    ;; TESTMETA
	    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
	    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
	    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))))))))
    (else
     (list "ERROR" 0))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (db:string->obj paramsj)) ;; (rmt:json-str->dat paramsj))
	 (res     (api:execute-requests dbstruct cmd params)))
	 (params  (db:string->obj paramsj transport: 'http)) ;; (rmt:json-str->dat paramsj))
	 (resdat  (api:execute-requests dbstruct (vector cmd params))) ;; #( flag result )
	 (res     (vector-ref resdat 1)))

    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))
    ;;      res 
    ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
    (db:obj->string res)))
    (db:obj->string res transport: 'http)))

Modified client.scm from [6d1c8717b3] to [72d1a98b4a].

51
52
53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116










































117
118

119
120
121
122



































123
124
125
126
127
128
129
51
52
53
54
55
56
57

58
59
60
61
62
63
64




















































65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106


107




108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149







-
+






-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 100) (failed-connects 0))
(define (client:setup run-id #!key (remaining-tries 10) (failed-connects 0))
  (debug:print-info 2 "client:setup remaining-tries=" remaining-tries)
  (let* ((tdbdat (tasks:open-db)))
    (if (<= remaining-tries 0)
	(begin
	  (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	  (exit 1))
	(let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	  (if host-info
	      (let* ((iface     (http-transport:server-dat-get-iface host-info))
		     (port      (http-transport:server-dat-get-port  host-info))
		     (start-res (http-transport:client-connect iface port))
		     (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
		(if ping-res   ;; sucessful login?
		    (begin
		      (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
		      ;; Why add the close-connections here?
		      ;; (http-transport:close-connections run-id)
		      (hash-table-set! *runremote* run-id start-res)
		      start-res)  ;; return the server info
		    ;; have host info but no ping. shutdown the current connection and try again
		    (begin    ;; login failed
		      (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)
		      (http-transport:close-connections run-id)
		      (hash-table-delete! *runremote* run-id)
		      (if (< remaining-tries 8)
			  (thread-sleep! 5)
			  (thread-sleep! 1))
		      (client:setup run-id remaining-tries: (- remaining-tries 1)))))
	      ;; YUK: rename server-dat here
	      (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
		(debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
		(if server-dat
		    (let* ((iface     (tasks:hostinfo-get-interface server-dat))
			   (port      (tasks:hostinfo-get-port      server-dat))
			   (start-res (http-transport:client-connect iface port))
			   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
		      (if (and start-res
			       ping-res)
			  (begin
			    (hash-table-set! *runremote* run-id start-res)
			    (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
			    start-res)
			  (begin    ;; login failed but have a server record, clean out the record and try again
			    (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
			    (http-transport:close-connections run-id)
			    (hash-table-delete! *runremote* run-id)
			    (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
								 run-id 
								 (tasks:hostinfo-get-interface server-dat)
								 (tasks:hostinfo-get-port      server-dat)
								 " client:setup (server-dat = #t)")
			    (thread-sleep! 2)
			    (server:try-running run-id)
			    (thread-sleep! 10) ;; give server a little time to start up
			    (client:setup run-id remaining-tries: (- remaining-tries 1)))))
		    (begin    ;; no server registered
		      (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
			(debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
	(let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
	  (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	  (if server-dat
	      (let* ((iface     (tasks:hostinfo-get-interface server-dat))
		     (hostname  (tasks:hostinfo-get-hostname  server-dat))
		     (port      (tasks:hostinfo-get-port      server-dat))
		     (start-res (case *transport-type*
				  ((http)(http-transport:client-connect iface port))
				  ((nmsg)(nmsg-transport:client-connect hostname port))))
		     (ping-res  (case *transport-type* 
				  ((http)(rmt:login-no-auto-client-setup start-res run-id))
				  ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
 					   (if logininfo
 					       (car (vector-ref logininfo 1))
 					       #f))))))
		(if (and start-res
			 ping-res)
		    (begin
		      (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)
			  (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little
			  (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time
		      (server:try-running run-id)
		      (thread-sleep! 5)   ;; give server a little time to start up
		      (client:setup run-id remaining-tries: (- remaining-tries 1))
		      )))
	      (begin    ;; no server registered
		(let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
		  (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
			(thread-sleep! 2) 
			(if (< num-available 2)
		  (if (< num-available 2)
			    (begin
			      (server:try-running run-id)))
			(thread-sleep! 10) ;; give server a little time to start up
			(client:setup run-id remaining-tries: (- remaining-tries 1)))))))))))
		      (server:try-running run-id))
		  (thread-sleep! (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		  (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))

;; 	(let ((host-info (hash-table-ref/default *runremote* run-id #f)))
;; 	  (if host-info ;; this is a bit circular. the host-info *is* the start-res FIXME
;; 	      (let* ((iface     (http-transport:server-dat-get-iface host-info))
;; 		     (port      (http-transport:server-dat-get-port  host-info))
;; 		     (start-res (case *transport-type* 
;; 				  ((http)(http-transport:client-connect iface port))
;; 				  ((nmsg)(nmsg-transport:client-connect iface port)) ;; (http-transport:server-dat-get-socket host-info))
;; 				  (else #f)))
;; 		     (ping-res  (case *transport-type*
;; 				  ((http)(rmt:login-no-auto-client-setup start-res run-id))
;; 				  ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
;; 					   (if logininfo
;; 					       (vector-ref (vector-ref logininfo 1) 1)
;; 					       #f)))
;; 				  (else #f))))
;; 		(if ping-res   ;; sucessful login?
;; 		    (begin
;; 		      (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
;; 		      start-res)  ;; return the server info
;; 		    ;; have host info but no ping. shutdown the current connection and try again
;; 		    (begin    ;; login failed
;; 		      (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)
;; 		      (case *transport-type*
;; 			((http)(http-transport:close-connections run-id)))
;; 		      (hash-table-delete! *runremote* run-id)
;; 		      (if (< remaining-tries 8)
;; 			  (thread-sleep! 5)
;; 			  (thread-sleep! 1))
;; 		      (client:setup run-id remaining-tries: (- remaining-tries 1)))))
;; 	      ;; YUK: rename server-dat here
;; 

;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler

Modified common.scm from [aa6e9ff977] to [5faac0528e].

63
64
65
66
67
68
69
70
71


72
73
74
75
76
77
78
63
64
65
66
67
68
69


70
71
72
73
74
75
76
77
78







-
-
+
+







(define *inmemdb*             #f)
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *rpc:listener*      #f) ;; if set up for server communication this will hold the tcp port
;; (define *transport-type*    'nmsg)
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
223
224
225
226
227
228
229

230
231
232
233
234
235
236
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237







+







  (debug:print-info 2 "starting exit process, finalizing databases.")
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (and (not (null? run-ids))
	     (configf:lookup *configdat* "setup" "megatest-db"))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *inmemdb*     (db:close-all *inmemdb*))
  (if (and *megatest-db*
	   (sqlite3:database? *megatest-db*))
      (begin
	(sqlite3:interrupt! *megatest-db*)
	(sqlite3:finalize! *megatest-db* #t)
	(set! *megatest-db* #f)))
  (if *task-db*     (let ((db (cdr *task-db*)))

Modified db.scm from [7251c124d5] to [ad92b83fd4].

294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
294
295
296
297
298
299
300

301
302
303
304
305
306
307
308







-
+







	(inmem  (dbr:dbstruct-get-inmem dbstruct))
	(maindb (dbr:dbstruct-get-main  dbstruct))
	(refdb  (dbr:dbstruct-get-refdb dbstruct))
	(olddb  (dbr:dbstruct-get-olddb dbstruct))
	;; (runid  (dbr:dbstruct-get-run-id dbstruct))
	)
    (debug:print-info 4 "Syncing for run-id: " run-id)
    (mutex-lock! *http-mutex*)
    ;; (mutex-lock! *http-mutex*)
    (if (eq? run-id 0)
	;; runid equal to 0 is main.db
	(if maindb
	    (if (or (not (number? mtime))
		    (not (number? stime))
		    (> mtime stime)
		    force-sync)
323
324
325
326
327
328
329
330
331
332



333
334
335

336
337
338
339
340
341
342
323
324
325
326
327
328
329



330
331
332
333
334

335
336
337
338
339
340
341
342







-
-
-
+
+
+


-
+







	(if (or (not (number? mtime))
		(not (number? stime))
		(> mtime stime)
		force-sync)
	    (begin
	      (db:delay-if-busy rundb)
	      (db:delay-if-busy olddb)
	      (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb)))
		(dbr:dbstruct-set-stime! dbstruct (current-milliseconds))
		(mutex-unlock! *http-mutex*)
	      (dbr:dbstruct-set-stime! dbstruct (current-milliseconds))
	      (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb)))
		;; (mutex-unlock! *http-mutex*)
		num-synced)
	      (begin
		(mutex-unlock! *http-mutex*)
		;; (mutex-unlock! *http-mutex*)
		0))))))

(define (db:close-main dbstruct)
  (let ((maindb (dbr:dbstruct-get-main dbstruct)))
    (if maindb
	(begin
	  (sqlite3:finalize! (db:dbdat-get-db maindb))
615
616
617
618
619
620
621

622
623
624
625
626
627
628
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629







+







;;
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync run-ids . options)
  (let* ((toppath  (launch:setup-for-run))
	 (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f))
	 (mtdb     (if toppath (db:open-megatest-db)))
	 (allow-cleanup (if run-ids #f #t))
	 (run-ids  (if run-ids 
		       run-ids
		       (if toppath (begin
				     (db:delay-if-busy mtdb)
				     (db:get-all-run-ids mtdb)))))
	 (tdbdat  (tasks:open-db))
	 (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))))
661
662
663
664
665
666
667


668






669
670
671
672
673
674
675
676
677
678




























679
680
681
682
683
684
685
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677










678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712







+
+

+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		   (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)))
	       (debug:print 0 "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db")
	       (db:replace-test-records dbstruct run-id testrecs)
	       (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-get-rundb dbstruct)))))
	   run-ids)))

    ;; now ensure all newdb data are synced to megatest.db
    ;; do not use the run-ids list passed in to the function
    ;;
    (if (member 'new2old options)
	(let* ((maindb      (make-dbr:dbstruct path: toppath local: #t))
	       (src-run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0))))
	       (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <))
	       (count       1)
	       (total       (length all-run-ids))
	       (dead-runs  '()))
	(for-each
	 (lambda (run-id)
	   (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
		  (frundb (db:dbdat-get-db (db:get-db fromdb run-id))))
	     ;; (db:delay-if-busy frundb)
	     ;; (db:delay-if-busy mtdb)
	     (if (eq? run-id 0)
		 (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb)
		 (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb))))
	 (cons 0 run-ids)))
	  (for-each
	   (lambda (run-id)
	     (debug:print 0 "Processing run " (if (eq? run-id 0) " main.db " run-id) ", " count " of " total)
	     (set! count (+ count 1))
	     (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
		    (frundb (db:dbdat-get-db (db:get-db fromdb run-id))))
	       ;; (db:delay-if-busy frundb)
	       ;; (db:delay-if-busy mtdb)
	       ;; (db:clean-up frundb)
	       (if (eq? run-id 0)
		   (begin
		     (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb)
		     (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f))))
		   (begin
		     ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db
		     (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb)
		     (db:clean-up-rundb (db:get-db fromdb run-id))
		     ))))
	   all-run-ids)
	  ;; removed deleted runs
	  (let ((dbdir (tasks:get-task-db-path)))
	    (for-each (lambda (run-id)
			(let ((fullname (conc dbdir "/" run-id ".db")))
			  (if (file-exists? fullname)
			      (begin
				(debug:print 0 "Removing database file for deleted run " fullname)
				(delete-file fullname)))))
		      dead-runs))))
    ;; (db:close-all dbstruct)
    ;; (sqlite3:finalize! mdb)
    ))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
991
992
993
994
995
996
997
998

999
1000
1001
1002
1003
1004
1005
1018
1019
1020
1021
1022
1023
1024

1025
1026
1027
1028
1029
1030
1031
1032







-
+







;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up dbdat)
  (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
1025
1026
1027
1028
1029
1030
1031

























































































1032
1033
1034
1035
1036
1037
1038
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-rundb dbdat)
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
	       ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
	       ;; delete all tests that are 'DELETED'
	       "DELETE FROM tests WHERE state='DELETED';"
	       ))))
    (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-maindb dbdat)
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);"))
	 (statements
	  (map (lambda (stmt)
		 (sqlite3:prepare db stmt))
	       (list
		;; delete all tests that belong to runs that are 'deleted'
		;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
		;; delete all tests that are 'DELETED'
		"DELETE FROM runs WHERE state='deleted';"
		)))
	 (dead-runs '()))
    (sqlite3:for-each-row
     (lambda (run-id)
       (set! dead-runs (cons run-id dead-runs)))
       db
       "SELECT id FROM runs WHERE state='deleted';")
    (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")
    dead-runs))

;;======================================================================
;; M E T A   G E T   A N D   S E T   V A R S
;;======================================================================

;; returns number if string->number is successful, string otherwise
;; also updates *global-delta*
1718
1719
1720
1721
1722
1723
1724

1725

1726
1727










1728
1729
1730
1731
1732
1733
1734
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843


1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860







+

+
-
-
+
+
+
+
+
+
+
+
+
+







		   db 
		   "SELECT run_id,testname,item_path,state,status FROM tests WHERE id=?;" 
		   test-id)))
    res))

;; get a useful subset of the tests data (used in dashboard
;; use db:mintests-get-{id ,run_id,testname ...}
;; 
(define (db:get-tests-for-runs-mindata dbstruct run-ids testpatt states statuses not-in)
  (debug:print 0 "ERROR: BROKN!")
  (db:get-tests-for-runs dbstruct run-ids testpatt states statuses not-in: not-in qryvals: "id,run_id,testname,state,status,event_time,item_path"))

  ;; (db:get-tests-for-runs dbstruct run-ids testpatt states statuses not-in: not-in qryvals: "id,run_id,testname,state,status,event_time,item_path"))
)

;; get a useful subset of the tests data (used in dashboard
;;
(define (db:get-tests-for-run-mindata dbstruct run-id testpatt states statuses not-in)
  (db:get-tests-for-run dbstruct run-id testpatt states statuses #f #f not-in #f #f "id,run_id,testname,state,status,event_time,item_path"))

;; do not use.
;;
(define (db:get-tests-for-runs dbstruct run-ids testpatt states statuses #!key (not-in #f)(qryvals #f))
  ;; (db:delay-if-busy)
  (let ((res '()))
    (for-each 
     (lambda (run-id)
       (set! res (append 
		  res 
2284
2285
2286
2287
2288
2289
2290
2291
2292


2293
2294
2295
2296
2297
2298
2299
2300
2301
2302

2303
2304
2305
2306


2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319


2320
2321
2322
2323
2324
2325
2326
2410
2411
2412
2413
2414
2415
2416


2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427

2428
2429
2430


2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443


2444
2445
2446
2447
2448
2449
2450
2451
2452







-
-
+
+









-
+


-
-
+
+











-
-
+
+







       res))))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)
  (case *transport-type*
(define (db:obj->string obj #!key (transport 'http))
  (case transport
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj)))))
      #t))
    ((zmq)(with-output-to-string (lambda ()(serialize obj))))
    ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj))))
    (else obj)))

(define (db:string->obj msg)
  (case *transport-type*
(define (db:string->obj msg #!key (transport 'http))
  (case transport
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
	   #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    (if msg

Modified docs/manual/megatest_manual.html from [191f1255c5] to [151c9a3dda].

1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1010
1011
1012
1013
1014
1015
1016







1017
1018
1019
1020
1021
1022
1023







-
-
-
-
-
-
-







sudo netstat -lptu
sudo netstat -tulpn</code></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
<div class="sectionbody">
<div class="paragraph"><p>Chapters grouped into book parts are at level 1 and can contain
sub-sections.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_the_testconfig_file">The testconfig File</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
1182
1183
1184
1185
1186
1187
1188




















































































1189
1190
1191
1192
1193
1194
1195
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_programming_api">Programming API</h2>
<div class="sectionbody">
<div class="paragraph"><p>These routines can be called from the megatest repl.</p></div>
<div class="tableblock">
<table rules="all"
width="70%"
frame="hsides"
cellspacing="0" cellpadding="4">
<caption class="title">Table 2. API Server Management Calls</caption>
<col width="14%" />
<col width="28%" />
<col width="28%" />
<col width="28%" />
<thead>
<tr>
<th align="center" valign="top">API Call                  </th>
<th align="left" valign="top"> Purpose comments   </th>
<th align="left" valign="top"> Returns                 </th>
<th align="left" valign="top"> Comments</th>
</tr>
</thead>
<tfoot>
<tr>
<td align="center" valign="top"><p class="table">(rmt:login        run-id)</p></td>
<td align="left" valign="top"><p class="table"><code>Verify the the version, testsuite area etc. are correct.</code></p></td>
<td align="left" valign="top"><p class="table"><code>#( #t "successful login" )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
</tfoot>
<tbody>
<tr>
<td align="center" valign="top"><p class="table">(rmt:start-server run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#( success/fail   n/a )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
<tr>
<td align="center" valign="top"><p class="table">(rmt:kill-server  run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#( success/fail   n/a )</code></p></td>
<td align="left" valign="top"><p class="table"><code>Works only if the server is still reachable</code></p></td>
</tr>
</tbody>
</table>
</div>
<div class="tableblock">
<table rules="all"
width="70%"
frame="hsides"
cellspacing="0" cellpadding="4">
<caption class="title">Table 3. API Keys Related Calls</caption>
<col width="14%" />
<col width="28%" />
<col width="28%" />
<col width="28%" />
<thead>
<tr>
<th align="center" valign="top">API Call                        </th>
<th align="left" valign="top"> Purpose comments   </th>
<th align="left" valign="top"> Returns                 </th>
<th align="left" valign="top"> Comments</th>
</tr>
</thead>
<tfoot>
<tr>
<td align="center" valign="top"><p class="table">(rmt:get-key-val-pairs run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#t=success/#f=fail</code></p></td>
<td align="left" valign="top"><p class="table"><code>Works only if the server is still reachable</code></p></td>
</tr>
</tfoot>
<tbody>
<tr>
<td align="center" valign="top"><p class="table">(rmt:get-keys run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>( key1 key2 &#8230; )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
</tbody>
</table>
</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>
</div>
1273
1274
1275
1276
1277
1278
1279
1280

1281
1282
1283
1284
1350
1351
1352
1353
1354
1355
1356

1357
1358
1359
1360
1361







-
+




</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-10-08 23:02:21 MST
Last updated 2014-11-25 21:11:28 MST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [eff8aa5426] to [ddb57ef21a].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1
2
3
4






5
6
7
8
9
10
11




-
-
-
-
-
-








Reference
=========

The First Chapter of the Second Part
------------------------------------

Chapters grouped into book parts are at level 1 and can contain
sub-sections.

The testconfig File
-------------------

Setup section
~~~~~~~~~~~~~

Header
176
177
178
179
180
181
182























183
184
185
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




-----------------
[triggers]
COMPLETED/ xterm -e bash -s -- 
-----------------

NOTE: There is a trailing space after the --

Programming API
---------------

These routines can be called from the megatest repl. 

.API Server Management Calls
[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"]
|======================
|API Call                  | Purpose comments   | Returns                 | Comments
|(rmt:start-server run-id) |                    | #( success/fail   n/a ) | 
|(rmt:kill-server  run-id) |                    | #( success/fail   n/a ) | Works only if the server is still reachable
|(rmt:login        run-id) | Verify the the version, testsuite area etc. are correct. | #( #t "successful login" ) |
|======================

.API Keys Related Calls
[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"]
|======================
|API Call                        | Purpose comments   | Returns                 | Comments
|(rmt:get-keys run-id)           |                    | ( key1 key2 ... )       | 
| (rmt:get-key-val-pairs run-id) |                    | #t=success/#f=fail      | Works only if the server is still reachable
|======================


:numbered!:

Modified http-transport.scm from [907ced71b2] to [012f18812a].

241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265




















266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295



















296
297
298
299
300
301
302
303
304
305
306
307
308
309


310












311
312
313
314
315
316
317
318
319
320
321
322
323

324
325
326
327
328
329

330
331
332
333
334
335
336
241
242
243
244
245
246
247


















248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283















284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318

319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342

343
344
345
346
347
348
349
350
351
352
353
354
355
356
357







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+















+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+














+
+
-
+
+
+
+
+
+
+
+
+
+
+
+












-
+






+







(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (success    #t))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (handle-exceptions
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (tasks:kill-server-run-id run-id)
	   #f))
     (begin
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http)))
;;    (condition-case
;;     handle-exceptions
;;     exn
;;     (if (> numretries 0)
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (thread-sleep! 1)
;;	   (handle-exceptions
;;	    exn
;;	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
;;	    (close-all-connections!))
;;	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
;;	   (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1)))
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (tasks:kill-server-run-id run-id)
;;	   #f))
;;     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (vector
					 success
					 (db:string->obj 
					 (handle-exceptions
					  exn
					  (begin
					    (set! success #f)
					    (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ". Killing associated server to allow clean retry.")
					    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					    (hash-table-delete! *runremote* run-id)
					    ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine.
					    #f)
					  (with-input-from-request ;; was dat
					   fullurl 
					   (list (cons 'key "thekey")
						 (cons 'cmd cmd)
						 (cons 'params params))
					   read-string))))
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					     (hash-table-delete! *runremote* run-id)
					     ;; Killing associated server to allow clean retry.")
					     (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (signal (make-composite-condition
						      (make-property-condition 'commfail 'message "failed to connect to server")))
					     #f)
					   (with-input-from-request ;; was dat
					    fullurl 
					    (list (cons 'key "thekey")
						  (cons 'cmd cmd)
						  (cons 'params sparams))
					    read-string))
					  transport: 'http)))
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 (if (vector? res)
	     (if (vector-ref res 0)
	 res)))))
		 res
		 (begin ;; note: this code also called in nmsg-transport - consider consolidating it
		   (debug:print 0 "ERROR: error occured at server, info=" (vector-ref res 2))
		   (debug:print 0 " client call chain:")
		   (print-call-chain (current-error-port))
		   (debug:print 0 " server call chain:")
		   (pp (vector-ref res 1) (current-error-port))
		   (signal (vector-ref result 0))))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 5))
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
353
354
355
356
357
358
359

360
361
362
363
364
365
366
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388







+







;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id)
  (let* ((tdbdat      (tasks:open-db))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signature")
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471

472
473





474
475
476
477
478
479
480







-


-
-
-
-
-







      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      ;;
      ;; no_traffic, no running tests, if server 0, no running servers
      ;;
      ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out)
      ;;
      (if (and *server-run*
	       ;; (or
	       (> (+ last-access server-timeout)
		  (current-seconds)))
;;		   (and (eq? run-id 0)
;;			(> (tasks:num-servers-non-zero-running tdb) 0))
;;		   (and (not (eq? run-id 0)) ;; only makes sense in non-zero run-id servers
;;			(> (db:get-count-tests-actually-running *inmemdb* run-id) 0))
;;		   ))
	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    ;;
	    ;; Consider implementing some smarts here to re-insert the record or kill self is
	    ;; the db indicates so
	    ;;
	    ;; (if (tasks:server-am-i-the-server? tdb run-id)

Modified launch.scm from [34882953c1] to [111f911e42].

456
457
458
459
460
461
462







463
464
465
466
467
468
469
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476







+
+
+
+
+
+
+







	(set! *configinfo* (find-and-read-config 
			    (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config")
			    environ-patt: "env-override"
			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
			    pathenvvar: "MT_RUN_AREA_HOME"))
	(set! *configdat*  (if (car *configinfo*)(car *configinfo*) #f))
	(set! *toppath*    (if (car *configinfo*)(cadr *configinfo*) #f))
	(let* ((tmptransport (configf:lookup *configdat* "server" "transport"))
	       (transport    (if tmptransport (string->symbol tmptransport) 'http)))
	  (if (member transport '(http rpc nmsg))
	      (set! *transport-type* transport)
	      (begin
		(debug:print 0 "ERROR: Unrecognised transport " transport)
		(exit))))
	(let ((linktree (configf:lookup *configdat* "setup" "linktree"))) ;; link tree is critical
	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin

Modified megatest.scm from [16af3583d8] to [f155a05495].

281
282
283
284
285
286
287

288
289
290
291
292



293
294
295
296
297
298
299
281
282
283
284
285
286
287
288
289
290
291
292

293
294
295
296
297
298
299
300
301
302







+




-
+
+
+







			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *time-zero* (current-seconds))
(define *watchdog*
  (make-thread 
   (lambda ()
     (thread-sleep! 0.05) ;; delay for startup
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db")))
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db"))
	   (debug-mode  (debug:debug-mode 1))
	   (last-time   (current-seconds)))
       (let loop ()
	 ;; sync for filesystem local db writes
	 ;;
	 (let ((start-time      (current-seconds))
	       (servers-started (make-hash-table)))
	   (for-each 
	    (lambda (run-id)
308
309
310
311
312
313
314
315
316







317
318
319
320
321

322
323
324
325
326
327
328
311
312
313
314
315
316
317


318
319
320
321
322
323
324
325
326
327
328

329
330
331
332
333
334
335
336







-
-
+
+
+
+
+
+
+




-
+







			  (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds")))
		    ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run
		    ;;     (begin
		    ;;       (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id)
		    ;;       (server:kind-run run-id)))))
		    (hash-table-delete! *db-local-sync* run-id)))
	      (mutex-unlock! *db-multi-sync-mutex*))
	    (hash-table-keys *db-local-sync*)))

	    (hash-table-keys *db-local-sync*))
	   (if (and debug-mode
		    (> (- start-time last-time) 14))
	       (begin
		 (set! last-time start-time)
		 (debug:print-info 0 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	 
	 ;; keep going unless time to exit
	 ;;
	 (if (not *time-to-exit*)
	     (begin
	       (thread-sleep! 1) ;; wait one second before syncing again
	       (thread-sleep! 5) ;; wait five seconds before syncing again, we'll also sync on exit
	       (loop)))))
     "Watchdog thread")))

(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))

Modified newdashboard.scm from [24924c0cda] to [c632e597af].

76
77
78
79
80
81
82




83
84
85
86
87
88
89
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93







+
+
+
+








;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
(define *dbstruct-local*  (make-dbr:dbstruct path:  *dbdir*
					     local: #t))
(define *db-file-path* (db:dbfile-path 0))

;; HACK ALERT: this is a hack, please fix.

Added nmsg-transport.scm version [2023441101].







































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use nanomsg)

(declare (unit nmsg-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))

(include "common_records.scm")
(include "db_records.scm")

;; Transition to pub --> sub with pull <-- push
;;
;;   1. client sends request to server via push to the pull port
;;   2. server puts request in queue or processes immediately as appropriate
;;   3. server puts responses from completed requests into pub port 
;;
;; TODO
;;
;; Done Tested
;; [x]  [ ]    1. Add columns pullport pubport to servers table
;; [x]  [ ]    2. Add rm of monitor.db if older than 11/12/2012 
;; [x]  [ ]    3. Add create of pullport and pubport with finding of available ports
;; [x]  [ ]    4. Add client compose of request
;; [x]  [ ]        - name of client: testname/itempath-test_id-hostname 
;; [x]  [ ]        - name of request: callname, params
;; [x]  [ ]        - request key: f(clientname, callname, params)
;; [x]  [ ]    5. Add processing of subscription hits
;; [x]  [ ]        - done when get key 
;; [x]  [ ]        - return results
;; [x]  [ ]    6. Add timeout processing
;; [x]  [ ]        - after 60 seconds
;; [ ]  [ ]            i. check server alive, connect to new if necessary
;; [ ]  [ ]           ii. resend request
;; [ ]  [ ]    7. Turn self ping back on

(define (nmsg-transport:make-server-url hostport #!key (bindall #f))
  (if (not hostport)
      #f
      (conc "tcp://" (if bindall "*" (car hostport)) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

(define (nmsg-transport:run dbstruct hostn run-id server-id #!key (retrynum 1000))
  (debug:print 2 "Attempting to start the server ...")
  (let* ((start-port      (portlogger:open-run-close portlogger:find-port))
	 (server-thread   (make-thread (lambda ()
					 (nmsg-transport:try-start-server dbstruct run-id start-port server-id))
				       "server thread"))
	 (tdbdat          (tasks:open-db)))
    (thread-start! server-thread)
    (thread-sleep! 0.1)
    (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id))
	(let ((interface (if (equal? hostn "-")(get-host-name) hostn)))
	  (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
	  (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running
	  (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access
	  ;; (set! *inmemdb*  dbstruct)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
	  (thread-start! (make-thread
			  (lambda ()(nmsg-transport:keep-running server-id run-id))
			  "keep running"))
	  (thread-join! server-thread))
	(if (> retrynum 0)
	    (begin
	      (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.")
	      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
	      (portlogger:open-run-close portlogger:set-failed start-port)
	      (nmsg-transport:run dbstruct hostn run-id server-id))
	    (begin
	      (debug:print 0 "ERROR: could not find an open port to start server on. Giving up")
	      (exit 1))))))

(define (nmsg-transport:try-start-server dbstruct run-id portnum server-id)
  (let ((repsoc (nn-socket 'rep)))
    (nn-bind repsoc (conc "tcp://*:" portnum))
    (let loop ((msg-in (nn-recv repsoc)))
      (let* ((dat    (db:string->obj msg-in transport: 'nmsg)))
	(debug:print 0 "server, received: " dat)
	(let ((result (api:execute-requests dbstruct dat)))
	  (debug:print 0 "server, sending: " result)
	  (nn-send repsoc (db:obj->string result  transport: 'nmsg)))
	(loop (nn-recv repsoc))))))

;; all routes though here end in exit ...
;;
(define (nmsg-transport:launch run-id)
  (let* ((tdbdat   (tasks:open-db))
	 (dbstruct (db:setup run-id))
	 (hostn    (or (args:get-arg "-server") "-")))
    (set! *run-id*   run-id)
    (set! *inmemdb* dbstruct)
    ;; with nbfake daemonize isn't really needed
    ;;
    ;; (if (args:get-arg "-daemonize")
    ;;     (begin
    ;;       (daemon:ize)
    ;;       (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
    ;;           (begin
    ;;     	(current-error-port *alt-log-file*)
    ;;     	(current-output-port *alt-log-file*)))))
    (if (server:check-if-running run-id)
	(begin
	  (debug:print-info 0 "Server for run-id " run-id " already running")
	  (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
	       (remtries  4))
      (if (not server-id)
	  (if (> remtries 0)
	      (begin
		(thread-sleep! 2)
		(if (not (server:check-if-running run-id))
		    (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
			  (- remtries 1))
		    (begin
		      (debug:print-info 0 "Another server took the slot, exiting")
		      (exit 0))))
	      (begin
		;; since we didn't get the server lock we are going to clean up and bail out
		(debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
		(tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch")
		))
	  ;; locked in a server id, try to start up
	  (nmsg-transport:run dbstruct hostn run-id server-id))
      (set! *didsomething* #t)
      (exit))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

(define (nmsg-transport:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

;; ping the server at host:port
;;   return the open socket if successful (return-socket == #t)
;;   expect the key expected-key returned in payload
;;   send our-key or #f as payload
;;
(define (nmsg-transport:ping hostn port #!key (timeout 3)(return-socket #t)(expected-key #f)(our-key #f)(socket #f))
  ;; send a random number along with pid and check that we get it back
  (let* ((host    (if (or (not hostn)
			  (equal? hostn "-")) ;; use localhost
		      (get-host-name)
		      hostn))
	 (req     (or socket
		      (let ((soc (nn-socket 'req)))
			(nn-connect soc (conc "tcp://" host ":" port))
			soc)))
	 (success #t)
	 (dat     (vector "ping" our-key))
	 (result  (condition-case 
		   (nmsg-transport:client-api-send-receive-raw req dat timeout: timeout)
		   ((timeout)(set! success #f) #f)))
	 (key     (if success 
		      (vector-ref result 1)
		      #f)))
    (debug:print 0 "success=" success ", key=" key ", expected-key=" expected-key ", equal? " (equal? key expected-key))
    (if (and success
	     (or (not expected-key) ;; just getting a reply is good enough then
		 (equal? key expected-key)))
	(if return-socket
	    req
	    (begin
	      (if (not socket)(nn-close req)) ;; don't want a side effect of closing socket if handed it
	      #t))
	(begin
	  (if (not socket)(nn-close req)) ;; failed to ping, close socket as side effect
	  #f))))

;; send data to server, wait max of timeout seconds for a response.
;; return #( success/fail result )
;;
;; for effiency it is easier to do the obj->string and string->obj here.
;;
(define (nmsg-transport:client-api-send-receive-raw socreq indat #!key (enable-send #t)(timeout 25))
  (let* ((success     #f)
	 (result      #f)
	 (keepwaiting #t)
	 (dat         (db:obj->string indat transport: 'nmsg))
	 (send-recv   (make-thread
		       (lambda ()
			 (nn-send socreq dat)
			 (let* ((res (nn-recv socreq)))
			   (set! success #t)
			   (set! result (db:string->obj res transport: 'nmsg))))
		       "send-recv"))
	 (timeout     (make-thread
		       (lambda ()
			 (let loop ((count 0))
			   (thread-sleep! 1)
			   (debug:print-info 1 "send-receive-raw, still waiting after " count " seconds...")
			   (if (and keepwaiting (< count timeout)) ;; yes, this is very aproximate
			       (loop (+ count 1))))
			 (if keepwaiting
			     (begin
			       (print "timeout waiting for ping")
			       (thread-terminate! send-recv))))
		       "timeout")))
    ;; replace with condition-case?
    (handle-exceptions
     exn
     (set! result "timeout")
     (thread-start! timeout)
     (thread-start! send-recv)
     (thread-join! send-recv)
     (if success (thread-terminate! timeout)))
    ;; raise timeout error if timed out
    (if success
	(if (and (vector? result)
		 (vector-ref result 0)) ;; did it fail at the server?
	    result                ;; nope, all good
	    (begin
	      (debug:print 0 "ERROR: error occured at server, info=" (vector-ref result 2))
	      (debug:print 0 " client call chain:")
	      (print-call-chain (current-error-port))
	      (debug:print 0 " server call chain:")
	      (pp (vector-ref result 1) (current-error-port))
	      (signal (vector-ref result 0))))
	(signal (make-composite-condition
		 (make-property-condition 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))

;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (nmsg-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat 
			      (begin
				(debug:print-info 0 "keep-running got sdat=" sdat)
				sdat)
                              (begin
                                (thread-sleep! 0.5)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdbdat      (tasks:open-db))
	 (server-timeout (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; (* 3 24 60 60) ;; default to three days
			       (* 60 1)         ;; default to one minute
			       ;; (* 60 60 25)      ;; default to 25 hours
			       ))))
    (print "Keep-running got server pid " server-id ", using iface " iface " and port " port)
    (let loop ((count 0))
      (thread-sleep! 4) ;; no need to do this very often
      ;; NB// sync currently does NOT return queue-length
      (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
      ;; (print "Server running, count is " count)
        (if (< count 1) ;; 3x3 = 9 secs aprox
            (loop (+ count 1)))
        
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
	(db:sync-touched *inmemdb* run-id force-sync: #t)
        (if (and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
            (begin
              (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              (set! *time-to-exit* #t)
	      (db:sync-touched *inmemdb* run-id force-sync: #t)
              (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running")
              (debug:print-info 0 "Server shutdown complete. Exiting")
              (exit)
	      ))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

(define (nmsg-transport:client-connect iface portnum)
  (let* ((reqsoc      (nmsg-transport:ping iface portnum return-socket: #t)))
    (vector iface portnum #f #f #f (current-seconds) reqsoc)))

;; returns result, there is no sucess/fail flag - handled via excpections
;;
(define (nmsg-transport:client-api-send-receive run-id connection-info cmd param #!key (remtries 5))
  ;; NB// In the html version of this routine there is a call to 
  ;;      tasks:kill-server-run-id when there is an exception
  (mutex-lock! *http-mutex*)
  (let* ((packet  (vector cmd param))
	 (reqsoc  (http-transport:server-dat-get-socket connection-info))
	 (res     (nmsg-transport:client-api-send-receive-raw reqsoc packet)))
;;	 (status  (vector-ref rawres 0))
;;	 (result  (vector-ref rawres 1)))
    (mutex-unlock! *http-mutex*)
    res)) ;; (vector status (if status (db:string->obj result transport: 'nmsg) result))))
	
;;======================================================================
;; J U N K 
;;======================================================================

;; DO NOT USE
;;
(define (nmsg-transport:client-signal-handler signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (if (not *received-response*)
				 (receive-message* *runremote*))) ;; flush out last call if applicable
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 "       Done.")
			     (exit 4))
			   "exit on ^C timer")))
     (thread-start! th2)
     (thread-start! th1)
     (thread-join! th2))))

Modified rmt.scm from [4e95475281] to [df66a53f8b].

11
12
13
14
15
16
17

18
19
20
21
22
23
24
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25







+








(use json format)

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
(declare (uses nmsg-transport))

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
61
62
63
64
65
66
67
68

69
70
71

72
73
74
75
76
77
78
79
80
81
82
83
84














85
86
87


88
89


90
91
92








93
94
95
96
97

98
99
100
101



102
103
104

105
106



107





108
109
110
111
112
113


114

115

116
117


118


119
120







121
122
123
124
125
126
127
62
63
64
65
66
67
68

69



70
71












72
73
74
75
76
77
78
79
80
81
82
83
84
85
86


87
88
89
90
91
92



93
94
95
96
97
98
99
100
101
102



103




104
105
106



107
108

109
110
111
112
113
114
115
116
117
118
119
120
121


122
123
124
125

126
127
128
129
130
131
132
133


134
135
136
137
138
139
140
141
142
143
144
145
146
147







-
+
-
-
-
+

-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
+
+


+
+
-
-
-
+
+
+
+
+
+
+
+


-
-
-
+
-
-
-
-
+
+
+
-
-
-
+

-
+
+
+

+
+
+
+
+




-
-
+
+

+
-
+


+
+

+
+
-
-
+
+
+
+
+
+
+







	cinfo
	;; NB// can cache the answer for server running for 10 seconds ...
	;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

;; cmd is a symbol
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 0))
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time))
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *db-multi-sync-mutex*)
  ;; (let ((expire-time (- (current-seconds) 60)))
  ;;   (for-each 
  ;;    (lambda (run-id)
  ;;      (let ((connection (hash-table-ref/default *runremote* run-id #f)))
  ;;        (if (and connection 
  ;;       	  (< (http-transport:server-dat-get-last-access connection) expire-time))
  ;;            (begin
  ;;              (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
  ;;              ;; SHOULD CLOSE THE CONNECTION HERE
  ;;              (hash-table-delete! *runremote* run-id)))))
  ;;    (hash-table-keys *runremote*)))
  ;; (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id))
	 (jparams         (db:obj->string params)))
	 (connection-info (rmt:get-connection-info run-id)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*
			  ((http)(condition-case
	(let* ((dat     (http-transport:client-api-send-receive run-id connection-info cmd jparams))
	       (res     (if (and dat (vector? dat)) (vector-ref dat 1) #f))
	       (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)))
				  (http-transport:client-api-send-receive run-id connection-info cmd params)
				  ((commfail)(vector #f "communications fail"))))
			  ((nmsg)(condition-case
				  (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
				  ((timeout)(vector #f "timeout talking to server"))))
			  (else  (exit))))
	       (success (if (and dat (vector? dat)) (vector-ref dat 0) #f))
	       (res     (if (and dat (vector? dat)) (vector-ref dat 1) #f)))
	  (http-transport:server-dat-update-last-access connection-info)
	  (if success
	      (db:string->obj res)
	      ;; (if (< attemptnum 100)
	      ;;     (begin
	      (begin
	      ;;       (hash-table-delete! *runremote* run-id)
	      ;;       (thread-sleep! 0.5)
	      ;;       (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1)))
	      ;;     (begin
		;; (mutex-unlock! *send-receive-mutex*)
		(case *transport-type* 
		  ((http) res) ;; (db:string->obj res))
	      ;;       (print-call-chain (current-error-port))
	      ;;       (debug:print 0 "ERROR: too many attempts to communicate have failed. Giving up. Kill your mtest processes and start over")
	      ;;       (exit 1)))))
		  ((nmsg) res))) ;; (vector-ref res 1)))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.")
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
		(if (eq? (modulo attemptnum 5) 0)
		    (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
		;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
		(tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
		;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server
		(if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		(thread-sleep! 2)
		;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		;; (thread-sleep! 2)
		(rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1)))))
	;; no connection info? try to start a server
	(if (and (< attemptnum 10)
	(if (and (< attemptnum 15)
		 (tasks:need-server run-id))
	    (begin
	      (hash-table-delete! *runremote* run-id)
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
	      (client:setup run-id)
	      (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
	      (rmt:send-receive cmd rid params (+ attemptnum 1)))
	    (rmt:open-qry-close-locally cmd run-id params)))))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
	    (begin
	      (debug:print 0 "ERROR: Communication failed!")
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (exit)
	      ;; (rmt:open-qry-close-locally cmd run-id params))))
	      )))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")
182
183
184
185
186
187
188
189


190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210









211
212
213
214
215
216
217
202
203
204
205
206
207
208

209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224







225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240







-
+
+














-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+







			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (resdat        (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (res           (vector-ref resdat 1))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats run-id cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))
	    ;; just set it every time. Is a write more expensive than a read and does it matter?
	    (hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
	    (mutex-unlock! *db-multi-sync-mutex*)))
      res)))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (dat      (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
    (if (and dat (vector-ref dat 0))
	(db:string->obj (vector-ref dat 1))
	(begin
	  (debug:print 0 "ERROR: rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
	  dat))))
	 ;; (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res  	   (http-transport:client-api-send-receive run-id connection-info cmd params)))
    (if (and res (vector-ref res 0))
	res
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print 0 "ERROR: rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

240
241
242
243
244
245
246

247
248

249
250



251
252
253
254
255
256
257
263
264
265
266
267
268
269
270
271
272
273


274
275
276
277
278
279
280
281
282
283







+


+
-
-
+
+
+







;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (case *transport-type*
  (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
  
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
    ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

(define (rmt:sync-inmem->db run-id)
313
314
315
316
317
318
319


320

321

322
323
324
325
326



























327
328
329
330
331
332
333
339
340
341
342
343
344
345
346
347
348
349

350
351




352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385







+
+

+
-
+

-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (if (number? run-id)
      (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals))
      (begin
	(debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id)
	(print-call-chain (current-error-port))
	'())))

;; IDEA: Threadify these - they spend a lot of time waiting ...
;;
(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
  (let ((multi-run-mutex (make-mutex))
  (let ((run-id-list (if run-ids
	(run-id-list (if run-ids
			 run-ids
			 (rmt:get-all-run-ids))))
    (apply append (map (lambda (run-id)
			 (rmt:send-receive 'get-tests-for-run-mindata run-id (list run-ids testpatt states status not-in)))
		       run-id-list))))
			 (rmt:get-all-run-ids)))
	(result      '()))
    (if (null? run-id-list)
	'()
	(for-each 
	 (lambda (th)
	   (thread-join! th)) ;; I assume that joining completed threads just moves on
	 (let loop ((hed     (car run-id-list))
		    (tal     (cdr run-id-list))
		    (threads '()))
	   (let* ((newthread (make-thread
			      (lambda ()
				(let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
				  (if (list? res)
				      (begin
					(mutex-lock! multi-run-mutex)
					(set! result (append result res))
					(mutex-unlock! multi-run-mutex))
				      (debug:print 0 "ERROR: get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
			      (conc "multi-run-thread for run-id " hed)))
		  (newthreads (cons newthread threads)))
	     (thread-start! newthread)
	     (thread-sleep! 0.5) ;; give that thread some time to start
	     (if (null? tal)
		 newthreads
		 (loop (car tal)(cdr tal) newthreads))))))
    result))

(define (rmt:delete-test-records run-id test-id)
  (rmt:send-receive 'delete-test-records run-id (list run-id test-id)))

;; This is not needed as test steps are deleted on test delete call
;;
;; (define (rmt:delete-test-step-records run-id test-id)

Modified runs.scm from [396462afab] to [f2976f1213].

942
943
944
945
946
947
948
949

950
951
952
953
954
955
956
942
943
944
945
946
947
948

949
950
951
952
953
954
955
956







-
+







	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))

	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)
		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10))
	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 240))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))

Modified server.scm from [f2b9d5f3d9] to [3a939720aa].

18
19
20
21
22
23
24

25
26
27
28
29
30
31
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32







+







(declare (unit server))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses synchash))
(declare (uses http-transport))
(declare (uses nmsg-transport))
(declare (uses launch))
;; (declare (uses zmq-transport))
(declare (uses daemon))

(include "common_records.scm")
(include "db_records.scm")

45
46
47
48
49
50
51

52



53
54
55
56
57
58
59
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
61
62
63







+
-
+
+
+







;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  (case *transport-type*
  (http-transport:launch run-id))
    ((http)(http-transport:launch run-id))
    ((nmsg)(nmsg-transport:launch run-id))
    (else (debug:print 0 "ERROR: unknown server type " *transport-type*))))

;;======================================================================
;; Q U E U E   M A N A G E M E N T
;;======================================================================

;; We don't want to flush the queue if it was just flushed
(define *server:last-write-flush* (current-milliseconds))
134
135
136
137
138
139
140

141
142
143






144
145
146
147
148
149
150
138
139
140
141
142
143
144
145



146
147
148
149
150
151
152
153
154
155
156
157
158







+
-
-
-
+
+
+
+
+
+







	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (case *transport-type*
	(let ((res (server:ping-server run-id 
				       (tasks:hostinfo-get-interface server)
				       (tasks:hostinfo-get-port      server))))
		     ((http)(server:ping-server run-id 
						(tasks:hostinfo-get-interface server)
						(tasks:hostinfo-get-port      server)))
		     ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
						 (tasks:hostinfo-get-port      server)
						 timeout: 2)))))
	  ;; if the server didn't respond we must remove the record
	  (if res
	      #t
	      (begin
		(debug:print-info 0 "server at " server " not responding, removing record")
		(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id 
				" server:check-if-running")

Modified tasks.scm from [af4bc3dbb1] to [1cac97501f].

183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
183
184
185
186
187
188
189

190
191
192
193
194
195
196
197







-
+







   (get-host-name)            ;; hostname
   -1                         ;; port
   -1                         ;; pubport
   (random 1000)              ;; priority (used a tiebreaker on get-available)
   "available"                ;; state
   (common:version-signature) ;; mt_version
   -1                         ;; interface
   "http"                     ;; transport
   (conc *transport-type*)    ;; transport
   run-id
   ))

(define (tasks:num-in-available-state mdb run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (num-in-queue)
359
360
361
362
363
364
365
366

367
368
369
370
371
372
373
359
360
361
362
363
364
365

366
367
368
369
370
371
372
373







-
+







(define (tasks:need-server run-id)
  (let ((forced (configf:lookup *configdat* "server" "required"))
	(maxqry (cdr (rmt:get-max-query-average run-id)))
	(threshold   (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
    (cond
     (forced 
      (if (common:low-noise-print 60 run-id "server required is set")
	  (debug:print-info 0 "Server required is set, starting server."))
	  (debug:print-info 0 "Server required is set, starting server for run-id " run-id "."))
      #t)
     ((> maxqry threshold)
      (if (common:low-noise-print 60 run-id "Max query time execeeded")
	  (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, starting server."))
      #t)
     (else
      #f))))

Added testnanomsg/basic-req-rep.scm version [1436c827c9].




1
2
3
+
+
+
(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))

Added testnanomsg/mockupclient.scm version [63a8c6685a].











































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(use zmq posix numbers)

(define cname "Bob")
(define runtime 10)
(let ((args (argv)))
  (if (< (length args) 3)
      (begin
	(print "Usage: mockupclient clientname runtime")
	(exit))
      (begin
	(set! cname (cadr args))
	(set! runtime (string->number (caddr args))))))
      
;; (define start-delay (/ (random 100) 9))
;; (define runtime     (+ 1 (/ (random 200) 2)))

(print "Starting client " cname " with runtime " runtime)

(include "mockupclientlib.scm")

(set! endtime (+ (current-seconds) runtime))

;; first ping the server to ensure we have a connection
(if (server-ping cname 5)
    (print "SUCCESS: Client " cname " connected to server")
    (begin
      (print "ERROR: Client " cname " failed ping of server, exiting")
      (exit)))

(let loop ()
  (let ((x (random 15))
	(varname (list-ref (list "hello" "goodbye" "saluton" "kiaorana")(random 4))))
    (case x
      ;; ((1)(dbaccess cname 'sync "nodat"    #f))
      ((2 3 4 5)(dbaccess cname 'set varname (random 999)))
      ((6 7 8 9 10)(print cname ": Get \"" varname "\" " (dbaccess cname 'get varname #f)))
      (else
       (thread-sleep! 0.011)))
    (if (< (current-seconds) endtime)
	(loop))))

(print "Client " cname " all done!!")

Added testnanomsg/mockupclientlib.scm version [3b245ba7a9].



























































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(define reqs (nn-socket 'req))

(connect-socket reqs "tcp://localhost:6563")

(thread-sleep! 0.2)

(define (server-ping cname timeout)
  (let ((msg     (conc cname ":ping:" timeout))
	(maxtime (+ (current-seconds) timeout)))
    (print "pinging server from " cname " with timeout " timeout)
    (let loop ((res   #f))
      (if (< maxtime (current-seconds))
	  #f ;; failed to ping
	  (if (equal? res "Got ping")
	      #t
	      (begin
		(print "Ping received from server " res)
		(send-message push msg)
		(thread-sleep! 0.1)
		(loop (receive-message sub non-blocking: #t))))))))
  
(define (dbaccess cname cmd var val #!key (numtries 20))
  (let* ((msg (conc cname ":" cmd ":" (if val (conc var " " val) var)))
	 (res #f)
	 (mtx1 (make-mutex))
	 (do-access (lambda ()
		      (let ((tmpres #f))
			(print "Sending msg: " msg)
			(send-message push msg)
			(print "Message " msg " sent")
			(print "Client " cname " waiting for response to " msg)
			(print "Client " cname " received address " (receive-message* sub))
			(set! tmpres (receive-message* sub))
			(mutex-lock! mtx1)
			(set! res tmpres)
			(mutex-unlock! mtx1))))
	 (th1 (make-thread do-access "do access"))
	 (th2 (make-thread (lambda ()
			     (let ((result #f))
			       (mutex-lock! mtx1)
			       (set! result res)
			       (mutex-unlock! mtx1)
			       (thread-sleep! 5)
			       (if (not result)
				   (if (> numtries 0)
				       (begin
					 (print "WARNING: access timed out for " cname ", trying again. Trys remaining=" numtries)
					 (dbaccess cname cmd var val numtries: (- numtries 1)))
				       (begin
					 (print "ERROR: dbaccess timed out. Exiting")
					 (exit)))))
			     "timeout thread"))))
    (thread-start! th1)
    (thread-start! th2)
    (thread-join! th1)
    (if res (print "SUCCESS: received " res " with " numtries " remaining possible attempts"))
    res))

Added testnanomsg/mockupserver.scm version [a4d3e5594c].



















































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; pub/sub with envelope address
;; Note that if you don't insert a sleep, the server will crash with SIGPIPE as soon
;; as a client disconnects.  Also a remaining client may receive tons of
;; messages afterward.

(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))
(define cname "server")
(define total-db-accesses 0)
(define start-time (current-seconds))

(nn-bind resp  "tcp://*:6563")

(thread-sleep! 0.2)

(define (open-db)
  (let* ((dbpath    "mockup.db")
	 (dbexists  (file-exists? dbpath))
	 (db        (open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout 10)))
    (set-busy-handler! db handler)
    (if (not dbexists)
	(for-each
	 (lambda (stmt)
	   (execute db stmt))
	 (list
	  "PRAGMA SYNCHRONOUS=0;"
	  "CREATE TABLE clients (id INTEGER PRIMARY KEY,name TEXT,num_accesses INTEGER DEFAULT 0);"
	  "CREATE TABLE vars    (var TEXT,val TEXT,CONSTRAINT vars_constraint UNIQUE (var));")))
    db))

(define cid-cache (make-hash-table))

(define (get-client-id db cname)
  (let ((cid (hash-table-ref/default cid-cache cname #f)))
    (if cid 
	cid
	(begin
	  (execute db "INSERT OR REPLACE INTO clients (name) VALUES(?);" cname)
	  (for-each-row 
	   (lambda (id)
	     (set! cid id))
	   db
	   "SELECT id FROM clients WHERE name=?;" cname)
	  (hash-table-set! cid-cache cname cid)
	  (set! total-db-accesses (+ total-db-accesses 2))
	  cid))))

(define (count-client db cname)
  (let ((cid (get-client-id db cname)))
    (execute db "UPDATE clients SET num_accesses=num_accesses+1 WHERE id=?;" cid)
    (set! total-db-accesses (+ total-db-accesses 1))
    ))

(define db (open-db))
;; (define queuelst '())
;; (define mx1 (make-mutex))

(define max-queue-len 0)

(define (process-queue queuelst)
  (let ((queuelen (length queuelst)))
    (if (> queuelen max-queue-len)
	(set! max-queue-len queuelen))
    (for-each
     (lambda (item)
       (let ((cname (vector-ref item 1))
	     (clcmd (vector-ref item 2))
	     (cdata (vector-ref item 3)))
	 (send-message pub cname send-more: #t)
	 (send-message pub (case clcmd
			     ((sync)
			      (conc queuelen))
			     ((set)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (apply execute db "INSERT OR REPLACE INTO vars (var,val) VALUES (?,?);" (string-split cdata))
			      "ok")
			     ((get)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (let ((res "noval"))
				(for-each-row
				 (lambda (val)
				   (set! res val))
				 db 
				 "SELECT val FROM vars WHERE var=?;" cdata)
				res))
			     (else (conc "unk cmd: " clcmd))))))
     queuelst)))

;; SERVER THREAD
(define th1 (make-thread 
	     (lambda ()
	       (let ((last-run 0)) ;; current-seconds when run last
		 (let loop ((queuelst '()))
		   (let* ((indat (receive-message* pull))
			  (parts (string-split indat ":"))
			  (cname (car parts))                   ;; client name
			  (clcmd (string->symbol (cadr parts))) ;; client cmd
			  (cdata (caddr parts))                 ;; client data
			  (svect (vector (current-seconds) cname clcmd cdata))) ;; record for the queue
		     ;; (print "Server received message: " indat)
		     (count-client db cname)
		     (case clcmd
		       ((ping)
			(print "Got ping from " cname)
			(send-message pub cname send-more: #t)
			(send-message pub "Got ping")
			(loop queuelst))
		       ((sync) ;; just process the queue
			(print "Got sync from " cname)
			(process-queue (cons svect queuelst))
			(loop '()))
		       ((get)
			(process-queue (cons svect queuelst))
			(loop '()))
		       (else
			(loop (cons svect queuelst))))))))
	     "server thread"))

(include "mockupclientlib.scm")

;; SYNC THREAD
;; send a sync to the pull port
(define th2 (make-thread
	     (lambda ()
	       (let ((last-action-time (current-seconds)))
		 (let loop ()
		   (thread-sleep! 5)
		   (let ((queuelen (string->number (dbaccess "server" 'sync "nada" #f)))
			 (last-action-delta #f))
		     (if (> queuelen 1)(set! last-action-time (current-seconds)))
		     (set! last-action-delta (- (current-seconds) last-action-time))
		     (print "Server: Got queuelen=" queuelen ", last-action-delta=" last-action-delta)
		     (if (< last-action-delta 60)
			 (loop)
			 (print "Server exiting, 25 seconds since last access"))))))
	     "sync thread"))

(thread-start! th1)
(thread-start! th2)
(thread-join! th2)

(let* ((run-time       (- (current-seconds) start-time))
       (queries/second (/  total-db-accesses run-time)))
  (print "Server exited! Total db accesses=" total-db-accesses " in " run-time " seconds for " queries/second " queries/second with max queue length of: " max-queue-len))

Added testnanomsg/pipeline.scm version [1d4d831eb6].


























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define push (nn-socket 'push))
(define pull1 (nn-socket 'pull))
(define pull2 (nn-socket 'pull))

(nn-bind    push  "inproc://test")
(nn-connect pull1 "inproc://test")
(nn-connect pull2 "inproc://test")

(nn-send push "a")
(nn-send push "b")
(nn-send push "c")
(nn-send push "d")

(define ((th sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) " is done"))

(thread-start! (th pull1))
(thread-start! (th pull2))

(thread-sleep! 1)

Added testnanomsg/req-rep-client.scm version [7998d54555].































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))

(nn-connect req  "tcp://localhost:22022")

;; (with-output-to-string (lambda ()(serialize obj)))
(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((talk-to-server soc))
  (let loop ((cnt 20))
    (let ((name (list-ref '("Matt" "Tom" "Bob" "Jill" "James" "Jane")(random 6))))
      (print "Sending " name)
      (print (client-send-receive req name))
      (if (> cnt 0)(loop (- cnt 1)))))
  (print (client-send-receive req "quit"))
  (nn-close req)
  (exit))

;; (thread-start! (lambda ()
;; 		 (thread-sleep! 20)
;; 		 (print "Give up on waiting for the server")
;; 		 (nn-close req)
;; 		 (exit)))

(thread-join! (thread-start! (talk-to-server req)))

Added testnanomsg/req-rep-server.scm version [d9de6da037].



























































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

;; (use trace)
;; (trace nn-bind nn-socket nn-assert nn-recv nn-send thread-terminate! nn-close )

(define port  22022)
(define host  "127.0.0.1")

(define rep   (nn-socket 'rep))

(print "connecting, got: " (nn-bind    rep  (conc "tcp://" "*" ":" port)))

(define (server soc)
  (print "server starting")
  (let loop ((msg-in (nn-recv soc)))
    (print "server received: " msg-in)
    (cond
     ((equal? msg-in "quit")
      (nn-send soc "Ok, quitting"))
     ((and (>= (string-length msg-in) 4)
	   (equal? (substring msg-in 0 4) "ping"))
      (nn-send soc (conc (current-process-id)))
      (loop (nn-recv soc)))
     ;;((and (>= (string-length msg-in)
     (else
      (let ((this-task (random 15)))
	(thread-sleep! this-task)
	(nn-send soc (conc "hello " msg-in " this task took " this-task " seconds to complete"))
	(loop (nn-recv soc)))))))

(define (ping-self host port #!key (return-socket #t))
  ;; send a random number along with pid and check that we get it back
  (let* ((req     (nn-socket 'req))
	 (key     "ping")
	 (success #f)
	 (keepwaiting #t)
	 (ping    (make-thread
		   (lambda ()
		     (print "ping: sending string \"" key "\", expecting " (current-process-id))
		     (nn-send req key)
		     (let ((result  (nn-recv req)))
		       (if (equal? (conc (current-process-id)) result)
			   (begin
			     (print "ping, success: received \"" result "\"")
			     (set! success #t))
			   (begin
			     (print "ping, failed: received key \"" result "\"")
			     (set! keepwaiting #f)
			     (set! success #f)))))
		   "ping"))
	 (timeout (make-thread (lambda ()
				 (let loop ((count 0))
				   (thread-sleep! 1)
				   (print "still waiting after count seconds...")
				   (if (and keepwaiting (< count 10))
				       (loop (+ count 1))))
				 (if keepwaiting
				     (begin
				       (print "timeout waiting for ping")
				       (thread-terminate! ping))))
			       "timeout")))
    (nn-connect req (conc "tcp://" host ":" port))
    (handle-exceptions
     exn
     (begin
       (print-call-chain)
       (print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (print "exn=" (condition->list exn))
       (print "ping failed to connect to " host ":" port))
     (thread-start! timeout)
     (thread-start! ping)
     (thread-join! ping)
     (if success (thread-terminate! timeout)))
    (if return-socket
	(if success req #f)
	(begin
	  (nn-close req)
	  success))))

(let ((server-thread (make-thread (lambda ()(server rep)) "server")))
  (thread-start! server-thread)
  ;; (thread-sleep! 1)
  (if (ping-self host port)
      (begin
	(thread-join! server-thread)
	(nn-close rep))
      (print "ping failed")))

(exit)

Added testnanomsg/req-rep.scm version [b77ebf1421].































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))
(define rep   (nn-socket 'rep))

(nn-bind    rep  "inproc://test")
(nn-connect req  "inproc://test")

(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((server soc))
  (let loop ((msg-in (nn-recv soc)))
    (if (not (equal? msg-in "quit"))
	(begin
	  (nn-send soc (conc "hello " msg-in))
	  (loop (nn-recv soc))))))

(thread-start! (server rep))

(print (client-send-receive req "Matt"))
(print (client-send-receive req "Tom"))

;; (client-send-receive req "quit")

(nn-close req)
(nn-close rep)
(exit)

Modified tests/Makefile from [502a984b43] to [608cf0bdb8].

32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
32
33
34
35
36
37
38

39
40
41
42
43
44
45
46







-
+








stopserver :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -repl
	cd fullrun;$(MEGATEST) -:b -repl

test0 : cleanprep
	cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)

test1 : cleanprep

test2 : fullprep
114
115
116
117
118
119
120
121

122
123
124
125

126
127
128
129

130
131
132
133

134
135
136
137

138
139
140
141
142
143
144
114
115
116
117
118
119
120

121
122
123
124

125
126
127
128

129
130
131
132

133
134
135
136

137
138
139
140
141
142
143
144







-
+



-
+



-
+



-
+



-
+







# Some simple checks for bootstrapping and run loop logic 

test9 : minsetup test9a test9b test9c test9d test9e

test9a :
	@echo Run super-simple mintest e, no waitons.
	cd mintest;$(DASHBOARD)&
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG)

test9b :
	@echo Run simple mintest d with one waiton c
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9c :
	@echo Run mintest a with full waiton chain a -> b -> c -> d -> e
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9d :
	@echo Run an itemized test with no items
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9e :
	@echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test10 :
	@echo Run a bunch of different targets simultaneously
	(cd fullrun;$(MEGATEST) -server - ;sleep 2)&
	for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \
	   (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done
	for sys in ubuntu suse redhat debian;do \

Modified tests/fullrun/megatest.config from [a6f800861f] to [ebc04ead74].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
29
30
31
32
33
34
35



36
37
38
39
40
41
42







-
-
-







waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# yes, anything else is no
run-wait yes


# Use http instead of direct filesystem access
# transport http
# transport fs

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s
122
123
124
125
126
127
128






129
130
131
132
133
134
135
136
137
138

139
140
141

142
143

144

145
146
147
148
149
150
151
152
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

141
142
143

144
145
146
147

148

149
150
151
152
153
154
155







+
+
+
+
+
+









-
+


-
+


+
-
+
-







blah

MAX_ALLOWED_LOAD 200
# XTERM   [system xterm]
# RUNDEAD [system exit 56]

[server]

# Use http instead of direct filesystem access
transport http
# transport fs
# transport nmsg

synchronous 0

# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.1
timeout 0.061

# Server is required - slower but more resistant to Sqlite issues.
# required yes
required yes

# Start server when average query takes longer than this
# server-query-threshold 55500
server-query-threshold 100
server-query-threshold -1
# 55500

# daemonize yes
# hostname #{scheme (get-host-name)}

## disks are:
## name host:/path/to/area
## -or-

Modified tests/mintest/megatest.config from [158955d103] to [74b434d2c6].

1
2
3
4
5
6

7
8
9
10
11
12
13
1
2
3
4
5

6
7
8
9
10
11
12
13





-
+







[fields]
X TEXT

[setup]
max_concurrent_jobs 50
linktree #{getenv PWD}/linktree
linktree #{getenv MT_RUN_AREA_HOME}/linktree
transport http

[server]
port 8090

[jobtools]
useshell yes

Modified tests/unittests/basicserver.scm from [4e0a526d82] to [8806c7e3ae].

12
13
14
15
16
17
18
19

20
21
22
23

24
25
26
27
28
29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53


54
55
56
57
58
59
60
12
13
14
15
16
17
18

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

54
55
56
57
58
59
60
61
62







-
+




+












-
+
















-
+
+







(test "setup for run" #t (begin (launch:setup-for-run)
 				(string? (getenv "MT_RUN_AREA_HOME"))))

;; NON Server tests go here

(test #f #f (db:dbdat-get-path *db*))
(test #f #f (db:get-run-name-from-id *db* run-id))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys))

;; (exit)

;; Server tests go here 
(for-each (lambda (run-id)
(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
(server:kind-run run-id)
(test "did server start within 20 seconds?"
      #t
      (let loop ((remtries 20)
		 (running (tasks:server-running-or-starting? (db:delay-if-busy
							      (tasks:open-db))
							     run-id)))
	(if running 
	    (> running 0)
	    (if (> remtries 0)
		(begin
		  (thread-sleep! 1.1)
		  (thread-sleep! 1)
		  (loop (- remtries 1)
			(tasks:server-running-or-starting? (db:delay-if-busy
							    (tasks:open-db))
							   run-id)))))))

(test "did server become available" #t
      (let loop ((remtries 10)
		 (res      (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
	(if res
	    (vector? res)
	    (begin
	      (if (> remtries 0)
		  (begin
		    (thread-sleep! 1.1)
		    (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
		  res)))))

)
(list 0 1))
(define user    (current-user-name))
(define runname "mytestrun")
(define keys    (rmt:get-keys))
(define runinfo #f)
(define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
(define header  (vector "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))

98
99
100
101
102
103
104

105
106
107
108
109
110


111
112
113
114
115
116
117
100
101
102
103
104
105
106
107
108
109
110
111
112

113
114
115
116
117
118
119
120
121







+





-
+
+







      (let ((run-info (rmt:get-run-info run-id)))
	(db:get-value-by-header (db:get-rows run-info)
				(db:get-header run-info)
				"runname")))

      ;; (vector header (vector "abc" "def" 1 "mytestrun" "new" "n/a" "matt" 1416280640.0))

(for-each (lambda (run-id)
;; test killing server
;;
(tasks:kill-server-run-id run-id)

(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))

)
(list 0 1))
;; (test #f #f (client:setup run-id))

;; (set! *transport-type* 'http)
;; 
;; (test "setup for run" #t (begin (launch:setup-for-run)
;; 				(string? (getenv "MT_RUN_AREA_HOME"))))
;; 

Modified utils/Makefile.installall from [c3d10e5280] to [507fd637d5].

143
144
145
146
147
148
149
















150
151
152
153
154
155
156
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION)/config.log
	cd sqlite-autoconf-$(SQLITE3_VERSION);./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/sqlite3.so : $(PREFIX)/bin/sqlite3
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) sqlite3

#======================================================================
# N  A N O M S G
#======================================================================

nanomsg-0.5-beta.tar.gz :
	wget http://download.nanomsg.org/nanomsg-0.5-beta.tar.gz

nanomsg-0.5-beta/COPYING : nanomsg-0.5-beta.tar.gz
	tar xfvz nanomsg-0.5-beta.tar.gz

$(PREFIX)/bin/nanocat : nanomsg-0.5-beta/COPYING
	cd nanomsg-0.5-beta;./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/nanomsg.so : $(PREFIX)/bin/nanocat
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) nanomsg

#======================================================================
# M A T T S   U T I L S
#======================================================================

opensrc.fossil :
	fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil

Modified utils/plot-code.scm from [de4d05b676] to [cd37a2db38].

1
2







3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


20
21
22
23
24
25
26
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


+
+
+
+
+
+
+

















+
+







#!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq

;; Coming soon (right?) Usage: plot-code file1.scm,file2.scm "fun1,fun2,x*" *.scm > plot.dot
;; Usage: plot-code file1.scm,file2.scm *.scm > plot.dot
;;        dot -Tpdf plot.dot > plot.pdf
;; first param is comma separated list of files to include in the map, use - to do all
;; second param is list of regexs for functions to include in the map
;; third param is list of files to scan

(use regex srfi-69 srfi-13)

(define targs #f) 
(define files (cddddr (argv)))

(let ((targdat (cadddr (argv))))
  (if (equal? targdat "-")
      (set! targs files)
      (set! targs (string-split targdat ","))))

(define filedat-defns (make-hash-table))
(define filedat-usages (make-hash-table))

(define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*"))
(define all-regexs (make-hash-table))

(define all-fns '())

;; for the se

(define (print-err . data)
  (with-output-to-port (current-error-port)
    (lambda ()
      (apply print data))))

(print-err "Making graph for files: " (string-intersperse targs ", "))
79
80
81
82
83
84
85
86






















87
88
89
90
91
92
93
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		    (loop (car tal)
			  (cdr tal)
			  newres)))
	      (if (null? tal)
		  res
		  (loop (car tal)(cdr tal) res)))))
      '()))
  

;; (define mm-header #<<MMHEADER
;; <map version="freeplane 1.2.0">
;; <!--To view this file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net -->
;; 
;; MMHEADER
;; 
;; (define (add-node text)
;;   <node TEXT="homenode" ID="ID_1723255651" CREATED="1283093380553" MODIFIED="1417113442955"><hook NAME="MapStyle">
;; )
;; 
;;  minimal mindmap file
;;  <map version="freeplane 1.2.0">
;;   <!--To view this file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net -->
;;   <node TEXT="homenode" ID="ID_1723255651" CREATED="1283093380553" MODIFIED="1417113442955">
;;   <node TEXT="node1" POSITION="right" ID="ID_1810107939" CREATED="1417113473476" MODIFIED="1417113480425">
;;   <node TEXT="node2" ID="ID_68133256" CREATED="1417113482134" MODIFIED="1417113484466"/>
;;   <node TEXT="node3" ID="ID_1572284821" CREATED="1417113487785" MODIFIED="1417113491589"/>
;;   </node>
;;   </node>
;;   </map>

;; Gather the usages
(print "digraph G {")
(define curr-cluster-num 0)
(define function-calls '())

(for-each
 (lambda (fname)

Added utils/trace/trace.import.scm version [937dcb55c1].

































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;;; trace.import.scm - GENERATED BY CHICKEN 4.9.0.1 -*- Scheme -*-

(eval '(import
         scheme
         chicken
         csi
         advice
         extras
         ports
         data-structures
         (except srfi-1 break)
         miscmacros))
(##sys#register-compiled-module
  'trace
  (list)
  '((breakpoint . trace#breakpoint)
    (trace . trace#trace)
    (untrace . trace#untrace)
    (break . trace#break)
    (unbreak . trace#unbreak)
    (trace-output-port . trace#trace-output-port)
    (continue . trace#continue)
    (c . trace#c)
    (traced? . trace#traced?)
    (trace-module . trace#trace-module)
    (untrace-module . trace#untrace-module)
    (trace-verbose . trace#trace-verbose)
    (trace/untrace . trace#trace/untrace))
  (list)
  (list))

;; END OF FILE

Added utils/trace/trace.meta version [9714181a62].











1
2
3
4
5
6
7
8
9
10
+
+
+
+
+
+
+
+
+
+
;;;; trace.meta -*- Scheme -*-


((category tools)
 (synopsis "tracing and breakpoints")
 (author "felix winkelmann")
 (license "public domain")
 (needs advice				; don't we all?
	miscmacros)
 (files "tests/run.scm" "trace.meta" "trace.release-info" "trace.scm" "trace.setup") )

Added utils/trace/trace.scm version [dc3560e035].




































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;;; trace.scm


(module trace (breakpoint 
	       trace untrace
	       break unbreak
	       trace-output-port
	       continue c 
	       traced?
	       trace-module untrace-module
	       trace-verbose
	       trace/untrace)
	       
(import scheme chicken csi)

(use advice extras ports data-structures)
(require-library srfi-1)
(import (except srfi-1 break) miscmacros)


(define *last-breakpoint* #f)
(define *traced-procedures* '())
(define *broken-procedures* '())
(define *trace-indent-level* 0)

(define trace-output-port (make-parameter (current-output-port)))
(define trace-verbose (make-parameter #t))

(define (break-entry name args)
  ;; Does _not_ unwind!
  (##sys#call-with-current-continuation
   (lambda (c)
     (let ((exn (##sys#make-structure
		 'condition
		 '(exn breakpoint)
		 (list '(exn . message) "*** breakpoint ***"
		       '(exn . arguments) (list (cons name args))
		       '(exn . location) name
		       '(exn . continuation) c) ) ) )
       (set! *last-breakpoint* exn)
       (signal exn) ) ) ) )

(define (break-resume exn)
  (let ((a (member '(exn . continuation) (##sys#slot exn 2))))
    (if a
	((cadr a) (void))
	(error "condition has no continuation" exn) ) ) )

(define (breakpoint #!optional (name 'breakpoint))
  (break-entry name '()) )

(define (trace-indent)
  (let ((port (trace-output-port)))
    (do ((i (fxmin 3 *trace-indent-level*) (fx- i 1)))
	((fx<= i 0))
      (write-char #\space port) )
    (fprintf port "[~a] " *trace-indent-level*) ) )

(define (traced-procedure-entry name args)
  (let ((port (trace-output-port)))
    (trace-indent)
    (set! *trace-indent-level* (fx+ 1 *trace-indent-level*))
    (write (cons name args) port)
    (write ", Called from: " port)
    (write (conc (car (reverse (get-call-chain)))))
    (write-char #\newline port)
    (flush-output port) ) )

(define (traced-procedure-exit name results)
  (let ((port (trace-output-port)))
    (set! *trace-indent-level* (fx- *trace-indent-level* 1))
    (trace-indent)
    (fprintf port "~a -> " name)
    (if results
	(for-each
	 (lambda (x)
	   (write x port)
	   (write-char #\space port) )
	 results)
	(display "(escaping)" port))
    (write-char #\newline port)
    (flush-output port) ) )

(define (procedure-name proc)
  (cond ((procedure-information proc) =>
	 (lambda (info)
	   (if (pair? info) (car info) info) ) )
	(else '<unknown>)) )

(define (do-trace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (cond ((traced? s)
	    (warning "procedure already traced" s) )
	   (else
	    (let ((name (procedure-name s)))
	      (when (trace-verbose)
		(fprintf (current-error-port) "; tracing ~a~%" name))
	      (set! *traced-procedures* (cons (cons s name) *traced-procedures*))
	      (advise 
	       'around s
	       (lambda (next args)
		 (let ((results #f))
		   (dynamic-wind
		       (cut traced-procedure-entry name args)
		       (lambda () 
			 (call-with-values (cut apply next args)
			   (lambda rs
			     (set! results rs)
			     (apply values rs))))
		       (cut traced-procedure-exit name results))))
	       '*trace*)))))
   procs) )

(define (do-untrace-all)
  (define (unadvise* p)
    (ignore-errors (unadvise p '*trace*)))
  (for-each
   (lambda (proc)
     (let ((proc (car proc)))
       (when (trace-verbose)
	 (fprintf (current-error-port) "; untracing ~a~%" (procedure-name proc))
	 (unadvise* proc))))
   *traced-procedures*)
  (set! *traced-procedures* '()))

(define (do-untrace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *traced-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure not traced" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; untracing ~a~%" name))
	      (ignore-errors (unadvise s '*trace*))
	      (set! *traced-procedures* 
		(delete 
		 p *traced-procedures* 
		 eq?))))))
   procs) )

(define (do-break procs)
  (for-each
   (lambda (s)
     (let ((name (procedure-name s)))
       (ensure procedure? s)
       (cond ((assq s *broken-procedures*)
	      (warning "procedure already has break-point" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; setting break-point in ~a~%" name))
	      (set! *broken-procedures* (cons (cons s name) *broken-procedures*))
	      (advise 
	       'before s
	       (lambda (args)
		 (break-entry name args) )
	       '*break*) ) )))
   procs) )

(define (do-unbreak procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *broken-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure has no breakpoint" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; removing break-point in ~a~%" name))
	      (ignore-errors (unadvise s '*break*))
	      (set! *broken-procedures* (delete p *broken-procedures* eq?) ) ) ) ) )
   procs) )

(define (do-unbreak-all)
  (for-each
   (lambda (bp)
     (ignore-errors (unadvise (car bp) '*break*)))
   *broken-procedures*)
  (set! *broken-procedures* '())
  (void))

(define (trace . procs)
  (cond ((null? procs)
	 (when (pair? *traced-procedures*)
	   (printf "Traced:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *traced-procedures*)) )
	(else
	 (do-trace procs) ) ) )

(define (untrace . procs)
  (cond ((null? procs) (do-untrace-all))
	(else (do-untrace procs)))
  (void))

(define (break . procs)
  (cond ((null? procs)
	 (when (pair? *broken-procedures*)
	   (printf "Breakpoints:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *broken-procedures*)) )
	(else
	 (do-break procs) ) ) )

(define (unbreak . procs)
  (cond ((null? procs) (do-unbreak-all))
	(else (do-unbreak procs))))

(define (continue #!optional (bp *last-breakpoint*))
  (cond (*last-breakpoint*
	 (let ((exn *last-breakpoint*))
	   (set! *last-breakpoint* #f)
	   (break-resume exn) ) )
	(else (display "no breakpoint pending\n") ) ) )

(define c continue)

(define (traced? proc)
  (assq proc *traced-procedures*))

(define (trace/untrace . procs)
  (for-each
   (lambda (proc)
     ((if (traced? proc) do-untrace do-trace) (list proc)))
   procs))

(define (walk-module mname proc)
  (let* ((m (##sys#find-module mname))
	 (exps (nth-value 1 (##sys#module-exports m))))
    (for-each
     (lambda (exp)
       (let* ((realname (cdr exp))
	      (prim (get realname '##core#primitive)))
	 (if prim
	     (warning "export is a core-library primitive - not traced" (car exp))
	     (when (##sys#symbol-has-toplevel-binding? realname)
	       (let ((val (##sys#slot realname 0)))
		 (when (procedure? val)
		   (proc val)))))))
     exps)))

(define (trace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module mname trace))
   mnames))

(define (untrace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module 
      mname 
      (lambda (proc)
	(when (traced? proc)
	  (do-untrace (list proc))))))
   mnames))

)

Added utils/trace/trace.setup version [d222d610b4].










1
2
3
4
5
6
7
8
9
+
+
+
+
+
+
+
+
+
;;;; trace.setup -*- Scheme -*-


(compile -s trace.scm -O3 -d1 -j trace)
(compile -s trace.import.scm -O3 -d0)

(install-extension
 'trace
 '("trace.so" "trace.import.so"))