Megatest

Check-in [2d8ad11ad9]
Login
Overview
Comment:Merged v1.60
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: 2d8ad11ad9bffad9d48c8a8aaffb9f506db58cd0
User & Date: matt on 2015-01-01 11:16:07
Other Links: manifest | tags
Context
2015-01-09
00:41
Archive restoring now works check-in: 5494747c0c user: matt tags: trunk
2015-01-01
11:16
Merged v1.60 check-in: 2d8ad11ad9 user: matt tags: trunk
2014-12-29
16:06
Moved default tests path to last - user can always override with an entry in megatest.config check-in: ebc2ba96c7 user: mrwellan tags: v1.60
2014-11-30
12:44
Bring v1.60 back to trunk check-in: 2f9676f58c user: matt tags: trunk
Changes

Modified Makefile from [64fd867d54] to [a24183c8ff].

1
2
3
4
5
6
7
8
9

10
11
12


13
14
15
16
17
18
19
1
2
3
4
5
6
7
8

9
10
11

12
13
14
15
16
17
18
19
20








-
+


-
+
+







# make install CSCOPTS='-accumulate-profile -profile-name $(PWD)/profile-ww$(shell date +%V.%u)'
PREFIX=$(PWD)
CSCOPTS= 
INSTALL=install
SRCFILES = common.scm items.scm launch.scm \
           ods.scm runconfig.scm server.scm configf.scm \
           db.scm keys.scm margs.scm megatest-version.scm \
           process.scm runs.scm tasks.scm tests.scm genexample.scm \
	   http-transport.scm filedb.scm \
	   http-transport.scm nmsg-transport.scm filedb.scm \
           client.scm gutils.scm synchash.scm daemon.scm mt.scm dcommon.scm \
	   tree.scm ezsteps.scm lock-queue.scm sdb.scm \
	   rmt.scm api.scm tdb.scm portlogger.scm
	   rmt.scm api.scm tdb.scm rpc-transport.scm \
	   portlogger.scm archive.scm

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
     json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
     spiffy-directory-listing ssax sxml-serializer sxml-modifications iup canvas-draw sqlite3

56
57
58
59
60
61
62
63

64
65
66
67
68
69
70
57
58
59
60
61
62
63

64
65
66
67
68
69
70
71







-
+







# Special dependencies for the includes
tests.o db.o launch.o runs.o dashboard-tests.o dashboard-guimonitor.o dashboard-main.o monitor.o dashboard.o megatest.o : db_records.scm
tests.o runs.o dashboard.o dashboard-tests.o dashboard-main.o  : run_records.scm
db.o ezsteps.o keys.o launch.o megatest.o monitor.o runs-for-ref.o runs.o tests.o : key_records.scm
tests.o tasks.o dashboard-tasks.o : task_records.scm
runs.o : test_records.scm
megatest.o : megatest-fossil-hash.scm
client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm
client.scm common.scm configf.scm dashboard-guimonitor.scm dashboard-tests.scm dashboard.scm db.scm dcommon.scm ezsteps.scm fs-transport.scm http-transport.scm index-tree.scm items.scm keys.scm launch.scm megatest.scm monitor.scm mt.scm newdashboard.scm runconfig.scm runs.scm server.scm tdb.scm tests.scm tree.scm zmq-transport.scm : common_records.scm rpc-transport.scm

# Temporary while transitioning to new routine
# runs.o : run-tests-queue-classic.scm  run-tests-queue-new.scm

megatest-fossil-hash.scm : $(SRCFILES) megatest.scm *_records.scm
	echo "(define megatest-fossil-hash \"$(MTESTHASH)\")" > megatest-fossil-hash.new
	if ! diff -q megatest-fossil-hash.new megatest-fossil-hash.scm ; then echo copying .new to .scm;cp -f megatest-fossil-hash.new megatest-fossil-hash.scm;fi

Modified api.scm from [b8269d4337] to [030af4fe6b].

8
9
10
11
12
13
14

15
16
17
18
19
20
21
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22







+







;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.
;;======================================================================

(declare (unit api))
(declare (uses rmt))
(declare (uses db))
(declare (uses tasks))

;; allow these queries through without starting a server
;;
(define api:read-only-queries
  '(get-key-val-pairs
    get-keys
    test-toplevel-num-items
42
43
44
45
46
47
48
49














































50
51
52
53


54
55
56
57
58
























59















































60
61
62



63
64
65
66
67




68
69
70


71
72

73
74
75
76
77
78
79





80
81
82
83



84
85
86


87
88
89
90
91




92
93
94
95


96
97
98
99
100
101
102
103
104






105
106
107
108

109
110
111
112
113
114

115
116
117
118
119
120
121
122
123









124
125


126
127
128
129
130
131






132
133
134
135
136
137
138
139
140
141
142
143
144



145
146
147
148
149
150
151
152
153
154

155
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101





102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173



174
175
176
177




178
179
180
181



182
183


184







185
186
187
188
189




190
191
192



193
194
195




196
197
198
199




200
201
202








203
204
205
206
207
208


209

210






211
212








213
214
215
216
217
218
219
220
221


222
223
224





225
226
227
228
229
230

231
232
233
234
235
236
237
238
239
240


241
242
243
244
245
246
247
248
249
250
251
252

253
254







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
+
+
+

-
-
-
-
+
+
+
+
-
-
-
+
+
-
-
+
-
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
+
+
+
-
-
-
+
+

-
-
-
-
+
+
+
+
-
-
-
-
+
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
-
-

-
+
-
-
-
-
-
-
+

-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
+
+

-
-
-
-
-
+
+
+
+
+
+
-










-
-
+
+
+









-
+

    get-runs
    get-all-run-ids
    get-prev-run-ids
    get-run-ids-matching-target
    get-runs-by-patt
    get-steps-data
    login
    testmeta-get-record))
    testmeta-get-record
    have-incompletes?
    synchash-get
    ))

(define api:write-queries
  '(
    ;; SERVERS
    start-server
    kill-server

    ;; TESTS
    test-set-state-status-by-id
    delete-test-records
    delete-old-deleted-test-records
    test-set-status-state
    test-set-top-process-pid
    roll-up-pass-fail-counts
    update-fail-pass-counts

    ;; RUNS
    register-run
    set-tests-state-status
    delete-run
    lock/unlock-run
    update-run-event_time
    mark-incomplete

    ;; STEPS
    teststep-set-status!

    ;; TEST DATA
    test-data-rollup
    csv->test-data

    ;; MISC
    sync-inmem->db

    ;; TESTMETA
    testmeta-add-record
    testmeta-update-field

    ;; TASKS
    tasks-add
    tasks-set-state-given-param-key
    ))

;; These are called by the server on recipt of /api calls
;;    - keep it simple, only return the actual result of the call, i.e. no meta info here
;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct cmd params)
  (case (string->symbol cmd)
    ;; SERVERS
    ((start-server)                 (apply server:kind-run params))
    ((kill-server)                  (set! *server-run* #f))
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain)))
     (print-call-chain (current-error-port))
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (if (not (vector? dat))                    ;; it is an error to not receive a vector
       (vector #f #f "remote must be called with a vector")       
       (vector                                   ;; return a vector + the returned data structure
	#t 
	(let ((cmd    (vector-ref dat 0))
	      (params (vector-ref dat 1)))
	  (case (if (symbol? cmd)
		    cmd
		    (string->symbol cmd))

	    ;;===============================================
	    ;; READ/WRITE QUERIES
	    ;;===============================================

	    ;; SERVERS
	    ((start-server)                    (apply server:kind-run params))
	    ((kill-server)                     (set! *server-run* #f))

	    ;; TESTS
	    ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
	    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
	    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
	    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
	    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
	    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
	    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
	    ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

	    ;; RUNS
	    ((register-run)                 (apply db:register-run dbstruct params))
	    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
	    ((delete-run)                   (apply db:delete-run dbstruct params))
	    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
	    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))

	    ;; STEPS
	    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

	    ;; TEST DATA
	    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
	    ((csv->test-data)               (apply db:csv->test-data dbstruct params))

	    ;; MISC
	    ((sync-inmem->db)               (let ((run-id (car params)))
					      (db:sync-touched dbstruct run-id force-sync: #t)))
	    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))

	    ;; TESTMETA
	    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
	    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))

	    ;; TASKS
	    ((tasks-add)                 (apply tasks:add dbstruct params))   
	    ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))

	    ;; ARCHIVES
	    ;; ((archive-get-allocations)   
	    ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
	    ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
	    ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

	    ;;======================================================================
	    ;; READ ONLY QUERIES
	    ;;======================================================================

    ;; KEYS
    ((get-key-val-pairs)            (apply db:get-key-val-pairs dbstruct params))
    ((get-keys)                     (db:get-keys dbstruct))
	    ;; KEYS
	    ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
	    ((get-keys)                        (db:get-keys dbstruct))

    ;; TESTS
    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
	    ;; TESTS
	    ((test-toplevel-num-items)         (apply db:test-toplevel-num-items dbstruct params))
	    ((get-test-info-by-id)	       (apply db:get-test-info-by-id dbstruct params))
	    ((test-get-rundir-from-test-id)    (apply db:test-get-rundir-from-test-id dbstruct params))
    ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
	    ((get-count-tests-running)         (apply db:get-count-tests-running dbstruct params))
	    ((get-count-tests-running-in-jobgroup) (apply db:get-count-tests-running-in-jobgroup dbstruct params))
    ((delete-test-records)             (apply db:delete-test-records dbstruct params))
    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
	    ;; ((delete-test-step-records)        (apply db:delete-test-step-records dbstruct params))
    ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
    ((test-set-status-state)           (apply db:test-set-status-state dbstruct params))
    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
	    ((get-previous-test-run-record)    (apply db:get-previous-test-run-record dbstruct params))
	    ((get-matching-previous-test-run-records)(apply db:get-matching-previous-test-run-records dbstruct params))
	    ((test-get-logfile-info)           (apply db:test-get-logfile-info dbstruct params))
	    ((test-get-records-for-index-file)  (apply db:test-get-records-for-index-file dbstruct params))
	    ((get-testinfo-state-status)       (apply db:get-testinfo-state-status dbstruct params))
    ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
	    ((test-get-top-process-pid)        (apply db:test-get-top-process-pid dbstruct params))
	    ((test-get-paths-matching-keynames-target-new) (apply db:test-get-paths-matching-keynames-target-new dbstruct params))
	    ((get-prereqs-not-met)             (apply db:get-prereqs-not-met dbstruct params))
    ((roll-up-pass-fail-counts)        (apply db:roll-up-pass-fail-counts dbstruct params))
    ((update-fail-pass-counts)         (apply db:general-call dbstruct 'update-pass-fail-counts params))
    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
	    ((get-count-tests-running-for-run-id) (apply db:get-count-tests-running-for-run-id dbstruct params))
	    ((synchash-get)                    (apply synchash:server-get dbstruct params))

    ;; RUNS
    ((get-run-info)                 (apply db:get-run-info dbstruct params))
    ((get-run-status)               (apply db:get-run-status dbstruct params))
    ((set-run-status)               (apply db:set-run-status dbstruct params))
	    ;; RUNS
	    ((get-run-info)                 (apply db:get-run-info dbstruct params))
	    ((get-run-status)               (apply db:get-run-status dbstruct params))
	    ((set-run-status)               (apply db:set-run-status dbstruct params))
    ((register-run)                 (apply db:register-run dbstruct params))
    ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
    ((get-test-id)                  (apply db:get-test-id dbstruct params))
	    ((get-tests-for-run)            (apply db:get-tests-for-run dbstruct params))
	    ((get-test-id)                  (apply db:get-test-id dbstruct params))
	    ((get-tests-for-run-mindata)    (apply db:get-tests-for-run-mindata dbstruct params))
    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
    ((delete-run)                   (apply db:delete-run dbstruct params))
    ((get-runs)                     (apply db:get-runs dbstruct params))
    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
    ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
	    ((get-runs)                     (apply db:get-runs dbstruct params))
	    ((get-all-run-ids)              (db:get-all-run-ids dbstruct))
	    ((get-prev-run-ids)             (apply db:get-prev-run-ids dbstruct params))
	    ((get-run-ids-matching-target)  (apply db:get-run-ids-matching-target dbstruct params))
	    ((get-runs-by-patt)             (apply db:get-runs-by-patt dbstruct params))
	    ((get-run-name-from-id)         (apply db:get-run-name-from-id dbstruct params))
    ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
    ((find-and-mark-incomplete)     (apply db:find-and-mark-incomplete dbstruct params))

    ;; STEPS
	    ;; STEPS
    ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))

    ;; TEST DATA
    ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))
    ((csv->test-data)               (apply db:csv->test-data dbstruct params))
    ((get-steps-data)               (apply db:get-steps-data dbstruct params))
	    ((get-steps-data)               (apply db:get-steps-data dbstruct params))

    ;; MISC
    ((login)                        (apply db:login dbstruct params))
    ((general-call)                 (let ((stmtname   (car params))
					  (run-id     (cadr params))
					  (realparams (cddr params)))
				      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
						  (lambda (db)
						    (db:general-call db stmtname realparams)))))
	    ;; MISC
	    ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
	    ((login)                        (apply db:login dbstruct params))
	    ((general-call)                 (let ((stmtname   (car params))
						  (run-id     (cadr params))
						  (realparams (cddr params)))
					      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
							  (lambda (db)
							    (db:general-call db stmtname realparams)))))
    ((sync-inmem->db)               (db:sync-touched dbstruct run-id force-sync: #t))
    ((sdb-qry)                      (apply sdb:qry params))
	    ((sdb-qry)                      (apply sdb:qry params))
	    ((ping)                         (current-process-id))

    ;; TESTMETA
    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))
    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
    (else
	    ;; TESTMETA
	    ((testmeta-get-record)       (apply db:testmeta-get-record dbstruct params))

	    ;; TASKS 
	    ((find-task-queue-records)   (apply tasks:find-task-queue-records dbstruct params))))))))

     (list "ERROR" 0))))

;; http-server  send-response
;;                 api:process-request
;;                    db:*
;;
;; NB// Runs on the server as part of the server loop
;;
(define (api:process-request dbstruct $) ;; the $ is the request vars proc
  (let* ((cmd     ($ 'cmd))
	 (paramsj ($ 'params))
	 (params  (db:string->obj paramsj)) ;; (rmt:json-str->dat paramsj))
	 (res     (api:execute-requests dbstruct cmd params)))
	 (params  (db:string->obj paramsj transport: 'http)) ;; (rmt:json-str->dat paramsj))
	 (resdat  (api:execute-requests dbstruct (vector cmd params))) ;; #( flag result )
	 (res     (vector-ref resdat 1)))

    ;; This can be here but needs controls to ensure it doesn't run more than every 4 seconds
    ;; (rmt:dat->json-str
    ;;  (if (or (string? res)
    ;;          (list?   res)
    ;;          (number? res)
    ;;          (boolean? res))
    ;;      res 
    ;;      (list "ERROR, not string, list, number or boolean" 1 cmd params res)))))
    (db:obj->string res)))
    (db:obj->string res transport: 'http)))

Modified archive.scm from [907e35ab0e] to [1c7ca4f61c].

1
2
3
4
5
6
7
8
9
10
11
12

13
14
15
16
17
18















































































































































































































































































1
2
3
4
5
6
7
8
9
10
11

12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289











-
+






+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; Copyright 2006-2014, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(declare (unit archive))
(declare (uses db))
(declare (uses common))

(include "common_records.scm")
(include "db_records.scm")

;;======================================================================
;; 
;;======================================================================

;; NOT CURRENTLY USED
;;
(define (archive:main linktree target runname testname itempath options)
  (let ((testdir (conc linktree "/" target "/" runname "/" testname "/" itempatt))
	(flavor  'plain) ;; type of machine to run jobs on
	(maxload 1.5)   ;; max allowed load for this work
	(adisks  (archive:get-archive-disks)))
    ;; get testdir size
    ;;   - hand off du to job mgr
    (if (and (file-exists? testdir)
	     (file-is-writable? testdir))
	(let* ((dused  (jobrunner:run-job 
			flavor  ;; machine type
			maxload ;; max allowed load
			'()     ;; prevars - environment vars to set for the job
			common:get-disk-space-used  ;; if a proc call it, if a string it is a unix command
			(list testdir)))
	       (apath  (archive:get-archive testname itempath dused)))
	  (jobrunner:run-job
	   flavor
	   maxload
	   '()
	   archive:run-bup
	   (list testdir apath))))))
	  
;; Get archive disks from megatest.config
;;
(define (archive:get-archive-disks)
  (let ((section (configf:get-section *configdat* "archive-disks")))
    (if section
	section
	'())))

;; look for the best candidate archive area, else create new 
;; area
;;
(define (archive:get-archive testname itempath dused)
  ;; look up in archive_allocations if there is a pre-used archive
  ;; with adequate diskspace
  ;;
  (let* ((existing-blocks (rmt:archive-get-allocations testname itempath dused))
	 (candidate-disks (map (lambda (block)
				 (list
				  (vector-ref block 1)   ;; archive-area-name
				  (vector-ref block 2))) ;; disk-path
			       existing-blocks)))
    (or (common:get-disk-with-most-free-space candidate-disks dused)
	(archive:allocate-new-archive-block testname itempath))))

;; allocate a new archive area
;;
(define (archive:allocate-new-archive-block run-area-home testsuite-name dneeded)
  (let* ((adisks    (archive:get-archive-disks))
	 (best-disk (common:get-disk-with-most-free-space adisks dneeded)))
    (if best-disk
	(let* ((bdisk-name    (car best-disk))
	       (bdisk-path    (cdr best-disk))
	       (area-key      (substring (message-digest-string (md5-primitive) run-area-home) 0 5))
	       (bdisk-id      (rmt:archive-register-disk bdisk-name bdisk-path (get-df bdisk-path)))
	       (archive-name  (let ((sec (current-seconds)))
				(conc (time->string (seconds->local-time sec) "%Y")
				      "_q" (seconds->quarter sec) "/"
				      testsuite-name "_" area-key)))
	       (archive-path  (conc bdisk-path "/" archive-name))
	       (block-id      (rmt:archive-register-block-name bdisk-id archive-path)))
	  ;;   (allocation-id (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name area-key)))
	  (if block-id ;; (and block-id allocation-id)
	      (cons block-id archive-path)
	      #f))
	#f)))

;; archive - run bup
;;
;; 1. create the bup dir if not exists
;; 2. start the du of each directory
;; 3. gen index
;; 4. save
;;
(define (archive:run-bup archive-command run-id run-name tests)
  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((min-space    (string->number (or (configf:lookup *configdat* "archive" "minspace") "1000")))
	 (archive-info (archive:allocate-new-archive-block *toppath* (common:get-testsuite-name) min-space))
	 (archive-dir  (if archive-info (cdr archive-info) #f))
	 (archive-id   (if archive-info (car archive-info) -1))
	 (disk-groups  (make-hash-table))
	 (test-groups  (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (bup-exe      (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (compress     (or (configf:lookup *configdat* "archive" "compress") "9"))
	 (linktree     (configf:lookup *configdat* "setup" "linktree")))

    (if (not archive-dir) ;; no archive disk found, this is fatal
	(begin
	  (debug:print 0 "FATAL: No archive disks found. Please add disks with at least " min-space " MB space to the [archive-disks] section of megatest.config")
	  (debug:print 0 "       use [archive] minspace to specify minimum available space")
	  (debug:print 0 "   disks: " (string-intersperse (map cadr (archive:get-archive-disks)) "\n         "))
	  (exit 1))
	(debug:print-info 0 "Using path " archive-dir " for archiving"))

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       (let* ((item-path         (db:test-get-item-path test-dat))
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      (test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))
	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
	      (test-base         (if (and partial-path-index 
					  test-physical-path )
				     (substring test-physical-path
						0
						partial-path-index)
				     #f)))
	 
 	 (if (or toplevel/children
		 (not (file-exists? test-path)))
	     #f
	     (begin
	       (debug:print 0
			    "From test-dat=" test-dat " derived the following:\n"
			    "test-partial-path  = " test-partial-path "\n"
			    "test-path          = " test-path "\n"
			    "test-physical-path = " test-physical-path "\n"
			    "partial-path-index = " partial-path-index "\n"
			    "test-base          = " test-base)
	       (hash-table-set! disk-groups test-base (cons test-physical-path (hash-table-ref/default disk-groups test-base '())))
	       (hash-table-set! test-groups test-base (cons test-dat (hash-table-ref/default test-groups test-base '())))
	       test-path))))
     tests)
    ;; for each disk-group
    (for-each 
     (lambda (disk-group)
       (debug:print 0 "Processing disk-group " disk-group)
       (let* ((test-paths (hash-table-ref disk-groups disk-group))
	      ;; ((string-intersperse (map cadr (rmt:get-key-val-pairs 1)) "-")
	      (bup-init-params  (list "-d" archive-dir "init"))
	      (bup-index-params (append (list "-d" archive-dir "index") test-paths))
	      (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
					      (conc "-" compress) ;; or (conc "--compress=" compress)
					      "-n" (conc (common:get-testsuite-name) "-" run-id)
					      (conc "--strip-path=" disk-group))
					test-paths))
	      (print-prefix      #f)) ;; "Running: ")) ;; change to #f to turn off printing
	 (if (not (file-exists? archive-dir))
	     (create-directory archive-dir #t))
	 (if (not (file-exists? (conc archive-dir "/HEAD")))
	     (begin
	       ;; replace this with jobrunner stuff enventually
	       (debug:print-info 0 "Init bup in " archive-dir)
	       (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)))
	 (debug:print-info 0 "Indexing data to be archived")
	 (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix)
	 (debug:print-info 0 "Archiving data with bup")
	 (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix)
	 (for-each
	  (lambda (test-dat)
	    (let ((test-id           (db:test-get-id        test-dat))
		  (run-id            (db:test-get-run_id    test-dat)))
	      (rmt:test-set-archive-block-id run-id test-id archive-id)
	      (if (member archive-command '("save-remove"))
		  (runs:remove-test-directory test-dat 'archive-remove))))
	  (hash-table-ref test-groups disk-group))))
     (hash-table-keys disk-groups))
    #t))

(define (archive:bup-restore archive-command run-id run-name tests)  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((disk-groups  (make-hash-table))
	 (test-groups  (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (bup-exe      (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (linktree     (configf:lookup *configdat* "setup" "linktree")))

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       (let* ((item-path         (db:test-get-item-path test-dat))
	      (test-name         (db:test-get-testname  test-dat))
	      (test-id           (db:test-get-id        test-dat))
	      (run-id            (db:test-get-run_id    test-dat))
	      (target            (string-intersperse (map cadr (rmt:get-key-val-pairs run-id)) "/"))
	      
	      (toplevel/children (and (db:test-get-is-toplevel test-dat)
				      (> (rmt:test-toplevel-num-items run-id test-name) 0)))
	      (test-partial-path (conc target "/" run-name "/" (runs:make-full-test-name test-name item-path)))
	      ;; note the trailing slash to get the dir inspite of it being a link
	      (test-path         (conc linktree "/" test-partial-path))
	      (test-physical-path (if (file-exists? test-path) (read-symbolic-link test-path #t) #f))
	      (partial-path-index (if test-physical-path (substring-index test-partial-path test-physical-path) #f))
	      (test-base         (if (and partial-path-index 
					  test-physical-path )
				     (substring test-physical-path
						0
						partial-path-index)
				     #f)))
	 
	 ;; CREATE WORK AREA
	 (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
	 
	 ;; DO BUP RESTORE



 	 (if (or toplevel/children
		 (not (file-exists? test-path)))
	     #f
	     (begin
	       (debug:print 0
			    "From test-dat=" test-dat " derived the following:\n"
			    "test-partial-path  = " test-partial-path "\n"
			    "test-path          = " test-path "\n"
			    "test-physical-path = " test-physical-path "\n"
			    "partial-path-index = " partial-path-index "\n"
			    "test-base          = " test-base)
	       (hash-table-set! disk-groups test-base (cons test-physical-path (hash-table-ref/default disk-groups test-base '())))
	       (hash-table-set! test-groups test-base (cons test-dat (hash-table-ref/default test-groups test-base '())))
	       test-path))))
     tests)
    ;; for each disk-group
    (for-each 
     (lambda (disk-group)
       (debug:print 0 "Processing disk-group " disk-group)
       (let* ((test-paths (hash-table-ref disk-groups disk-group))
	      ;; ((string-intersperse (map cadr (rmt:get-key-val-pairs 1)) "-")
	      (bup-init-params  (list "-d" archive-dir "init"))
	      (bup-index-params (append (list "-d" archive-dir "index") test-paths))
	      (bup-save-params  (append (list "-d" archive-dir "save" ;; (conc "--strip-path=" linktree)
					      (conc "-" compress) ;; or (conc "--compress=" compress)
					      "-n" (conc (common:get-testsuite-name) "-" run-id)
					      (conc "--strip-path=" disk-group))
					test-paths))
	      (print-prefix      #f)) ;; "Running: ")) ;; change to #f to turn off printing
	 (if (not (file-exists? archive-dir))
	     (create-directory archive-dir #t))
	 (if (not (file-exists? (conc archive-dir "/HEAD")))
	     (begin
	       ;; replace this with jobrunner stuff enventually
	       (debug:print-info 0 "Init bup in " archive-dir)
	       (run-n-wait bup-exe params: bup-init-params print-cmd: print-prefix)))
	 (debug:print-info 0 "Indexing data to be archived")
	 (run-n-wait bup-exe params: bup-index-params print-cmd: print-prefix)
	 (debug:print-info 0 "Archiving data with bup")
	 (run-n-wait bup-exe params: bup-save-params print-cmd: print-prefix)
	 (for-each
	  (lambda (test-dat)
	    (let ((test-id           (db:test-get-id        test-dat))
		  (run-id            (db:test-get-run_id    test-dat)))
	      (rmt:test-set-archive-block-id run-id test-id archive-id)
	      (if (member archive-command '("save-remove"))
		  (runs:remove-test-directory test-dat 'archive-remove))))
	  (hash-table-ref test-groups disk-group))))
     (hash-table-keys disk-groups))
    #t))

Modified client.scm from [6d1c8717b3] to [c1867a27a6].

39
40
41
42
43
44
45



































































































46
47
48
49
50
51
52
53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116










































117
118

119
120
121
122



123
124
125
126
127
128
129
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

157
158
159
160
161
162
163




















































164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205


206




207
208
209
210
211
212
213
214
215
216







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+












-
+






-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
-
-
-
-
+
+
+








;; Not currently used! But, I think it *should* be used!!!
(define (client:logout serverdat)
  (let ((ok (and (socket? serverdat)
		 (cdb:logout serverdat *toppath* (client:get-signature)))))
    ok))

(define (client:connect iface port)
  (case (server:get-transport)
    ((rpc)  (rpc:client-connect  iface port))
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

(define (client:setup  run-id #!key (remaining-tries 10) (failed-connects 0))
  (case (server:get-transport)
    ((rpc) (rpc-transport:client-setup run-id)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http run-id))
    (else  (rpc-transport:client-setup run-id)))) ;; (client:setup-rpc run-id))))

;; (define (client:login-no-auto-setup server-info run-id)
;;   (case (server:get-transport)
;;     ((rpc)  (rpc:login-no-auto-client-setup server-info run-id))
;;     ((http) (rmt:login-no-auto-client-setup server-info run-id))
;;     (else   (rpc:login-no-auto-client-setup server-info run-id))))
;; 
;; (define (client:setup-rpc run-id)
;;   (debug:print 0 "INFO: client:setup remaining-tries=" remaining-tries)
;;   (if (<= remaining-tries 0)
;;       (begin
;; 	(debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
;; 	(exit 1))
;;       (let ((host-info (hash-table-ref/default *runremote* run-id #f)))
;; 	(debug:print-info 0 "client:setup host-info=" host-info ", remaining-tries=" remaining-tries)
;; 	(if host-info
;; 	    (let* ((iface     (car  host-info))
;; 		   (port      (cadr host-info))
;; 		   (start-res (client:connect iface port))
;; 		   ;; (ping-res  (server:ping-server run-id iface port))
;; 		   (ping-res  (client:login-no-auto-setup start-res run-id)))
;; 	      (if ping-res   ;; sucessful login?
;; 		  (begin
;; 		    (hash-table-set! *runremote* run-id start-res)
;; 		    start-res)  ;; return the server info
;; 		  (if (member remaining-tries '(3 4 6))
;; 		      (begin    ;; login failed
;; 			(debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" host-info)
;; 			(hash-table-delete! *runremote* run-id)
;; 			(open-run-close tasks:server-force-clean-run-record
;; 			 		tasks:open-db
;; 			 		run-id 
;; 			 		(car  host-info)
;; 			 		(cadr host-info)
;; 					" client:setup (host-info=#t)")
;; 			(thread-sleep! 5)
;; 			(client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
;; 		      (begin
;; 			(debug:print 25 "INFO: client:setup failed to connect, start-res=" start-res ", run-id=" run-id ", host-info=" host-info)
;; 			(thread-sleep! 5)
;; 			(client:setup run-id remaining-tries: (- remaining-tries 1))))))
;; 	    ;; YUK: rename server-dat here
;; 	    (let* ((server-dat (open-run-close tasks:get-server tasks:open-db run-id)))
;; 	      (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
;; 	      (if server-dat
;; 		  (let* ((iface     (tasks:hostinfo-get-interface server-dat))
;; 			 (port      (tasks:hostinfo-get-port      server-dat))
;; 			 (start-res (http-transport:client-connect iface port))
;; 			 ;; (ping-res  (server:ping-server run-id iface port))
;; 			 (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
;; 		    (if start-res
;; 			(begin
;; 			  (hash-table-set! *runremote* run-id start-res)
;; 			  start-res)
;; 			(if (member remaining-tries '(2 5))
;; 			    (begin    ;; login failed
;; 			      (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
;; 			      (hash-table-delete! *runremote* run-id)
;; 			      (open-run-close tasks:server-force-clean-run-record
;; 					      tasks:open-db
;; 					      run-id 
;; 					      (tasks:hostinfo-get-interface server-dat)
;; 					      (tasks:hostinfo-get-port      server-dat)
;; 					      " client:setup (server-dat = #t)")
;; 			      (thread-sleep! 2)
;; 			      (server:try-running run-id)
;; 			      (thread-sleep! 10) ;; give server a little time to start up
;; 			      (client:setup run-id remaining-tries: 10)) ;; (- remaining-tries 1)))
;; 			    (begin
;; 			      (debug:print 25 "INFO: client:setup start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
;; 			      (thread-sleep! 5)
;; 			      (client:setup run-id remaining-tries: (- remaining-tries 1))))))
;; 		  (begin    ;; no server registered
;; 		    (if (eq? remaining-tries 2)
;; 			(begin
;; 			  ;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)")
;; 			  (client:setup run-id remaining-tries: 10))
;; 			(begin
;; 			  (thread-sleep! 2) 
;; 			  (debug:print 25 "INFO: client:setup start-res (not defined here), run-id=" run-id ", server-dat=" server-dat)
;; 			  (if (< (open-run-close tasks:num-in-available-state tasks:open-db run-id) 3)
;; 			      (begin
;; 				;; (open-run-close tasks:server-clean-out-old-records-for-run-id tasks:open-db run-id " client:setup (server-dat=#f)")
;; 				(server:try-running run-id)))
;; 			  (thread-sleep! 10) ;; give server a little time to start up
;; 			  (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))))

;; Do all the connection work, look up the transport type and set up the
;; connection if required.
;;
;; There are two scenarios. 
;;   1. We are a test manager and we received *transport-type* and *runremote* via cmdline
;;   2. We are a run tests, list runs or other interactive process and we must figure out
;;      *transport-type* and *runremote* from the monitor.db
;;
;; client:setup
;;
;; lookup_server, need to remove *runremote* stuff
;;
(define (client:setup run-id #!key (remaining-tries 100) (failed-connects 0))
(define (client:setup-http run-id #!key (remaining-tries 10) (failed-connects 0))
  (debug:print-info 2 "client:setup remaining-tries=" remaining-tries)
  (let* ((tdbdat (tasks:open-db)))
    (if (<= remaining-tries 0)
	(begin
	  (debug:print 0 "ERROR: failed to start or connect to server for run-id " run-id)
	  (exit 1))
	(let ((host-info (hash-table-ref/default *runremote* run-id #f)))
	  (if host-info
	      (let* ((iface     (http-transport:server-dat-get-iface host-info))
		     (port      (http-transport:server-dat-get-port  host-info))
		     (start-res (http-transport:client-connect iface port))
		     (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
		(if ping-res   ;; sucessful login?
		    (begin
		      (debug:print-info 2 "client:setup, ping is good using host-info=" host-info ", remaining-tries=" remaining-tries)
		      ;; Why add the close-connections here?
		      ;; (http-transport:close-connections run-id)
		      (hash-table-set! *runremote* run-id start-res)
		      start-res)  ;; return the server info
		    ;; have host info but no ping. shutdown the current connection and try again
		    (begin    ;; login failed
		      (debug:print-info 1 "client:setup, ping is bad for start-res=" start-res " and *runremote*=" host-info)
		      (http-transport:close-connections run-id)
		      (hash-table-delete! *runremote* run-id)
		      (if (< remaining-tries 8)
			  (thread-sleep! 5)
			  (thread-sleep! 1))
		      (client:setup run-id remaining-tries: (- remaining-tries 1)))))
	      ;; YUK: rename server-dat here
	      (let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
		(debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
		(if server-dat
		    (let* ((iface     (tasks:hostinfo-get-interface server-dat))
			   (port      (tasks:hostinfo-get-port      server-dat))
			   (start-res (http-transport:client-connect iface port))
			   (ping-res  (rmt:login-no-auto-client-setup start-res run-id)))
		      (if (and start-res
			       ping-res)
			  (begin
			    (hash-table-set! *runremote* run-id start-res)
			    (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
			    start-res)
			  (begin    ;; login failed but have a server record, clean out the record and try again
			    (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
			    (http-transport:close-connections run-id)
			    (hash-table-delete! *runremote* run-id)
			    (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
								 run-id 
								 (tasks:hostinfo-get-interface server-dat)
								 (tasks:hostinfo-get-port      server-dat)
								 " client:setup (server-dat = #t)")
			    (thread-sleep! 2)
			    (server:try-running run-id)
			    (thread-sleep! 10) ;; give server a little time to start up
			    (client:setup run-id remaining-tries: (- remaining-tries 1)))))
		    (begin    ;; no server registered
		      (let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
			(debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
	(let* ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id)))
	  (debug:print-info 4 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	  (if server-dat
	      (let* ((iface     (tasks:hostinfo-get-interface server-dat))
		     (hostname  (tasks:hostinfo-get-hostname  server-dat))
		     (port      (tasks:hostinfo-get-port      server-dat))
		     (start-res (case *transport-type*
				  ((http)(http-transport:client-connect iface port))
				  ((nmsg)(nmsg-transport:client-connect hostname port))))
		     (ping-res  (case *transport-type* 
				  ((http)(rmt:login-no-auto-client-setup start-res run-id))
				  ((nmsg)(let ((logininfo (rmt:login-no-auto-client-setup start-res run-id)))
 					   (if logininfo
 					       (car (vector-ref logininfo 1))
 					       #f))))))
		(if (and start-res
			 ping-res)
		    (begin
		      (hash-table-set! *runremote* run-id start-res)
		      (debug:print-info 2 "connected to " (http-transport:server-dat-make-url start-res))
		      start-res)
		    (begin    ;; login failed but have a server record, clean out the record and try again
		      (debug:print-info 0 "client:setup, login failed, will attempt to start server ... start-res=" start-res ", run-id=" run-id ", server-dat=" server-dat)
		      (case *transport-type* 
			((http)(http-transport:close-connections run-id)))
		      (hash-table-delete! *runremote* run-id)
		      (tasks:kill-server-run-id run-id)
		      (tasks:server-force-clean-run-record (db:delay-if-busy tdbdat)
							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)
			  (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little
			  (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time
		      (server:try-running run-id)
		      (thread-sleep! 5)   ;; give server a little time to start up
		      (client:setup run-id remaining-tries: (- remaining-tries 1))
		      )))
	      (begin    ;; no server registered
		(let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
		  (debug:print-info 0 "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
			(thread-sleep! 2) 
			(if (< num-available 2)
		  (if (< num-available 2)
			    (begin
			      (server:try-running run-id)))
			(thread-sleep! 10) ;; give server a little time to start up
			(client:setup run-id remaining-tries: (- remaining-tries 1)))))))))))
		      (server:try-running run-id))
		  (thread-sleep! (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		  (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))

;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

;; client:signal-handler

Modified common.scm from [b52de339b7] to [63e9da460d].

64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
64
65
66
67
68
69
70

71
72
73
74
75
76
77
78







-
+







(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))

;; SERVER
(define *my-client-signature* #f)
(define *transport-type*    'http)
(define *rpc:listener*      #f) ;; if set up for server communication this will hold the tcp port
(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         (make-hash-table)) ;; if set up for server communication this will hold <host port>
(define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *client-non-blocking-mode* #f)
(define *server-id*         #f)
(define *server-info*       #f)
(define *time-to-exit*      #f)
175
176
177
178
179
180
181
182


183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

199
200
201
202
203
204
205
175
176
177
178
179
180
181

182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

199
200
201
202
203
204
205
206







-
+
+















-
+







  '((0 "COMPLETED")
    (1 "NOT_STARTED")
    (2 "RUNNING")
    (3 "REMOTEHOSTSTART")
    (4 "LAUNCHED")
    (5 "KILLED")
    (6 "KILLREQ")
    (7 "STUCK")))
    (7 "STUCK")
    (8 "ARCHIVED")))

(define *common:std-statuses*
  '((0 "PASS")
    (1 "WARN")
    (2 "FAIL")
    (3 "CHECK")
    (4 "n/a")
    (5 "WAIVED")
    (6 "SKIP")
    (7 "DELETED")
    (8 "STUCK/DEAD")
    (9 "ABORT")))

;; These are stopping conditions that prevent a test from being run
(define *common:cant-run-states-sym* 
  '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE ABORT))
  '(COMPLETED KILLED WAIVED UNKNOWN INCOMPLETE ABORT ARCHIVED))

;;======================================================================
;; D E B U G G I N G   S T U F F 
;;======================================================================

(define *verbosity*         1)
(define *logging*           #f)
224
225
226
227
228
229
230

231
232
233
234
235
236
237
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239







+







  (debug:print-info 2 "starting exit process, finalizing databases.")
  (rmt:print-db-stats)
  (let ((run-ids (hash-table-keys *db-local-sync*)))
    (if (and (not (null? run-ids))
	     (configf:lookup *configdat* "setup" "megatest-db"))
	(db:multi-db-sync run-ids 'new2old)))
  (if *dbstruct-db* (db:close-all *dbstruct-db*))
  (if *inmemdb*     (db:close-all *inmemdb*))
  (if (and *megatest-db*
	   (sqlite3:database? *megatest-db*))
      (begin
	(sqlite3:interrupt! *megatest-db*)
	(sqlite3:finalize! *megatest-db* #t)
	(set! *megatest-db* #f)))
  (if *task-db*     (let ((db (cdr *task-db*)))
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
442
443
444
445
446
447
448














449
450
451
452
453
454
455







-
-
-
-
-
-
-
-
-
-
-
-
-
-








;; return a nice clean pathname made absolute
(define (nice-path dir)
  (normalize-pathname (if (absolute-pathname? dir)
			  dir
			  (conc (current-directory) "/" dir))))

(define (get-df path)
  (let* ((df-results (cmd-run->list (conc "df " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freespc    #f))
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freespc newval))))))
	      (car df-results))
    freespc))
  
(define (get-cpu-load)
  (car (common:get-cpu-load)))
;;   (let* ((load-res (cmd-run->list "uptime"))
;; 	 (load-rx  (regexp "load average:\\s+(\\d+)"))
;; 	 (cpu-load #f))
;;     (for-each (lambda (l)
;; 		(let ((match (string-search load-rx l)))
510
511
512
513
514
515
516

























































517
518
519
520
521
522
523
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








(define (get-uname . params)
  (let* ((uname-res (cmd-run->list (conc "uname " (if (null? params) "-a" (car params)))))
	 (uname #f))
    (if (null? (car uname-res))
	"unknown"
	(caar uname-res))))

;;======================================================================
;; D I S K   S P A C E 
;;======================================================================

(define (common:get-disk-space-used fpath)
  (with-input-from-pipe (conc "/usr/bin/du -s " fpath) read))

(define (get-df path)
  (let* ((df-results (cmd-run->list (conc "df " path)))
	 (space-rx   (regexp "([0-9]+)\\s+([0-9]+)%"))
	 (freespc    #f))
    ;; (write df-results)
    (for-each (lambda (l)
		(let ((match (string-search space-rx l)))
		  (if match 
		      (let ((newval (string->number (cadr match))))
			(if (number? newval)
			    (set! freespc newval))))))
	      (car df-results))
    freespc))
  
;; paths is list of lists ((name path) ... )
;;
(define (common:get-disk-with-most-free-space disks minsize)
  (let ((best     #f)
	(bestsize 0))
    (for-each 
     (lambda (disk-num)
       (let* ((dirpath    (cadr (assoc disk-num disks)))
	      (freespc    (cond
			   ((not (directory? dirpath))
			    (if (common:low-noise-print 50 "disks not a dir " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a directory - ignoring it."))
			    -1)
			   ((not (file-write-access? dirpath))
			    (if (common:low-noise-print 50 "disks not writeable " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not writeable - ignoring it."))
			    -1)
			   ((not (eq? (string-ref dirpath 0) #\/))
			    (if (common:low-noise-print 50 "disks not a proper path " disk-num)
				(debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a fully qualified path - ignoring it."))
			    -1)
			   (else
			    (get-df dirpath)))))
	 (if (> freespc bestsize)
	     (begin
	       (set! best     (cons disk-num dirpath))
	       (set! bestsize freespc)))))
     (map car disks))
    (if (and best (> bestsize minsize))
	best
	#f))) ;; #f means no disk candidate found

;;======================================================================
;; E N V I R O N M E N T   V A R S
;;======================================================================
	      
(define (save-environment-as-files fname #!key (ignorevars (list "USER" "HOME" "DISPLAY" "LS_COLORS" "XKEYSYMDB" "EDITOR" "MAKEFLAGS" "MAKEF")))
  (let ((envvars (get-environment-variables))
        (whitesp (regexp "[^a-zA-Z0-9_\\-:,.\\/%$]")))
     (with-output-to-file (conc fname ".csh")
       (lambda ()
          (for-each (lambda (keyval)

Modified configf.scm from [3684e66c72] to [8f6bce516a].

463
464
465
466
467
468
469
470


471
472
473
474
475
476
477
463
464
465
466
467
468
469

470
471
472
473
474
475
476
477
478







-
+
+







	       (lambda (sheet-name)
		 (let* ((dat-path  (conc refdb-path "/" sheet-name ".dat"))
			(ref-dat   (configf:read-file dat-path #f #t))
			(ref-assoc (map (lambda (key)
					  (list key (hash-table-ref ref-dat key)))
					(hash-table-keys ref-dat))))
				   ;; (hash-table->alist ref-dat)))
		   (set! data (append data (list (list sheet-name ref-assoc))))))
		   ;; (set! data (append data (list (list sheet-name ref-assoc))))))
		   (set! data (cons (list sheet-name ref-assoc) data))))
	       sheets)
	      (list data "NO ERRORS"))))))

;; map over all pairs in a three level hierarchial alist and apply a function to the keys/val
;;
(define (configf:map-all-hier-alist data proc #!key (initproc1 #f)(initproc2 #f)(initproc3 #f))
  (for-each 
487
488
489
490
491
492
493
494



























488
489
490
491
492
493
494

495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
	    (for-each
	     (lambda (varname)
	       (let* ((valtmp (assoc varname sectiondat))
		      (val    (if valtmp (cadr valtmp) "")))
		 (proc sheetname sectionname varname val)))
	     (map car sectiondat))))
	(map car sheetdat))))
   (map car data)))
   (map car data))
  data)

;;======================================================================
;;  C O N F I G   T O / F R O M   A L I S T
;;======================================================================

(define (configf:config->alist cfgdat)
  (hash-table->alist cfgdat))

(define (configf:alist->config adat)
  (let ((ht (make-hash-table)))
    (for-each
     (lambda (section)
       (hash-table-set! ht (car section)(cdr section)))
     adat)
    ht))

(define (configf:read-alist fname)
  (configf:alist->config
   (with-input-from-file fname read)))

(define (configf:write-alist cdat fname)
  (with-output-to-file fname
    (lambda ()
      (pp (configf:config->alist cdat)))))
     

Modified dashboard.scm from [6d6a8350b9] to [3787336c4d].

473
474
475
476
477
478
479
480

481
482
483
484
485
486
487
473
474
475
476
477
478
479

480
481
482
483
484
485
486
487







-
+







			   (itempath   (db:test-get-item-path test))
			   (testfullname (test:test-get-fullname test))
			   (teststatus (db:test-get-status   test))
			   (teststate  (db:test-get-state    test))
			   ;;(teststart  (db:test-get-event_time test))
			   ;;(runtime    (db:test-get-run_duration test))
			   (buttontxt  (cond
					((equal? teststate "COMPLETED") teststatus)
					((member teststate '("COMPLETED" "ARCHIVED")) teststatus)
					((and (equal? teststate "NOT_STARTED")
					      (member teststatus '("ZERO_ITEMS" "BLOCKED" "PREQ_FAIL" "PREQ_DISCARDED" "TIMED_OUT" "KEEP_TRYING" "TEN_STRIKES")))
					 teststatus)
					(else
					 teststate)))
			   (button     (vector-ref columndat rown))
			   (color      (car (gutils:get-color-for-state-status teststate teststatus)))
1471
1472
1473
1474
1475
1476
1477
1478


1479
1480

1481
1482
1483
1484
1485
1486
1487
1471
1472
1473
1474
1475
1476
1477

1478
1479
1480

1481
1482
1483
1484
1485
1486
1487
1488







-
+
+

-
+







  (let* ((modtime         (dashboard:get-youngest-run-db-mod-time)) ;; (file-modification-time *db-file-path*))
	 (monitor-modtime (if (file-exists? *monitor-db-path*)
			      (file-modification-time *monitor-db-path*)
			      -1))
	 (run-update-time (current-seconds))
	 (recalc          (dashboard:recalc modtime *please-update-buttons* *last-db-update-time*)))
    (if (and (eq? *current-tab-number* 0)
	     (> monitor-modtime *last-monitor-update-time*))
	     (or (> monitor-modtime *last-monitor-update-time*)
		 (> (- run-update-time *last-monitor-update-time*) 5))) ;; update every 1/2 minute just in case
	(begin
	  (set! *last-monitor-update-time* monitor-modtime)
	  (set! *last-monitor-update-time* run-update-time) ;; monitor-modtime)
	  (if dashboard:update-servers-table (dashboard:update-servers-table))))
    (if recalc
	(begin	
	  (case *current-tab-number* 
	    ((0) 
	     (if dashboard:update-summary-tab (dashboard:update-summary-tab)))
	    ((1) ;; The runs table is active

Modified db.scm from [cbd895f712] to [6f32ff2afc].

232
233
234
235
236
237
238

239

240
241

242
243
244
245
246




247
248
249
250
251
252
253
232
233
234
235
236
237
238
239

240
241

242
243




244
245
246
247
248
249
250
251
252
253
254







+
-
+

-
+

-
-
-
-
+
+
+
+







	  ;; (dbr:dbstruct-set-run-id! dbstruct run-id)
	  (if local
	      (begin
		(dbr:dbstruct-set-localdb! dbstruct run-id db) ;; (dbr:dbstruct-set-inmem! dbstruct db) ;; direct access ...
		db)
	      (begin
		(dbr:dbstruct-set-inmem!  dbstruct inmem)
		;; dec 14, 2014 - keep deleted records available. hunch is that they are needed for id placeholders
		(sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';") ;; they just slow us down in this context
		;; (sqlite3:execute db "DELETE FROM tests WHERE state='DELETED';") ;; they just slow us down in this context
		(db:sync-tables db:sync-tests-only db inmem)
		(db:delay-if-busy refdb) ;; dbpath: (db:dbdat-get-path refdb))
		(db:delay-if-busy refdb) ;; dbpath: (db:dbdat-get-path refdb)) ;; What does delaying here achieve? 
		(dbr:dbstruct-set-refdb!  dbstruct refdb)
		(db:sync-tables db:sync-tests-only db refdb)
		;; sync once more to deal with delays
		(db:sync-tables db:sync-tests-only db inmem)
		(db:sync-tables db:sync-tests-only db refdb)
		(db:sync-tables db:sync-tests-only inmem refdb) ;; use inmem as the reference, don't read again from db
		;; sync once more to deal with delays?
		;; (db:sync-tables db:sync-tests-only db inmem)
		;; (db:sync-tables db:sync-tests-only inmem refdb)
		inmem))))))

;; This routine creates the db. It is only called if the db is not already ls opened
;;
(define (db:open-main dbstruct) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
  (let ((mdb (dbr:dbstruct-get-main dbstruct)))
    (if mdb
294
295
296
297
298
299
300
301

302
303
304
305
306
307
308
295
296
297
298
299
300
301

302
303
304
305
306
307
308
309







-
+







	(inmem  (dbr:dbstruct-get-inmem dbstruct))
	(maindb (dbr:dbstruct-get-main  dbstruct))
	(refdb  (dbr:dbstruct-get-refdb dbstruct))
	(olddb  (dbr:dbstruct-get-olddb dbstruct))
	;; (runid  (dbr:dbstruct-get-run-id dbstruct))
	)
    (debug:print-info 4 "Syncing for run-id: " run-id)
    (mutex-lock! *http-mutex*)
    ;; (mutex-lock! *http-mutex*)
    (if (eq? run-id 0)
	;; runid equal to 0 is main.db
	(if maindb
	    (if (or (not (number? mtime))
		    (not (number? stime))
		    (> mtime stime)
		    force-sync)
323
324
325
326
327
328
329
330
331
332



333
334
335

336
337
338
339
340
341
342
324
325
326
327
328
329
330



331
332
333
334
335

336
337
338
339
340
341
342
343







-
-
-
+
+
+


-
+







	(if (or (not (number? mtime))
		(not (number? stime))
		(> mtime stime)
		force-sync)
	    (begin
	      (db:delay-if-busy rundb)
	      (db:delay-if-busy olddb)
	      (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb)))
		(dbr:dbstruct-set-stime! dbstruct (current-milliseconds))
		(mutex-unlock! *http-mutex*)
	      (dbr:dbstruct-set-stime! dbstruct (current-milliseconds))
	      (let ((num-synced (db:sync-tables db:sync-tests-only inmem refdb rundb olddb)))
		;; (mutex-unlock! *http-mutex*)
		num-synced)
	      (begin
		(mutex-unlock! *http-mutex*)
		;; (mutex-unlock! *http-mutex*)
		0))))))

(define (db:close-main dbstruct)
  (let ((maindb (dbr:dbstruct-get-main dbstruct)))
    (if maindb
	(begin
	  (sqlite3:finalize! (db:dbdat-get-db maindb))
491
492
493
494
495
496
497
498
499
500








501
502
503
504
505
506
507
492
493
494
495
496
497
498



499
500
501
502
503
504
505
506
507
508
509
510
511
512
513







-
-
-
+
+
+
+
+
+
+
+







     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
     (debug:print 0 " src db:  " (db:dbdat-get-path fromdb))
     (for-each (lambda (dbdat)
		 (debug:print 0 " dbpath:  " (db:dbdat-get-path dbdat)))
	       (cons todb slave-dbs))
     (if *server-run* ;; we are inside a server
	 (set! *time-to-exit* #t) ;; let watch dog know that it is time to die.
	 (exit 1)))
     (if *server-run* ;; we are inside a server, throw a sync-failed error
	 (signal (make-composite-condition
		 (make-property-condition 'sync-failed 'message "db:sync-tables failed in a server context.")))))

	 ;; (set! *time-to-exit* #t) ;; let watch dog know that it is time to die.
	 ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
	 ;; (portlogger:open-run-close portlogger:set-port port "released")
	 ;; (exit 1)))
   (cond
    ((not fromdb) (debug:print 3 "WARNING: db:sync-tables called with fromdb missing") -1)
    ((not todb)   (debug:print 3 "WARNING: db:sync-tables called with todb missing") -2)
    ((not (sqlite3:database? (db:dbdat-get-db fromdb)))
     (debug:print 0 "ERROR: db:sync-tables called with fromdb not a database " fromdb) -3)
    ((not (sqlite3:database? (db:dbdat-get-db todb)))
     (debug:print 0 "ERROR: db:sync-tables called with todb not a database " todb) -4)
543
544
545
546
547
548
549




550
551
552
553
554
555
556
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566







+
+
+
+







	       (if (> (length fromdat) batch-len)
		   (begin
		     (set! fromdats (cons fromdat fromdats))
		     (set! fromdat  '())
		     (set! totrecords (+ totrecords 1)))))
	     (db:dbdat-get-db fromdb)
	     full-sel)
	    
	    ;; tack on remaining records in fromdat
	    (if (not (null? fromdat))
		(set! fromdats (cons fromdat fromdats)))

	    (debug:print-info 2 "found " totrecords " records to sync")

	    ;; read the target table
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (hash-table-set! todat a (apply vector a b)))
615
616
617
618
619
620
621

622
623
624
625
626
627
628
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639







+







;;
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync run-ids . options)
  (let* ((toppath  (launch:setup-for-run))
	 (dbstruct (if toppath (make-dbr:dbstruct path: toppath) #f))
	 (mtdb     (if toppath (db:open-megatest-db)))
	 (allow-cleanup (if run-ids #f #t))
	 (run-ids  (if run-ids 
		       run-ids
		       (if toppath (begin
				     (db:delay-if-busy mtdb)
				     (db:get-all-run-ids mtdb)))))
	 (tdbdat  (tasks:open-db))
	 (servers (tasks:get-all-servers (db:delay-if-busy tdbdat))))
661
662
663
664
665
666
667


668






669
670
671
672
673
674
675
676
677
678




























679
680
681
682
683
684
685
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687










688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722







+
+

+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		   (dbstruct (if toppath (make-dbr:dbstruct path: toppath local: #t) #f)))
	       (debug:print 0 "INFO: Propagating " (length testrecs) " records for run-id=" run-id " to run specific db")
	       (db:replace-test-records dbstruct run-id testrecs)
	       (sqlite3:finalize! (db:dbdat-get-db (dbr:dbstruct-get-rundb dbstruct)))))
	   run-ids)))

    ;; now ensure all newdb data are synced to megatest.db
    ;; do not use the run-ids list passed in to the function
    ;;
    (if (member 'new2old options)
	(let* ((maindb      (make-dbr:dbstruct path: toppath local: #t))
	       (src-run-ids (db:get-all-run-ids (db:dbdat-get-db (db:get-db maindb 0))))
	       (all-run-ids (sort (delete-duplicates (cons 0 src-run-ids)) <))
	       (count       1)
	       (total       (length all-run-ids))
	       (dead-runs  '()))
	(for-each
	 (lambda (run-id)
	   (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
		  (frundb (db:dbdat-get-db (db:get-db fromdb run-id))))
	     ;; (db:delay-if-busy frundb)
	     ;; (db:delay-if-busy mtdb)
	     (if (eq? run-id 0)
		 (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb)
		 (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb))))
	 (cons 0 run-ids)))
	  (for-each
	   (lambda (run-id)
	     (debug:print 0 "Processing run " (if (eq? run-id 0) " main.db " run-id) ", " count " of " total)
	     (set! count (+ count 1))
	     (let* ((fromdb (if toppath (make-dbr:dbstruct path: toppath local: #t) #f))
		    (frundb (db:dbdat-get-db (db:get-db fromdb run-id))))
	       ;; (db:delay-if-busy frundb)
	       ;; (db:delay-if-busy mtdb)
	       ;; (db:clean-up frundb)
	       (if (eq? run-id 0)
		   (begin
		     (db:sync-tables (db:sync-main-list dbstruct) (db:get-db fromdb #f) mtdb)
		     (set! dead-runs (db:clean-up-maindb (db:get-db fromdb #f))))
		   (begin
		     ;; NB// must sync first to ensure deleted tests get marked as such in megatest.db
		     (db:sync-tables db:sync-tests-only (db:get-db fromdb run-id) mtdb)
		     (db:clean-up-rundb (db:get-db fromdb run-id))
		     ))))
	   all-run-ids)
	  ;; removed deleted runs
	  (let ((dbdir (tasks:get-task-db-path)))
	    (for-each (lambda (run-id)
			(let ((fullname (conc dbdir "/" run-id ".db")))
			  (if (file-exists? fullname)
			      (begin
				(debug:print 0 "Removing database file for deleted run " fullname)
				(delete-file fullname)))))
		      dead-runs))))
    ;; (db:close-all dbstruct)
    ;; (sqlite3:finalize! mdb)
    ))

;; keeping it around for debugging purposes only
(define (open-run-close-no-exception-handling  proc idb . params)
  (debug:print-info 11 "open-run-close-no-exception-handling START given a db=" (if idb "yes " "no ") ", params=" params)
767
768
769
770
771
772
773






































774
775
776
777
778
779
780
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







                                     reviewed    TIMESTAMP,
                                     iterated    TEXT DEFAULT '',
                                     avg_runtime REAL,
                                     avg_disk    REAL,
                                     tags        TEXT DEFAULT '',
                                     jobgroup    TEXT DEFAULT 'default',
                                CONSTRAINT test_meta_constraint UNIQUE (testname));")
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',
                                name TEXT DEFAULT '',
                                testpatt TEXT DEFAULT '',
                                keylock TEXT,
                                params TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                execution_time TIMESTAMP);")
       ;; archive disk areas, cached info from [archive-disks]
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_disks (
                                id INTEGER PRIMARY KEY,
                                archive_area_name TEXT,
                                disk_path TEXT,
                                last_df INTEGER DEFAULT -1,
                                last_df_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
       ;; individual bup (or tar) data chunks
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_blocks (
                                id INTEGER PRIMARY KEY,
                                archive_disk_id INTEGER,
                                disk_path TEXT,
                                last_du INTEGER DEFAULT -1,
                                last_du_time TIMESTAMP DEFAULT (strftime('%s','now')),
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
       ;; tests allocated to what chunks. reusing a chunk for a test/item_path is very efficient
       ;; NB// the per run/test recording of where the archive is stored is done in the test
       ;;      record. 
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archive_allocations (
                                id INTEGER PRIMARY KEY,
                                archive_block_id INTEGER,
                                testname TEXT,
                                item_path TEXT,
                                creation_time TIMESTAMP DEFAULT (strftime('%','now')));")
       ;; move this clean up call somewhere else
       (sqlite3:execute db "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs
       (sqlite3:execute db (conc "CREATE INDEX IF NOT EXISTS runs_index ON runs (runname" (if havekeys "," "") keystr ");"))
       ;; (sqlite3:execute db "CREATE VIEW runs_tests AS SELECT * FROM runs INNER JOIN tests ON runs.id=tests.run_id;")
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS extradat (id INTEGER PRIMARY KEY, run_id INTEGER, key TEXT, val TEXT);")
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS metadat (id INTEGER PRIMARY KEY, var TEXT, val TEXT,
                                  CONSTRAINT metadat_constraint UNIQUE (var));")
       (sqlite3:execute db "CREATE TABLE IF NOT EXISTS access_log (id INTEGER PRIMARY KEY, user TEXT, accessed TIMESTAMP, args TEXT);")
       ;; Must do this *after* running patch db !! No more. 
807
808
809
810
811
812
813
814

815
816
817
818
819
820
821
882
883
884
885
886
887
888

889
890
891
892
893
894
895
896







-
+







                     final_logf   TEXT      DEFAULT 'logs/final.log',
                     logdat       TEXT      DEFAULT '', 
                     run_duration INTEGER   DEFAULT 0,
                     comment      TEXT      DEFAULT '',
                     event_time   TIMESTAMP DEFAULT (strftime('%s','now')),
                     fail_count   INTEGER   DEFAULT 0,
                     pass_count   INTEGER   DEFAULT 0,
                     archived     INTEGER   DEFAULT 0, -- 0=no, 1=in progress, 2=yes
                     archived     INTEGER   DEFAULT 0, -- 0=no, > 1=archive block id where test data can be found
                        CONSTRAINT testsconstraint UNIQUE (run_id, testname, item_path));")
     (sqlite3:execute db "CREATE INDEX IF NOT EXISTS tests_index ON tests (run_id, testname, item_path);")
     (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_steps 
                              (id INTEGER PRIMARY KEY,
                               test_id INTEGER, 
                               stepname TEXT, 
                               state TEXT DEFAULT 'NOT_STARTED', 
850
851
852
853
854
855
856
857









858














































































































859
860
861
862
863
864
865
925
926
927
928
929
930
931

932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058







-
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







     (sqlite3:execute db "CREATE TABLE IF NOT EXISTS test_rundat (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              update_time  TIMESTAMP,
                              cpuload      INTEGER DEFAULT -1,
                              diskfree     INTEGER DEFAULT -1,
                              diskusage    INTGER DEFAULT -1,
                              run_duration INTEGER DEFAULT 0);")))
                              run_duration INTEGER DEFAULT 0);")
     (sqlite3:execute db "CREATE TABLE IF NOT EXISTS archives (
                              id           INTEGER PRIMARY KEY,
                              test_id      INTEGER,
                              state        TEXT DEFAULT 'new',
                              status       TEXT DEFAULT 'n/a',
                              archive_type TEXT DEFAULT 'bup',
                              du           INTEGER,
                              archive_path TEXT);")))
  db)

;;======================================================================
;; A R C H I V E S
;;======================================================================

;; dneeded is minimum space needed, scan for existing archives that 
;; are on disks with adequate space and already have this test/itempath
;; archived
;;
(define (db:archive-get-allocations dbstruct testname itempath dneeded)
  (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          '())
	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
    (sqlite3:for-each-row
     (lambda (id archive-disk-id disk-path last-du last-du-time)
       (set! res (cons (vector id archive-disk-id disk-path last-du last-du-time) res)))
     db
     "SELECT b.id,b.archive_disk_id,b.disk_path,b.last_du,b.last_du_time FROM archive_blocks AS b
        INNER JOIN archive_allocations AS a ON a.archive_block_id=b.id
        WHERE a.testname=? AND a.item_path=?;" 
     testname itempath)
    ;; Now res has list of candidate paths, look in archive_disks for candidate with potential free space
    (if (null? res)
	'()
	(sqlite3:for-each-row
	 (lambda (id archive-area-name disk-path last-df last-df-time)
	   (set! blocks (cons (vector id archive-area-name disk-path last-df last-df-time) blocks)))
	 db 
	 (conc
	  "SELECT d.id,d.archive_area_name,disk_path,last_df,last_df_time FROM archive_disks AS d
             INNER JOIN archive_blocks AS b ON d.id=b.archive_disk_id
             WHERE b.id IN (" (string-intersperse (map conc res) ",") ") AND
         last_df > ?;")
	 dneeded))
    blocks))
    
;; returns id of the record, register a disk allocated to archiving and record it's last known
;; available space
;;
(define (db:archive-register-disk dbstruct bdisk-name bdisk-path df)
  (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          #f))
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     db
     "SELECT id FROM archive_disks WHERE archive_area_name=? AND disk_path=?;"
     bdisk-name bdisk-path)
    (if res ;; record exists, update df and return id
	(begin
	  (sqlite3:execute db "UPDATE archive_disks SET last_df=?,last_df_time=(strftime('%s','now'))
                                  WHERE archive_area_name=? AND disk_path=?;"
			   df bdisk-name bdisk-path)
	  res)
	(begin
	  (sqlite3:execute
	   db
	   "INSERT OR REPLACE INTO archive_disks (archive_area_name,disk_path,last_df)
                VALUES (?,?,?);"
	   bdisk-name bdisk-path df)
	  (db:archive-register-disk dbstruct bdisk-name bdisk-path df)))))

;; record an archive path created on a given archive disk (identified by it's bdisk-id)
;; if path starts with / then it is full, otherwise it is relative to the archive disk
;; preference is to store the relative path.
;;
(define (db:archive-register-block-name dbstruct bdisk-id archive-path #!key (du #f))
  (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
	 (db           (db:dbdat-get-db dbdat))
	 (res          #f))
    ;; first look to see if this path is already registered
    (sqlite3:for-each-row
     (lambda (id)
       (set! res id))
     db
     "SELECT id FROM archive_blocks WHERE archive_disk_id=? AND disk_path=?;"
     bdisk-id archive-path)
    (if res ;; record exists, update du if applicable and return res
	(begin
	  (if du (sqlite3:exectute db "UPDATE archive_blocks SET last_du=?,last_du_time=(strftime('%s','now'))
                                          WHERE archive_disk_id=? AND disk_path=?;"
				   bdisk-id archive-path du))
	  res)
	(begin
	  (sqlite3:execute db "INSERT OR REPLACE INTO archive_blocks (archive_disk_id,disk_path,last_du)
                                                        VALUES (?,?,?);"
			   bdisk-id archive-path (or du 0))
	  (db:archive-register-block-name dbstruct bdisk-id archive-path du: du)))))


;; The "archived" field in tests is overloaded; 0 = not archived, > 0 archived in block with given id
;;
(define (db:test-set-archive-block-id dbstruct run-id test-id archive-block-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (sqlite3:execute db "UPDATE tests SET archived=? WHERE id=?;"
		      archive-block-id test-id))))
 

;; (define (db:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
;;   (let* ((dbdat        (db:get-db dbstruct #f)) ;; archive tables are in main.db
;; 	 (db           (db:dbdat-get-db dbdat))
;; 	 (res          '())
;; 	 (blocks       '())) ;; a block is an archive chunck that can be added too if there is space
;;     (sqlite3:for-each-row  #f)

;;======================================================================
;; L O G G I N G    D B 
;;======================================================================

(define (open-logging-db) ;;  (conc *toppath* "/megatest.db") (car *configinfo*)))
  (let* ((dbpath    (conc (if *toppath* (conc *toppath* "/") "") "logging.db")) ;; fname)
893
894
895
896
897
898
899






















































900
901
902
903
904
905
906
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;======================================================================
;; D B   U T I L S
;;======================================================================

;;======================================================================
;; M A I N T E N A N C E
;;======================================================================

(define (db:have-incompletes? dbstruct run-id ovr-deadtime)
  (let* ((dbdat        (db:get-db dbstruct run-id))
	 (db           (db:dbdat-get-db dbdat))
	 (incompleted '())
	 (oldlaunched '())
	 (toplevels   '())
	 (deadtime-str (configf:lookup *configdat* "setup" "deadtime"))
	 (deadtime     (if (and deadtime-str
				(string->number deadtime-str))
			   (string->number deadtime-str)
			   7200))) ;; two hours
    (if (number? ovr-deadtime)(set! deadtime ovr-deadtime))
    
    ;; in RUNNING or REMOTEHOSTSTART for more than 10 minutes
    ;;
    ;; HOWEVER: this code in run:test seems to work fine
    ;;              (> (- (current-seconds)(+ (db:test-get-event_time testdat)
    ;;                     (db:test-get-run_duration testdat)))
    ;;                    600) 
    (db:delay-if-busy dbdat)
    (sqlite3:for-each-row 
     (lambda (test-id run-dir uname testname item-path)
       (if (and (equal? uname "n/a")
		(equal? item-path "")) ;; this is a toplevel test
	   ;; what to do with toplevel? call rollup?
	   (begin
	     (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
	     (debug:print-info 0 "Found old toplevel test in RUNNING state, test-id=" test-id))
	   (set! incompleted (cons (list test-id run-dir uname testname item-path run-id) incompleted))))
     db
     "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > (run_duration + ?) AND state IN ('RUNNING','REMOTEHOSTSTART');"
     run-id deadtime)

    ;; in LAUNCHED for more than one day. Could be long due to job queues TODO/BUG: Need override for this in config
    ;;
    (db:delay-if-busy dbdat)
    (sqlite3:for-each-row
     (lambda (test-id run-dir uname testname item-path)
       (if (and (equal? uname "n/a")
		(equal? item-path "")) ;; this is a toplevel test
	   ;; what to do with toplevel? call rollup?
	   (set! toplevels   (cons (list test-id run-dir uname testname item-path run-id) toplevels))
	   (set! oldlaunched (cons (list test-id run-dir uname testname item-path run-id) oldlaunched))))
     db
     "SELECT id,rundir,uname,testname,item_path FROM tests WHERE run_id=? AND (strftime('%s','now') - event_time) > 86400 AND state IN ('LAUNCHED');"
     run-id)
    
    (debug:print-info 18 "Found " (length oldlaunched) " old LAUNCHED items, " (length toplevels) " old LAUNCHED toplevel tests and " (length incompleted) " tests marked RUNNING but apparently dead.")
    (if (and (null? incompleted)
	     (null? oldlaunched)
	     (null? toplevels))
	#f
	#t)))

;;  select end_time-now from
;;      (select testname,item_path,event_time+run_duration as
;;                          end_time,strftime('%s','now') as now from tests where state in
;;      ('RUNNING','REMOTEHOSTSTART','LAUNCED'));

(define (db:find-and-mark-incomplete dbstruct run-id ovr-deadtime)
991
992
993
994
995
996
997
998

999
1000
1001
1002
1003
1004
1005
1238
1239
1240
1241
1242
1243
1244

1245
1246
1247
1248
1249
1250
1251
1252







-
+







;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up dbdat)
  (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests)+(SELECT count(id) FROM runs);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
1025
1026
1027
1028
1029
1030
1031

























































































1032
1033
1034
1035
1036
1037
1038
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-rundb dbdat)
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM tests);"))
	(statements
	 (map (lambda (stmt)
		(sqlite3:prepare db stmt))
	      (list
	       ;; delete all tests that belong to runs that are 'deleted'
	       ;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
	       ;; delete all tests that are 'DELETED'
	       "DELETE FROM tests WHERE state='DELETED';"
	       ))))
    (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")))

;; Clean out old junk and vacuum the database
;;
;; Ultimately do something like this:
;;
;; 1. Look at test records either deleted or part of deleted run:
;;    a. If test dir exists, set the the test to state='UNKNOWN', Set the run to 'unknown'
;;    b. If test dir gone, delete the test record
;; 2. Look at run records
;;    a. If have tests that are not deleted, set state='unknown'
;;    b. ....
;;
(define (db:clean-up-maindb dbdat)
  ;; (debug:print 0 "WARNING: db clean up not fully ported to v1.60, cleanup action will be on megatest.db")
  (let* ((db         (db:dbdat-get-db dbdat))
	 (count-stmt (sqlite3:prepare db "SELECT (SELECT count(id) FROM runs);"))
	 (statements
	  (map (lambda (stmt)
		 (sqlite3:prepare db stmt))
	       (list
		;; delete all tests that belong to runs that are 'deleted'
		;; (conc "DELETE FROM tests WHERE run_id NOT IN (" (string-intersperse (map conc valid-runs) ",") ");")
		;; delete all tests that are 'DELETED'
		"DELETE FROM runs WHERE state='deleted';"
		)))
	 (dead-runs '()))
    (sqlite3:for-each-row
     (lambda (run-id)
       (set! dead-runs (cons run-id dead-runs)))
       db
       "SELECT id FROM runs WHERE state='deleted';")
    (db:delay-if-busy dbdat)
    (sqlite3:with-transaction 
     db
     (lambda ()
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count before clean: " tot))
			     count-stmt)
       (map sqlite3:execute statements)
       (sqlite3:for-each-row (lambda (tot)
			       (debug:print-info 0 "Records count after  clean: " tot))
			     count-stmt)))
    (map sqlite3:finalize! statements)
    (sqlite3:finalize! count-stmt)
    ;; (db:find-and-mark-incomplete db)
    (db:delay-if-busy dbdat)
    (sqlite3:execute db "VACUUM;")
    dead-runs))

;;======================================================================
;; M E T A   G E T   A N D   S E T   V A R S
;;======================================================================

;; returns number if string->number is successful, string otherwise
;; also updates *global-delta*
2294
2295
2296
2297
2298
2299
2300
2301
2302


2303
2304
2305
2306
2307
2308
2309
2310
2311
2312

2313
2314
2315
2316


2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329


2330
2331
2332
2333
2334
2335
2336
2630
2631
2632
2633
2634
2635
2636


2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647

2648
2649
2650


2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663


2664
2665
2666
2667
2668
2669
2670
2671
2672







-
-
+
+









-
+


-
-
+
+











-
-
+
+







       res))))

;;======================================================================
;; QUEUE UP META, TEST STATUS AND STEPS REMOTE ACCESS
;;======================================================================

;; NOTE: Can remove the regex and base64 encoding for zmq
(define (db:obj->string obj)
  (case *transport-type*
(define (db:obj->string obj #!key (transport 'http))
  (case transport
    ;; ((fs) obj)
    ((http fs)
     (string-substitute
      (regexp "=") "_"
      (base64:base64-encode 
       (z3:encode-buffer
	(with-output-to-string
	  (lambda ()(serialize obj)))))
      #t))
    ((zmq)(with-output-to-string (lambda ()(serialize obj))))
    ((zmq nmsg)(with-output-to-string (lambda ()(serialize obj))))
    (else obj)))

(define (db:string->obj msg)
  (case *transport-type*
(define (db:string->obj msg #!key (transport 'http))
  (case transport
    ;; ((fs) msg)
    ((http fs)
     (if (string? msg)
	 (with-input-from-string 
	     (z3:decode-buffer
	      (base64:base64-decode
	       (string-substitute 
		(regexp "_") "=" msg #t)))
	   (lambda ()(deserialize)))
	 (begin
	   (debug:print 0 "ERROR: reception failed. Received " msg " but cannot translate it.")
	   #f))) ;; crude reply for when things go awry
    ((zmq)(with-input-from-string msg (lambda ()(deserialize))))
	   msg))) ;; crude reply for when things go awry
    ((zmq nmsg)(with-input-from-string msg (lambda ()(deserialize))))
    (else msg)))

(define (db:test-set-status-state dbstruct run-id test-id status state msg)
  (let ((dbdat  (db:get-db dbstruct run-id)))
    (if (member state '("LAUNCHED" "REMOTEHOSTSTART"))
	(db:general-call dbdat 'set-test-start-time (list test-id)))
    (if msg
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2682
2683
2684
2685
2686
2687
2688








2689
2690
2691
2692
2693
2694
2695







-
-
-
-
-
-
-
-







	    (db:general-call dbdat 'top-test-set-running (list test-name))
	    (if (equal? status "LAUNCHED")
		(db:general-call dbdat 'top-test-set (list "LAUNCHED" test-name))
		(db:general-call dbdat 'top-test-set-per-pf-counts (list test-name run-id test-name test-name test-name))))
	#f)
      #f))

(define (db:tests-register-test dbstruct run-id test-name item-path)
  (db:with-db
   dbstruct
   run-id
   #t
   (lambda (db)
     (sqlite3:execute db 'register-test run-id test-name item-path))))

(define (db:test-get-logfile-info dbstruct run-id test-name)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (db)
     (let ((res #f))

Modified dcommon.scm from [31fd59f2b3] to [ea99b44dfd].

134
135
136
137
138
139
140
141
142


143
144
145
146
147
148
149
150



151



152
153
154
155
156
157
158
134
135
136
137
138
139
140


141
142
143
144
145
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161
162
163







-
-
+
+








+
+
+
-
+
+
+







	 ;; the synchash calls modify the "data" hash
	 (get-runs-sig    (conc (client:get-signature) " get-runs"))
	 (get-tests-sig   (conc (client:get-signature) " get-tests"))
	 (get-details-sig (conc (client:get-signature) " get-test-details"))

	 ;; test-ids to get and display are indexed on window-id in curr-test-ids hash
	 (test-ids        (hash-table-values (dboard:data-get-curr-test-ids *data*)))

 	 (run-changes     (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data runname #f #f keypatts))
	 ;; run-id is #f in next line to send the query to server 0
 	 (run-changes     (synchash:client-get 'db:get-runs get-runs-sig (length keypatts) data #f runname #f #f keypatts))
	 (tests-detail-changes (if (not (null? test-ids))
				   (synchash:client-get 'db:get-test-info-by-ids get-details-sig 0  data #f test-ids)
				   '()))

	 ;; Now can calculate the run-ids
	 (run-hash    (hash-table-ref/default data get-runs-sig #f))
	 (run-ids     (if run-hash (filter number? (hash-table-keys run-hash)) '()))

	 (all-test-changes (let ((res (make-hash-table)))
			     (for-each (lambda (run-id)
					 (if (> run-id 0)
	 (test-changes (synchash:client-get 'db:get-tests-for-runs-mindata get-tests-sig 0 data run-ids testpatt states statuses #f))
					     (hash-table-set! res run-id (synchash:client-get 'db:get-tests-for-run-mindata get-tests-sig 0 data run-id 1 testpatt states statuses #f))))
				       run-ids)
			     res))
	 (runs-hash    (hash-table-ref/default data get-runs-sig #f))
	 (header       (hash-table-ref/default runs-hash "header" #f))
	 (run-ids      (sort (filter number? (hash-table-keys runs-hash))
			     (lambda (a b)
			       (let* ((record-a (hash-table-ref runs-hash a))
				      (record-b (hash-table-ref runs-hash b))
				      (time-a   (db:get-value-by-header record-a header "event_time"))
190
191
192
193
194
195
196

197
198
199
200
201
202
203
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209







+







		  (set! colnum (+ colnum 1))))
	      run-ids)

    ;; Scan all tests to be displayed and organise all the test names, respecting what is in the hash table
    ;; Do this analysis in the order of the run-ids, the most recent run wins
    (for-each (lambda (run-id)
		(let* ((run-path       (hash-table-ref (dboard:data-get-run-keys *data*) run-id))
		       (test-changes   (hash-table-ref all-test-changes run-id))
		       (new-test-dat   (car test-changes))
		       (removed-tests  (cadr test-changes))
		       (tests          (sort (map cadr (filter (lambda (testrec)
								 (eq? run-id (db:mintest-get-run_id (cadr testrec))))
							       new-test-dat))
					     (lambda (a b)
					       (let ((time-a (db:mintest-get-event_time a))
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

275
276
277
278
279
280
281
257
258
259
260
261
262
263

264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279

280
281
282
283
284
285
286
287







-
+















-
+







				      (iup:attribute-set! (dboard:data-get-runs-matrix *data*)
							  (conc rownum ":" 0) dispname)
				      ))
				;; set the cell text and color
				;; (debug:print 2 "rownum:colnum=" rownum ":" colnum ", state=" status)
				(iup:attribute-set! (dboard:data-get-runs-matrix *data*)
						    (conc rownum ":" colnum)
						    (if (string=? state "COMPLETED")
						    (if (member state '("ARCHIVED" "COMPLETED"))
							status
							state))
				(iup:attribute-set! (dboard:data-get-runs-matrix *data*)
						    (conc "BGCOLOR" rownum ":" colnum)
						    (car (gutils:get-color-for-state-status state status)))
				))
			    tests)))
	      run-ids)

    (let ((updater (hash-table-ref/default  (dboard:data-get-updaters *data*) window-id #f)))
      (if updater (updater (hash-table-ref/default data get-details-sig #f))))

    (iup:attribute-set! (dboard:data-get-runs-matrix *data*) "REDRAW" "ALL")
    ;; (debug:print 2 "run-changes: " run-changes)
    ;; (debug:print 2 "test-changes: " test-changes)
    (list run-changes test-changes)))
    (list run-changes all-test-changes)))

;;======================================================================
;; TESTS DATA
;;======================================================================

;; Produce a list of lists ready for common:sparse-list-generate-index
;;

Modified docs/manual/megatest_manual.html from [191f1255c5] to [151c9a3dda].

1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1010
1011
1012
1013
1014
1015
1016







1017
1018
1019
1020
1021
1022
1023







-
-
-
-
-
-
-







sudo netstat -lptu
sudo netstat -tulpn</code></pre>
</div></div>
</div>
</div>
</div>
<h1 id="_reference">Reference</h1>
<div class="sect1">
<h2 id="_the_first_chapter_of_the_second_part_2">The First Chapter of the Second Part</h2>
<div class="sectionbody">
<div class="paragraph"><p>Chapters grouped into book parts are at level 1 and can contain
sub-sections.</p></div>
</div>
</div>
<div class="sect1">
<h2 id="_the_testconfig_file">The testconfig File</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_setup_section">Setup section</h3>
<div class="sect3">
<h4 id="_header">Header</h4>
1182
1183
1184
1185
1186
1187
1188




















































































1189
1190
1191
1192
1193
1194
1195
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">There is a trailing space after the --</td>
</tr></table>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="_programming_api">Programming API</h2>
<div class="sectionbody">
<div class="paragraph"><p>These routines can be called from the megatest repl.</p></div>
<div class="tableblock">
<table rules="all"
width="70%"
frame="hsides"
cellspacing="0" cellpadding="4">
<caption class="title">Table 2. API Server Management Calls</caption>
<col width="14%" />
<col width="28%" />
<col width="28%" />
<col width="28%" />
<thead>
<tr>
<th align="center" valign="top">API Call                  </th>
<th align="left" valign="top"> Purpose comments   </th>
<th align="left" valign="top"> Returns                 </th>
<th align="left" valign="top"> Comments</th>
</tr>
</thead>
<tfoot>
<tr>
<td align="center" valign="top"><p class="table">(rmt:login        run-id)</p></td>
<td align="left" valign="top"><p class="table"><code>Verify the the version, testsuite area etc. are correct.</code></p></td>
<td align="left" valign="top"><p class="table"><code>#( #t "successful login" )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
</tfoot>
<tbody>
<tr>
<td align="center" valign="top"><p class="table">(rmt:start-server run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#( success/fail   n/a )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
<tr>
<td align="center" valign="top"><p class="table">(rmt:kill-server  run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#( success/fail   n/a )</code></p></td>
<td align="left" valign="top"><p class="table"><code>Works only if the server is still reachable</code></p></td>
</tr>
</tbody>
</table>
</div>
<div class="tableblock">
<table rules="all"
width="70%"
frame="hsides"
cellspacing="0" cellpadding="4">
<caption class="title">Table 3. API Keys Related Calls</caption>
<col width="14%" />
<col width="28%" />
<col width="28%" />
<col width="28%" />
<thead>
<tr>
<th align="center" valign="top">API Call                        </th>
<th align="left" valign="top"> Purpose comments   </th>
<th align="left" valign="top"> Returns                 </th>
<th align="left" valign="top"> Comments</th>
</tr>
</thead>
<tfoot>
<tr>
<td align="center" valign="top"><p class="table">(rmt:get-key-val-pairs run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>#t=success/#f=fail</code></p></td>
<td align="left" valign="top"><p class="table"><code>Works only if the server is still reachable</code></p></td>
</tr>
</tfoot>
<tbody>
<tr>
<td align="center" valign="top"><p class="table">(rmt:get-keys run-id)</p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
<td align="left" valign="top"><p class="table"><code>( key1 key2 &#8230; )</code></p></td>
<td align="left" valign="top"><p class="table"><code></code></p></td>
</tr>
</tbody>
</table>
</div>
<div class="sect2">
<h3 id="_megatest_internals">Megatest Internals</h3>
<div class="imageblock graphviz">
<div class="content">
<img src="server.png" alt="server.png" />
</div>
</div>
1273
1274
1275
1276
1277
1278
1279
1280

1281
1282
1283
1284
1350
1351
1352
1353
1354
1355
1356

1357
1358
1359
1360
1361







-
+




</div>
</div>
</div>
<div id="footnotes"><hr /></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br />
Last updated 2014-10-08 23:02:21 MST
Last updated 2014-11-25 21:11:28 MST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [eff8aa5426] to [ddb57ef21a].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
1
2
3
4






5
6
7
8
9
10
11




-
-
-
-
-
-








Reference
=========

The First Chapter of the Second Part
------------------------------------

Chapters grouped into book parts are at level 1 and can contain
sub-sections.

The testconfig File
-------------------

Setup section
~~~~~~~~~~~~~

Header
176
177
178
179
180
181
182























183
184
185
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




-----------------
[triggers]
COMPLETED/ xterm -e bash -s -- 
-----------------

NOTE: There is a trailing space after the --

Programming API
---------------

These routines can be called from the megatest repl. 

.API Server Management Calls
[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"]
|======================
|API Call                  | Purpose comments   | Returns                 | Comments
|(rmt:start-server run-id) |                    | #( success/fail   n/a ) | 
|(rmt:kill-server  run-id) |                    | #( success/fail   n/a ) | Works only if the server is still reachable
|(rmt:login        run-id) | Verify the the version, testsuite area etc. are correct. | #( #t "successful login" ) |
|======================

.API Keys Related Calls
[width="70%",cols="^,2m,2m,2m",frame="topbot",options="header,footer"]
|======================
|API Call                        | Purpose comments   | Returns                 | Comments
|(rmt:get-keys run-id)           |                    | ( key1 key2 ... )       | 
| (rmt:get-key-val-pairs run-id) |                    | #t=success/#f=fail      | Works only if the server is still reachable
|======================


:numbered!:

Added fs-transport.scm version [d187681c70].













































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use spiffy uri-common intarweb http-client spiffy-request-vars)

(tcp-buffer-size 2048)

(declare (unit fs-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(include "common_records.scm")
(include "db_records.scm")


;;======================================================================
;; F S   T R A N S P O R T   S E R V E R
;;======================================================================

;; There is no "server" per se but a convience routine to make it non
;; necessary to be reopening the db over and over again.
;;

(define (fs:process-queue-item packet)
  (if (not *megatest-db*) ;; we will require that (setup-for-run) has already been called
      (set! *megatest-db* (open-db)))
  (debug:print-info 11 "fs:process-queue-item called with packet=" packet)
  (db:process-queue-item *megatest-db* packet))
      

Modified gutils.scm from [0911f9831f] to [b83aee11f3].

21
22
23
24
25
26
27
28

29
30
31
32
33
34
35
21
22
23
24
25
26
27

28
29
30
31
32
33
34
35







-
+







	 (c2 (map string->number (string-split color2)))
	 (delta (map (lambda (a b)(abs (- a b))) c1 c2)))
    (null? (filter (lambda (x)(> x 3)) delta))))

(define (gutils:get-color-for-state-status state status);; #!key (get-label #f))
  ;; ((if get-label cadr car)
   (case (string->symbol state)
     ((COMPLETED)
     ((COMPLETED ARCHIVED)
      (case (string->symbol status)
	((PASS)        (list "70  249 73" status))
	((WARN WAIVED) (list "255 172 13" status))
	((SKIP)        (list "230 230 0" status))
	(else (list "223 33 49" status))))
     ;;      (if (equal? status "PASS")
     ;;	  '("70 249 73" "PASS")

Modified http-transport.scm from [907ced71b2] to [c3c2fc0fe7].

74
75
76
77
78
79
80






81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103





















104
105
106
107
108
109
110







+
+
+
+
+
+

















-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree")))
    ;; (set! db *inmemdb*)
    (debug:print-info 0 "portlogger recommended port: " start-port)
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
				 (make-property-condition 
				  'server
				  'message "server error")))))

    ;; http-transport:handle-directory) ;; simple-directory-handler)
    ;; Setup the web server and a /ctrl interface
    ;;
    (vhost-map `(((* any) . ,(lambda (continue)
			       ;; open the db on the first call 
				 ;; This is were we set up the database connections
			       (let* (($   (request-vars source: 'both))
				      (dat ($ 'dat))
				      (res #f))
				 (cond
				  ((equal? (uri-path (request-uri (current-request)))
					   '(/ "api"))
				   (send-response body:    (api:process-request *inmemdb* $) ;; the $ is the request vars proc
						  headers: '((content-type text/plain)))
				   (mutex-lock! *heartbeat-mutex*)
				   (set! *last-db-access* (current-seconds))
				   (mutex-unlock! *heartbeat-mutex*))
				  ;; This is the /ctrl path where data is handed to the server and
				  ;; responses 
				  ;; ((equal? (uri-path (request-uri (current-request)))
				  ;;          '(/ "ctrl"))
				  ;;  (let* ((packet (db:string->obj dat))
				  ;;         (qtype  (cdb:packet-get-qtype packet)))
				  ;;    (debug:print-info 12 "server=> received packet=" packet)
				  ;;    (if (not (member qtype '(sync ping)))
				  ;;        (begin
				  ;;          (mutex-lock! *heartbeat-mutex*)
				  ;;          (set! *last-db-access* (current-seconds))
				  ;;          (mutex-unlock! *heartbeat-mutex*)))
				  ;;    ;; (mutex-lock! *db:process-queue-mutex*) ;; trying a mutex
				  ;;    ;; (set! res (open-run-close db:process-queue-item open-db packet))
				  ;;    (set! res (db:process-queue-item db packet))
				  ;;    ;; (mutex-unlock! *db:process-queue-mutex*)
				  ;;    (debug:print-info 11 "Return value from db:process-queue-item is " res)
				  ;;    (send-response body: (conc "<head>ctrl data</head>\n<body>"
				  ;;       			res
				  ;;       			"</body>")
				  ;;       	    headers: '((content-type text/plain)))))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ ""))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
					   '(/ "runs"))
				   (send-response body: (http-transport:main-page)))
				  ((equal? (uri-path (request-uri (current-request))) 
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265




















266
267
268
269
270
271
272
273
274
275
276
277
278
279
280

281
282
283
284
285
286
287
288









289
290
291
292
293
294
295











296
297
298
299
300
301
302
303
304
305
306
307
308
309


310












311
312
313
314
315
316
317
318
319
320
321
322
323

324
325
326
327
328
329

330
331
332
333
334
335
336
337
338
339
340

341




342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
226
227
228
229
230
231
232


















233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268








269
270
271
272
273
274
275
276
277







278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304

305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+















+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+














+
+
-
+
+
+
+
+
+
+
+
+
+
+
+












-
+






+











+
-
+
+
+
+


















+







(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (success    #t))
    (handle-exceptions
     exn
     (if (> numretries 0)
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (thread-sleep! 1)
	   (handle-exceptions
	    exn
	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
	    (close-all-connections!))
	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
	   (http-transport:client-api-send-receive run-id serverdat cmd params numretries: (- numretries 1)))
	 (begin
	   (mutex-unlock! *http-mutex*)
	   (tasks:kill-server-run-id run-id)
	   #f))
     (begin
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http)))
;;    (condition-case
;;     handle-exceptions
;;     exn
;;     (if (> numretries 0)
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (thread-sleep! 1)
;;	   (handle-exceptions
;;	    exn
;;	    (debug:print 0 "WARNING: closing connections failed. Server at " fullurl " almost certainly dead")
;;	    (close-all-connections!))
;;	   (debug:print 0 "WARNING: Failed to communicate with server, trying again, numretries left: " numretries)
;;	   (http-transport:client-api-send-receive run-id serverdat cmd sparams numretries: (- numretries 1)))
;;	 (begin
;;	   (mutex-unlock! *http-mutex*)
;;	   (tasks:kill-server-run-id run-id)
;;	   #f))
;;     (begin
       (debug:print-info 11 "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
			 #f))
       ;; send the data and get the response
       ;; extract the needed info from the http data and 
       ;; process and return it.
       (let* ((send-recieve (lambda ()
			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (vector
					 success
					 (db:string->obj 
					 (handle-exceptions
					  exn
					  (begin
					    (set! success #f)
					    (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ". Killing associated server to allow clean retry.")
					    (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					    (hash-table-delete! *runremote* run-id)
					    ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine.
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
					     (hash-table-delete! *runremote* run-id)
					     ;; Killing associated server to allow clean retry.")
					     (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					    #f)
					  (with-input-from-request ;; was dat
					   fullurl 
					   (list (cons 'key "thekey")
						 (cons 'cmd cmd)
						 (cons 'params params))
					   read-string))))
					     (mutex-unlock! *http-mutex*)
					     (signal (make-composite-condition
						      (make-property-condition 'commfail 'message "failed to connect to server")))
					     "communications failed")
					   (with-input-from-request ;; was dat
					    fullurl 
					    (list (cons 'key "thekey")
						  (cons 'cmd cmd)
						  (cons 'params sparams))
					    read-string))
					  transport: 'http)))
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
			      (thread-sleep! 45)
			      #f))
	      (th1 (make-thread send-recieve "with-input-from-request"))
	      (th2 (make-thread time-out     "time out")))
	 (thread-start! th1)
	 (thread-start! th2)
	 (thread-join! th1)
	 (thread-terminate! th2)
	 (debug:print-info 11 "got res=" res)
	 (if (vector? res)
	     (if (vector-ref res 0)
	 res)))))
		 res
		 (begin ;; note: this code also called in nmsg-transport - consider consolidating it
		   (debug:print 0 "ERROR: error occured at server, info=" (vector-ref res 2))
		   (debug:print 0 " client call chain:")
		   (print-call-chain (current-error-port))
		   (debug:print 0 " server call chain:")
		   (pp (vector-ref res 1) (current-error-port))
		   (signal (vector-ref result 0))))
	     (signal (make-composite-condition
		      (make-property-condition 
		       'timeout
		       'message "nmsg-transport:client-api-send-receive-raw timed out talking to server")))))))

;; careful closing of connections stored in *runremote*
;;
(define (http-transport:close-connections run-id)
  (let* ((server-dat (hash-table-ref/default *runremote* run-id #f)))
    (if (vector? server-dat)
	(let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
	  (close-connection! api-dat)
	  #t)
	#f)))


(define (make-http-transport:server-dat)(make-vector 5))
(define (make-http-transport:server-dat)(make-vector 6))
(define (http-transport:server-dat-get-iface         vec)    (vector-ref  vec 0))
(define (http-transport:server-dat-get-port          vec)    (vector-ref  vec 1))
(define (http-transport:server-dat-get-api-uri       vec)    (vector-ref  vec 2))
(define (http-transport:server-dat-get-api-url       vec)    (vector-ref  vec 3))
(define (http-transport:server-dat-get-api-req       vec)    (vector-ref  vec 4))
(define (http-transport:server-dat-get-last-access   vec)    (vector-ref  vec 5))
(define (http-transport:server-dat-get-socket        vec)    (vector-ref  vec 6))

(define (http-transport:server-dat-make-url vec)
  (if (and (http-transport:server-dat-get-iface vec)
	   (http-transport:server-dat-get-port  vec))
      (conc "http://" 
	    (http-transport:server-dat-get-iface vec)
	    ":"
	    (http-transport:server-dat-get-port  vec))
      #f))

(define (http-transport:server-dat-update-last-access vec)
  (if (vector? vec)
  (vector-set! vec 5 (current-seconds)))
      (vector-set! vec 5 (current-seconds))
      (begin
	(print-call-chain (current-error-port))
	(debug:print 0 "ERROR: call to http-transport:server-dat-update-last-access with non-vector!!"))))

;;
;; connect
;;
(define (http-transport:client-connect iface port)
  (let* ((api-url      (conc "http://" iface ":" port "/api"))
	 (api-uri      (uri-reference (conc "http://" iface ":" port "/api")))
	 (api-req      (make-request method: 'POST uri: api-uri))
	 (server-dat   (vector iface port api-uri api-url api-req (current-seconds))))
    server-dat))

;; run http-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (debug:print-info 0 "Starting the sync-back, keep alive thread in server for run-id=" run-id)
  (let* ((tdbdat      (tasks:open-db))
	 (server-info (let loop ((start-time (current-seconds))
				 (changed    #t)
				 (last-sdat  "not this"))
                        (let ((sdat #f))
			  (thread-sleep! 0.01)
			  (debug:print-info 0 "Waiting for server alive signature")
381
382
383
384
385
386
387
388

389
390
391
392
393
394
395
396
397



398

399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417








































418
419
420
421


422
423
424

425
426
427
428
429
430
431
393
394
395
396
397
398
399

400







401

402
403
404
405
406



















407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446




447
448
449
450

451
452
453
454
455
456
457
458







-
+
-
-
-
-
-
-
-

-
+
+
+

+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+


-
+







				      (exit))
				    (loop start-time
					  (equal? sdat last-sdat)
					  sdat)))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (server-timeout (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
	 (server-timeout (server:get-timeout)))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; (* 3 24 60 60) ;; default to three days
			       (* 60 1)         ;; default to one minute
			       ;; (* 60 60 25)      ;; default to 25 hours
			       ))))
    (let loop ((count         0)
	       (server-state 'available))
	       (server-state 'available)
	       (bad-sync-count 0))

      ;; Use this opportunity to sync the inmemdb to db
      (if *inmemdb* 
      (let ((start-time (current-milliseconds))
	    (sync-time  #f)
	    (rem-time   #f))
	;; inmemdb is a dbstruct
	(if *inmemdb* (db:sync-touched *inmemdb* *run-id* force-sync: #t))
	(set! sync-time  (- (current-milliseconds) start-time))
	(set! rem-time (quotient (- 4000 sync-time) 1000))
	(debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time)

      ;;
      ;; set_running after our first pass through and start the db
      ;;
      (if (eq? server-state 'available)
	  (begin
	    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
	    (thread-sleep! 5) ;; give some margin for queries to complete before switching from file based access to server based access
	    (set! *inmemdb*  (db:setup run-id))
	    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")))

	  (let ((start-time (current-milliseconds))
		(sync-time  #f)
		(rem-time   #f))
	    ;; inmemdb is a dbstruct
	    (condition-case
	     (db:sync-touched *inmemdb* *run-id* force-sync: #t)
	     ((sync-failed)(cond
			    ((> bad-sync-count 10) ;; time to give up
			     (http-transport:server-shutdown server-id port))
			    (else ;; (> bad-sync-count 0)  ;; we've had a fail or two, delay and loop
			     (thread-sleep! 5)
			     (loop count server-state (+ bad-sync-count 1)))))
	     ((exn)
	      (debug:print 0 "ERROR: error from sync code other than 'sync-failed. Attempting to gracefully shutdown the server")
	      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running crashed")
	      (exit)))
	    (set! sync-time  (- (current-milliseconds) start-time))
	    (set! rem-time (quotient (- 4000 sync-time) 1000))
	    (debug:print 2 "SYNC: time= " sync-time ", rem-time=" rem-time)
	    
	    (if (and (<= rem-time 4)
		     (> rem-time 0))
		(thread-sleep! rem-time)
		(thread-sleep! 4))) ;; fallback for if the math is changed ...

	  ;;
	  ;; no *inmemdb* yet, set running after our first pass through and start the db
	  ;;
	  (if (eq? server-state 'available)
	      (let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
		(if (equal? new-server-id server-id)
		    (begin
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
		      (thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
		      (set! *inmemdb*  (db:setup run-id))
		      ;; force initialization
		      ;; (db:get-db *inmemdb* #t)
		      (db:get-db *inmemdb* run-id)
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running"))
		    (begin ;; gotta exit nicely
      (if (and (<= rem-time 4)
	       (> rem-time 0))
	  (thread-sleep! rem-time)
	  (thread-sleep! 4))) ;; fallback for if the math is changed ...
		      (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
		      (http-transport:server-shutdown server-id port))))))
      
      (if (< count 1) ;; 3x3 = 9 secs aprox
	  (loop (+ count 1) 'running))
	  (loop (+ count 1) 'running bad-sync-count))
      
      ;; Check that iface and port have not changed (can happen if server port collides)
      (mutex-lock! *heartbeat-mutex*)
      (set! sdat *server-info*)
      (mutex-unlock! *heartbeat-mutex*)
      
      (if (or (not (equal? sdat (list iface port)))
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467

468
469
470
471
472
473
474
470
471
472
473
474
475
476

477
478





479
480
481
482
483
484
485
486
487

488
489
490
491
492
493
494
495







-


-
-
-
-
-









-
+







      ;; (debug:print 11 "last-access=" last-access ", server-timeout=" server-timeout)
      ;;
      ;; no_traffic, no running tests, if server 0, no running servers
      ;;
      ;; (let ((wait-on-running (configf:lookup *configdat* "server" "wait-on-running"))) ;; wait on running tasks (if not true then exit on time out)
      ;;
      (if (and *server-run*
	       ;; (or
	       (> (+ last-access server-timeout)
		  (current-seconds)))
;;		   (and (eq? run-id 0)
;;			(> (tasks:num-servers-non-zero-running tdb) 0))
;;		   (and (not (eq? run-id 0)) ;; only makes sense in non-zero run-id servers
;;			(> (db:get-count-tests-actually-running *inmemdb* run-id) 0))
;;		   ))
	  (begin
	    (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	    ;;
	    ;; Consider implementing some smarts here to re-insert the record or kill self is
	    ;; the db indicates so
	    ;;
	    ;; (if (tasks:server-am-i-the-server? tdb run-id)
	    ;;     (tasks:server-set-state! tdb server-id "running"))
	    ;;
	    (loop 0 server-state))
	    (loop 0 server-state bad-sync-count))
	  (http-transport:server-shutdown server-id port)))))

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    (debug:print-info 0 "Starting to shutdown the server.")
    ;; need to delete only *my* server entry (future use)
    (set! *time-to-exit* #t)
543
544
545
546
547
548
549












550
551
552
553
554
555
556
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589







+
+
+
+
+
+
+
+
+
+
+
+







				   "Keep running")))
	    (thread-start! th2)
	    (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
	    (thread-start! th3)
	    (set! *didsomething* #t)
	    (thread-join! th2)
	    (exit))))))

(define (http:ping run-id host-port)
  (let* ((server-dat (http-transport:client-connect (car host-port)(cadr host-port)))
	 (login-res  (rmt:login-no-auto-client-setup server-dat run-id)))
    (if (and (list? login-res)
	     (car login-res))
	(begin
	  (print "LOGIN_OK")
	  (exit 0))
	(begin
	  (print "LOGIN_FAILED")
	  (exit 1)))))

(define (http-transport:server-signal-handler signum)
  (signal-mask! signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()

Modified launch.scm from [34882953c1] to [257569992c].

449
450
451
452
453
454
455










456
457
458
459
460





461
462







463
464
465
466
467
468
469
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465





466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486







+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+


+
+
+
+
+
+
+







(define (launch:setup-for-run #!key (force #f))
  ;; would set values for KEYS in the environment here for better support of env-override but 
  ;; have chicken/egg scenario. need to read megatest.config then read it again. Going to 
  ;; pass on that idea for now
  ;; special case
  (if (or force (not (hash-table? *configdat*)))  ;; no need to re-open on every call
      (begin
	(set! *configinfo* (or (if (get-environment-variable "MT_CMDINFO") ;; we are inside a test - do not reprocess configs
				   (let ((alistconfig (conc (get-environment-variable "MT_LINKTREE") "/"
							    (get-environment-variable "MT_TARGET")   "/"
							    (get-environment-variable "MT_RUNNAME")  "/"
							    ".megatest.cfg")))
				     (if (file-exists? alistconfig)
					 (list (configf:read-alist alistconfig)
					       (get-environment-variable "MT_RUN_AREA_HOME"))
					 #f))
				   #f) ;; no config cached - give up
	(set! *configinfo* (find-and-read-config 
			    (if (args:get-arg "-config")(args:get-arg "-config") "megatest.config")
			    environ-patt: "env-override"
			    given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
			    pathenvvar: "MT_RUN_AREA_HOME"))
			       (find-and-read-config 
				(if (args:get-arg "-config")(args:get-arg "-config") "megatest.config")
				environ-patt: "env-override"
				given-toppath: (get-environment-variable "MT_RUN_AREA_HOME")
				pathenvvar: "MT_RUN_AREA_HOME")))
	(set! *configdat*  (if (car *configinfo*)(car *configinfo*) #f))
	(set! *toppath*    (if (car *configinfo*)(cadr *configinfo*) #f))
	(let* ((tmptransport (configf:lookup *configdat* "server" "transport"))
	       (transport    (if tmptransport (string->symbol tmptransport) 'http)))
	  (if (member transport '(http rpc nmsg))
	      (set! *transport-type* transport)
	      (begin
		(debug:print 0 "ERROR: Unrecognised transport " transport)
		(exit))))
	(let ((linktree (configf:lookup *configdat* "setup" "linktree"))) ;; link tree is critical
	  (if linktree
	      (if (not (file-exists? linktree))
		  (begin
		    (handle-exceptions
		     exn
		     (begin
487
488
489
490
491
492
493
494


495
496



























497
498
499
500


501
502
503
504
505
506
507

508
509
510
511
512
513
514
515
516
517
518
519
520

521
522
523
524
525
526
527
528
529
530





531
532
533
534
535
536
537
504
505
506
507
508
509
510

511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543


544
545
546






547













548










549
550
551
552
553
554
555
556
557
558
559
560







-
+
+


+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


-
-
+
+

-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+







		(debug:print 0 "ERROR: linktree is required in your megatest.config [setup] section")
		(exit 1)))
	  (if (and *toppath*
		   (directory-exists? *toppath*))
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      (begin
		(debug:print 0 "ERROR: failed to find the top path to your Megatest area.")
		(exit 1))))))
		(exit 1)))
	  )))
  *toppath*)

(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (args:get-arg "-runtests"))
      (let* ((linktree (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
	(debug:print-info 0 "Have -runtests with target=" target ", runname=" runname ", fulldir=" fulldir)
	(if (file-exists? linktree) ;; can't proceed without linktree
	    (begin
	      (if (not (file-exists? fulldir))
		  (create-directory fulldir #t)) ;; need to protect with exception handler 
	      (if (and target
		       runname
		       (file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg")))
		    (debug:print-info 0 "Caching megatest.config in " fulldir "/.megatest.cfg")
		    (configf:write-alist *configdat* tmpfile)
		    (system (conc "ln -sf " tmpfile " " targfile))
		    )))))))

(define (get-best-disk confdat)
  (let* ((disks    (hash-table-ref/default confdat "disks" #f))
	 (best     #f)
	 (bestsize 0))
	 (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
		     (string->number (or m "10000")))))
    (if disks 
	(for-each 
	 (lambda (disk-num)
	   (let* ((dirpath    (cadr (assoc disk-num disks)))
		  (freespc    (cond
			       ((not (directory? dirpath))
				(if (common:low-noise-print 50 "disks not a dir " disk-num)
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
				    (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a directory - ignoring it."))
				-1)
			       ((not (file-write-access? dirpath))
				(if (common:low-noise-print 50 "disks not writeable " disk-num)
				    (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not writeable - ignoring it."))
				-1)
			       ((not (eq? (string-ref dirpath 0) #\/))
				(if (common:low-noise-print 50 "disks not a proper path " disk-num)
				    (debug:print 0 "WARNING: disk " disk-num " at path " dirpath " is not a fully qualified path - ignoring it."))
				-1)
			       (else
				(get-df dirpath)))))
	     (if (> freespc bestsize)
	  (if res
		 (begin
		   (set! best     dirpath)
		   (set! bestsize freespc)))))
	 (map car disks)))
    (if (and best (> bestsize 0))
	best
	(begin
	  (if (common:low-noise-print 20 "no valid disks")
	      (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!"))
	  (exit 1)))))
	      (cdr res)
	      (begin
		(if (common:low-noise-print 20 "no valid disks")
		    (debug:print 0 "ERROR: No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists!"))
		(exit 1)))))))

;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
579
580
581
582
583
584
585





586

587
588
589
590
591
592
593
602
603
604
605
606
607
608
609
610
611
612
613

614
615
616
617
618
619
620
621







+
+
+
+
+
-
+







    (if (not (file-exists? linktree))
	(begin
	  (debug:print 0 "WARNING: linktree did not exist! Creating it now at " linktree)
	  (create-directory linktree #t))) ;; (system (conc "mkdir -p " linktree))))
    ;; create the directory for the tests dir links, this is needed no matter what...
    (if (and (not (directory-exists? lnkbase))
	     (not (file-exists? lnkbase)))
	(handle-exceptions
	 exn
	 (begin
	   (debug:print "ERROR: Problem creating linktree base at " lnkbase)
	   (print-error-message exn (current-error-port)))
	(create-directory lnkbase #t))
	 (create-directory lnkbase #t)))
    
    ;; update the toptest record with its location rundir, cache the path
    ;; This wass highly inefficient, one db write for every subtest, potentially
    ;; thousands of unnecessary updates, cache the fact it was set and don't set it 
    ;; again. 

    ;; Now create the link from the test path to the link tree, however
642
643
644
645
646
647
648



649

650
651
652
653
654
655
656
670
671
672
673
674
675
676
677
678
679

680
681
682
683
684
685
686
687







+
+
+
-
+







				lnkpath)
			    testname "")
	  ;; (rmt:general-call 'test-set-rundir run-id lnkpath testname "") ;; toptest-path)
	  (if (or (not curr-test-path)
		  (not (directory-exists? toptest-path)))
	      (begin
		(debug:print-info 2 "Creating " toptest-path " and link " lnkpath)
		(handle-exceptions
		 exn
		 #f ;; don't care to catch and deal with errors here for now.
		(create-directory toptest-path #t)
		 (create-directory toptest-path #t))
		(hash-table-set! *toptest-paths* testname toptest-path)))))

    ;; The toptest path has been created, the link to the test in the linktree has
    ;; been created. Now, if this is an iterated test the real test dir must be created
    (if (not not-iterated) ;; this is an iterated test
	(begin ;; (let ((lnktarget (conc lnkpath "/" item-path)))
	  (debug:print 2 "Setting up sub test run area")
765
766
767
768
769
770
771
772

773
774
775
776
777
778
779
796
797
798
799
800
801
802

803
804
805
806
807
808
809
810







-
+







    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	     (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	(begin
	  (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	  (runs:remove-test-directory #f testinfo #t))) ;; remove data only, do not perturb the record
	  (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record

    ;; prevent overlapping actions - set to LAUNCHED as early as possible
    ;;
    (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (rmt:roll-up-pass-fail-counts run-id test-name item-path "LAUNCHED")
    (set! diskpath (get-best-disk *configdat*))
    (if diskpath

Modified megatest-version.scm from [9b8927108a] to [ea58aed5be].

1
2
3
4
5
6

7
1
2
3
4
5

6
7





-
+

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1,1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6006)
(define megatest-version 1.6007)

Modified megatest.scm from [16af3583d8] to [2c3c187b26].

1
2
3
4
5
6
7
8
9
10
11
12
13
14






15
16
17

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

33
34
35
36
37
38
39
40
1
2
3
4
5
6
7
8
9
10
11
12


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36

37

38
39
40
41
42
43
44












-
-
+
+
+
+
+
+



+














-
+
-







;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;; (include "common.scm")
;; (include "megatest-version.scm")

(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json 
     http-client directory-utils z3 srfi-18) ;;  extras)
(use sqlite3 srfi-1 posix regex regex-case srfi-69 base64 format readline apropos json http-client directory-utils rpc ;; (srfi 18) extras)
     http-client srfi-18) ;;  zmq extras)

;; Added for csv stuff - will be removed
;;
(use sparse-vectors)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))
(import (prefix rpc rpc:))

;; (use zmq)

(declare (uses common))
(declare (uses megatest-version))
(declare (uses margs))
(declare (uses runs))
(declare (uses launch))
(declare (uses server))
(declare (uses client))
(declare (uses tests))
(declare (uses genexample))
(declare (uses daemon))
(declare (uses db))
;; (declare (uses sdb))

;; (declare (uses filedb))
(declare (uses tdb))
(declare (uses mt))
(declare (uses api))
(declare (uses tasks)) ;; only used for debugging.

(define *db* #f) ;; this is only for the repl, do not use in general!!!!

129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149



150



151
152
153
154
155
156
157
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
161
162
163
164
165
166
167







+













-
+
+
+

+
+
+







  -import-megatest.db     : migrate a database from v1.55 series to v1.60 series
  -sync-to-megatest.db    : migrate data back to megatest.db
  -update-meta            : update the tests metadata for all tests
  -setvars VAR1=val1,VAR2=val2 : Add environment variables to a run NB// these are
                                 overwritten by values set in config files.
  -server -|hostname      : start the server (reduces contention on megatest.db), use
                            - to automatically figure out hostname
  -transport http|zmq     : use http or zmq for transport (default is http) 
  -daemonize              : fork into background and disconnect from stdin/out
  -log logfile            : send stdout and stderr to logfile
  -list-servers           : list the servers 
  -stop-server id         : stop server specified by id (see output of -list-servers), use
                            0 to kill all
  -repl                   : start a repl (useful for extending megatest)
  -load file.scm          : load and run file.scm
  -mark-incompletes       : find and mark incomplete tests
  -ping run-id|host:port  : ping server, exit with 0 if found

Utilities
  -env2file fname         : write the environment to fname.csh and fname.sh
  -refdb2dat refdb        : convert refdb to sexp or to format specified by -dumpmode
                            formats: perl, ruby, sqlite3
                            formats: perl, ruby, sqlite3, csv (for csv the -o param
                            will substitute %s for the sheet name in generating 
                            multiple sheets)
  -o                      : output file for refdb2dat (defaults to stdout)
  -archive cmd            : archive runs specified by selectors to one of disks specified
                            in the [archive-disks] section.
                            cmd: keep-html, restore, save, save-remove

Spreadsheet generation
  -extract-ods fname.ods  : extract an open document spreadsheet from the database
  -pathmod path           : insert path, i.e. path/runame/itempath/logfile.html
                            will clear the field if no rundir/testname/itempath/logfile
                            if it contains forward slashes the path will be converted
                            to windows style
204
205
206
207
208
209
210


211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230

231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258

259
260
261
262
263
264
265







+
+




















+















-







			":expected"
			":tol"
			":units"
			;; misc
			"-start-dir"
			"-server"
			"-stop-server"
			"-transport"
			"-kill-server"
			"-port"
			"-extract-ods"
			"-pathmod"
			"-env2file"
			"-setvars"
			"-set-state-status"
			"-set-run-status"
			"-debug" ;; for *verbosity* > 2
			"-gen-megatest-test"
			"-override-timeout"
			"-test-files"  ;; -test-paths is for listing all
			"-load"        ;; load and exectute a scheme file
			"-section"
			"-var"
			"-dumpmode"
			"-run-id"
			"-ping"
			"-refdb2dat"
			"-o"
			"-log"
			"-archive"
			) 
		 (list  "-h" "-help" "--help"
			"-version"
		        "-force"
		        "-xterm"
		        "-showkeys"
		        "-show-keys"
		        "-test-status"
			"-set-values"
			"-load-test-data"
			"-summarize-items"
		        "-gui"
			"-daemonize"
			"-preclean"
			;; misc
			"-archive"
			"-repl"
			"-lock"
			"-unlock"
			"-list-servers"
                        "-run-wait"      ;; wait on a run to complete (i.e. no RUNNING)

			;; misc queries
281
282
283
284
285
286
287

288
289
290
291
292



293
294
295
296
297
298
299
293
294
295
296
297
298
299
300
301
302
303
304

305
306
307
308
309
310
311
312
313
314







+




-
+
+
+







			"-q" ;; quiet 0, errors/warnings only
		       )
		 args:arg-hash
		 0))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *time-zero* (current-seconds))
(define *watchdog*
  (make-thread 
   (lambda ()
     (thread-sleep! 0.05) ;; delay for startup
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db")))
     (let ((legacy-sync (configf:lookup *configdat* "setup" "megatest-db"))
	   (debug-mode  (debug:debug-mode 1))
	   (last-time   (current-seconds)))
       (let loop ()
	 ;; sync for filesystem local db writes
	 ;;
	 (let ((start-time      (current-seconds))
	       (servers-started (make-hash-table)))
	   (for-each 
	    (lambda (run-id)
308
309
310
311
312
313
314
315
316







317
318
319



320
321
322
323





324
325
326
327
328
329
330
323
324
325
326
327
328
329


330
331
332
333
334
335
336
337
338
339
340
341
342




343
344
345
346
347
348
349
350
351
352
353
354







-
-
+
+
+
+
+
+
+



+
+
+
-
-
-
-
+
+
+
+
+







			  (debug:print-info 0 "Sync of newdb to olddb for run-id " run-id " completed in " sync-time " seconds")))
		    ;; (if (> sync-time 10) ;; took more than ten seconds, start a server for this run
		    ;;     (begin
		    ;;       (debug:print-info 0 "Sync is taking a long time, start up a server to assist for run " run-id)
		    ;;       (server:kind-run run-id)))))
		    (hash-table-delete! *db-local-sync* run-id)))
	      (mutex-unlock! *db-multi-sync-mutex*))
	    (hash-table-keys *db-local-sync*)))

	    (hash-table-keys *db-local-sync*))
	   (if (and debug-mode
		    (> (- start-time last-time) 60))
	       (begin
		 (set! last-time start-time)
		 (debug:print-info 1 "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	 
	 ;; keep going unless time to exit
	 ;;
	 (if (not *time-to-exit*)
	     (let delay-loop ((count 0))
	       (if (and (not *time-to-exit*)
			(< count 11)) ;; aprox 5-6 seconds
	     (begin
	       (thread-sleep! 1) ;; wait one second before syncing again
	       (loop)))))
     "Watchdog thread")))
		   (begin
		     (thread-sleep! 1)
		     (delay-loop (+ count 1))))
	       (loop))))))
   "Watchdog thread"))

(thread-start! *watchdog*)

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 "Sending log output to " (args:get-arg "-log"))
      (current-error-port oup)
410
411
412
413
414
415
416
417
















































418
419
420
421
422
423

424
425
426
427
428
429
430

431
432
433
434
435
436
437
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494

495
496
497
498
499
500
501

502
503
504
505
506
507
508
509








+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+





-
+






-
+







	(map (lambda (x)
	       (string-intersperse 
		x
		" => "))
	     (common:get-disks *configdat*))
	"\n"))
      (set! *didsomething* #t)))

(define (make-sparse-array)
  (let ((a (make-sparse-vector)))
    (sparse-vector-set! a 0 (make-sparse-vector))
    a))

(define (sparse-array? a)
  (and (sparse-vector? a)
       (sparse-vector? (sparse-vector-ref a 0))))

(define (sparse-array-ref a x y)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-ref row y)
	#f)))

(define (sparse-array-set! a x y val)
  (let ((row (sparse-vector-ref a x)))
    (if row
	(sparse-vector-set! row y val)
	(let ((new-row (make-sparse-vector)))
	  (sparse-vector-set! a x new-row)
	  (sparse-vector-set! new-row y val)))))

;; csv processing record
(define (make-refdb:csv)
  (vector 
   (make-sparse-array)
   (make-hash-table)
   (make-hash-table)
   0
   0))
(define-inline (refdb:csv-get-svec     vec)    (vector-ref  vec 0))
(define-inline (refdb:csv-get-rows     vec)    (vector-ref  vec 1))
(define-inline (refdb:csv-get-cols     vec)    (vector-ref  vec 2))
(define-inline (refdb:csv-get-maxrow   vec)    (vector-ref  vec 3))
(define-inline (refdb:csv-get-maxcol   vec)    (vector-ref  vec 4))
(define-inline (refdb:csv-set-svec!    vec val)(vector-set! vec 0 val))
(define-inline (refdb:csv-set-rows!    vec val)(vector-set! vec 1 val))
(define-inline (refdb:csv-set-cols!    vec val)(vector-set! vec 2 val))
(define-inline (refdb:csv-set-maxrow!  vec val)(vector-set! vec 3 val))
(define-inline (refdb:csv-set-maxcol!  vec val)(vector-set! vec 4 val))

(define (get-dat results sheetname)
  (or (hash-table-ref/default results sheetname #f)
      (let ((tmp-vec  (make-refdb:csv)))
	(hash-table-set! results sheetname tmp-vec)
	tmp-vec)))

(if (args:get-arg "-refdb2dat")
    (let* ((input-db (args:get-arg "-refdb2dat"))
	   (out-file (args:get-arg "-o"))
	   (out-fmt  (or (args:get-arg "-dumpmode") "scheme"))
	   (out-port (if (and out-file 
			      (not (equal? out-fmt "sqlite3")))
			      (not (member out-fmt '("sqlite3" "csv"))))
			 (open-output-file out-file)
			 (current-output-port)))
	   (res-data (configf:read-refdb input-db))
	   (data     (car res-data))
	   (msg      (cadr res-data)))
      (if (not data)
	  (debug:print 0 data) ;; some error occurred
	  (debug:print 0 "Bad input? data=" data) ;; some error occurred
	  (with-output-to-port out-port
	    (lambda ()
	      (case (string->symbol out-fmt)
		((scheme)(pp data))
		((perl)
		 ;; (print "%hash = (")
		 ;;        key1 => 'value1',
450
451
452
453
454
455
456







































































457
458
459
460
461
462
463
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		    (print "data[\"" sheetname "\"][\"" sectionname "\"][\"" varname "\"] = \"" val "\""))
		  initproc1:
		  (lambda (sheetname)
		    (print "data[\"" sheetname "\"] = {}"))
		  initproc2:
		  (lambda (sheetname sectionname)
		    (print "data[\"" sheetname "\"][\"" sectionname "\"] = {}"))))
		((csv)
		 (let* ((results  (make-hash-table)) ;; (make-sparse-array)))
			(row-cols (make-hash-table))) ;; hash of hashes where section => ht { row-<name> => num or col-<name> => num
		   ;; (print "data=")
		   ;; (pp data)
		   (configf:map-all-hier-alist
		    data
		    (lambda (sheetname sectionname varname val)
		      ;; (print "sheetname: " sheetname ", sectionname: " sectionname ", varname: " varname ", val: " val)
		      (let* ((dat      (get-dat results sheetname))
			     (vec      (refdb:csv-get-svec dat))
			     (rownames (refdb:csv-get-rows dat))
			     (colnames (refdb:csv-get-cols dat))
			     (currrown (hash-table-ref/default rownames varname #f))
			     (currcoln (hash-table-ref/default colnames sectionname #f))
			     (rown     (or currrown 
					   (let* ((lastn   (refdb:csv-get-maxrow dat))
						  (newrown (+ lastn 1)))
					     (refdb:csv-set-maxrow! dat newrown)
					     newrown)))
			     (coln     (or currcoln 
					   (let* ((lastn   (refdb:csv-get-maxcol dat))
						  (newcoln (+ lastn 1)))
					     (refdb:csv-set-maxcol! dat newcoln)
					     newcoln))))
			(if (not (sparse-array-ref vec 0 coln)) ;; (eq? rown 0)
			    (begin
			      (sparse-array-set! vec 0 coln sectionname)
			      ;; (print "sparse-array-ref " 0 "," coln "=" (sparse-array-ref vec 0 coln))
			      ))
			(if (not (sparse-array-ref vec rown 0)) ;; (eq? coln 0)
			    (begin
			      (sparse-array-set! vec rown 0 varname)
			      ;; (print "sparse-array-ref " rown "," 0 "=" (sparse-array-ref vec rown 0))
			      ))
			(if (not currrown)(hash-table-set! rownames varname rown))
			(if (not currcoln)(hash-table-set! colnames sectionname coln))
			;; (print "dat=" dat ", rown=" rown ", coln=" coln)
			(sparse-array-set! vec rown coln val)
			;; (print "sparse-array-ref " rown "," coln "=" (sparse-array-ref vec rown coln))
			)))
		   (for-each
		    (lambda (sheetname)
		      (let* ((sheetdat (get-dat results sheetname))
			     (svec     (refdb:csv-get-svec sheetdat))
			     (maxrow   (refdb:csv-get-maxrow sheetdat))
			     (maxcol   (refdb:csv-get-maxcol sheetdat))
			     (fname    (if out-file 
					   (string-substitute "%s" sheetname out-file) ;; "/foo/bar/%s.csv")
					   (conc sheetname ".csv"))))
			(with-output-to-file fname
			  (lambda ()
			    ;; (print "Sheetname: " sheetname)
			    (let loop ((row       0)
				       (col       0)
				       (curr-row '())
				       (result   '()))
			      (let* ((val (sparse-array-ref svec row col))
				     (disp-val (if val
						   (conc "\"" val "\"")
						   "")))
				(if (> col 0)(display ","))
				(display disp-val)
				(cond
				 ((> row maxrow)(display "\n") result)
				 ((>= col maxcol)
				  (display "\n")
				  (loop (+ row 1) 0 '() (append result (list curr-row))))
				 (else
				  (loop row (+ col 1) (append curr-row (list val)) result)))))))))
		    (hash-table-keys results))))
		((sqlite3)
		 (let* ((db-file   (or out-file (pathname-file input-db)))
			(db-exists (file-exists? db-file))
			(db        (sqlite3:open-database db-file)))
		   (if (not db-exists)(sqlite3:execute db "CREATE TABLE data (sheet,section,var,val);"))
		   (configf:map-all-hier-alist
		    data
473
474
475
476
477
478
479








480
481
482
483
484
485
486
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637







+
+
+
+
+
+
+
+







      ))

(if (args:get-arg "-ping")
    (let* ((run-id        (string->number (args:get-arg "-run-id")))
	   (host:port     (args:get-arg "-ping")))
      (server:ping run-id host:port)))

;;       (set! *did-something* #t)
;; 	      (begin
;; 		(print ((rpc:procedure 'testing (car host-port)(cadr host-port))))
;; 		(case (server:get-transport)
;; 		  ((http)(http:ping run-id host-port))
;; 		  ((rpc) (rpc:procedure 'server:login (car host-port)(cadr host-port));;  *toppath*)) ;; (rpc-transport:ping  run-id (car host-port)(cadr host-port)))
;; 		  (else  (debug:print 0 "ERROR: No transport set")(exit)))))

;;======================================================================
;; Start the server - can be done in conjunction with -runall or -runtests (one day...)
;;   we start the server if not running else start the client thread
;;======================================================================

(if (args:get-arg "-server")

500
501
502
503
504
505
506
507


508
509
510
511
512
513
514
651
652
653
654
655
656
657

658
659
660
661
662
663
664
665
666







-
+
+







    ;;  Setup client for all expect listed here
    (if (null? (lset-intersection 
		     equal?
		     (hash-table-keys args:arg-hash)
		     '("-list-servers"
		       "-stop-server"
		       "-show-cmdinfo"
		       "-list-runs")))
		       "-list-runs"
		       "-ping")))
	(if (launch:setup-for-run)
	    (let ((run-id    (and (args:get-arg "-run-id")
				  (string->number (args:get-arg "-run-id")))))
	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
	      ;; if not list or kill then start a client (if appropriate)
	      (if (or (args-defined? "-h" "-version" "-gen-megatest-area" "-gen-megatest-test")
		      (eq? (length (hash-table-keys args:arg-hash)) 0))
906
907
908
909
910
911
912

913
914
915
916
917
918
919
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072







+







;;======================================================================
;; Get test paths matching target, runname, and testpatt
(if (or (args:get-arg "-test-files")(args:get-arg "-test-paths"))
    ;; if we are in a test use the MT_CMDINFO data
    (if (getenv "MT_CMDINFO")
	(let* ((startingdir (current-directory))
	       (cmdinfo   (common:read-encoded-string (getenv "MT_CMDINFO")))
	       (transport (assoc/default 'transport cmdinfo))
	       (testpath  (assoc/default 'testpath  cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (state     (args:get-arg ":state"))
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988



989
990


991
992
993
994

995
996
997
998
999
1000
1001
1102
1103
1104
1105
1106
1107
1108

































1109
1110
1111


1112
1113




1114
1115
1116
1117
1118
1119
1120
1121







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
-
-
+
+
-
-
-
-
+







		       paths))))))

;;======================================================================
;; Archive tests
;;======================================================================
;; Archive tests matching target, runname, and testpatt
(if (args:get-arg "-archive")
    ;; if we are in a test use the MT_CMDINFO data
    (if (getenv "MT_CMDINFO")
	(let* ((startingdir (current-directory))
	       (cmdinfo   (common:read-encoded-string (getenv "MT_CMDINFO")))
	       (testpath  (assoc/default 'testpath  cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (state     (args:get-arg ":state"))
	       (status    (args:get-arg ":status"))
	       (target    (args:get-arg "-target")))
	  (change-directory testpath)
	  (if (not target)
	      (begin
		(debug:print 0 "ERROR: -target is required.")
		(exit 1)))
	  (if (not (launch:setup-for-run))
	      (begin
		(debug:print 0 "Failed to setup, giving up on -archive, exiting")
		(exit 1)))
	  (let* ((keys     (rmt:get-keys))
		 (paths    (tests:test-get-paths-matching keys target)))
	    (set! *didsomething* #t)
	    (for-each (lambda (path)
			(print path))
		      paths))
	  ;; (if (sqlite3:database? db)(sqlite3:finalize! db))
	  )
	;; else do a general-run-call
	(general-run-call 
	 "-test-paths"
    ;; else do a general-run-call
    (general-run-call 
     "-archive"
	 "Get paths to tests"
	 (lambda (target runname keys keyvals)
     "Archive"
     (lambda (target runname keys keyvals)
	   (let* ((paths    (tests:test-get-paths-matching keys target)))
	     (for-each (lambda (path)
			 (print path))
		       paths))))))
       (operate-on 'archive))))

;;======================================================================
;; Extract a spreadsheet from the runs database
;;======================================================================

(if (args:get-arg "-extract-ods")
    (general-run-call
1031
1032
1033
1034
1035
1036
1037

1038
1039
1040
1041
1042
1043
1044
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165







+








(define (megatest:step step state status logfile msg)
  (if (not (getenv "MT_CMDINFO"))
      (begin
	(debug:print 0 "ERROR: MT_CMDINFO env var not set, -step must be called *inside* a megatest invoked environment!")
	(exit 5))
      (let* ((cmdinfo   (common:read-encoded-string (getenv "MT_CMDINFO")))
	     (transport (assoc/default 'transport cmdinfo))
	     (testpath  (assoc/default 'testpath  cmdinfo))
	     (test-name (assoc/default 'test-name cmdinfo))
	     (runscript (assoc/default 'runscript cmdinfo))
	     (db-host   (assoc/default 'db-host   cmdinfo))
	     (run-id    (assoc/default 'run-id    cmdinfo))
	     (test-id   (assoc/default 'test-id   cmdinfo))
	     (itemdat   (assoc/default 'itemdat   cmdinfo))
1055
1056
1057
1058
1059
1060
1061
1062
1063


1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083

1084
1085
1086
1087
1088
1089
1090
1176
1177
1178
1179
1180
1181
1182


1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212







-
-
+
+




















+







	      (debug:print 0 "ERROR: You must specify :state and :status with every call to -step")
	      (exit 6))))))

(if (args:get-arg "-step")
    (begin
      (megatest:step 
       (args:get-arg "-step")
       (args:get-arg ":state")
       (args:get-arg ":status")
       (or (args:get-arg "-state")(args:get-arg ":state"))
       (or (args:get-arg "-status")(args:get-arg ":status"))
       (args:get-arg "-setlog")
       (args:get-arg "-m"))
      ;; (if db (sqlite3:finalize! db))
      (set! *didsomething* #t)))
    
(if (or (args:get-arg "-setlog")       ;; since setting up is so costly lets piggyback on -test-status
	;;     (not (args:get-arg "-step")))  ;; -setlog may have been processed already in the "-step" previous
	;;     NEW POLICY - -setlog sets test overall log on every call.
	(args:get-arg "-set-toplog")
	(args:get-arg "-test-status")
	(args:get-arg "-set-values")
	(args:get-arg "-load-test-data")
	(args:get-arg "-runstep")
	(args:get-arg "-summarize-items"))
    (if (not (getenv "MT_CMDINFO"))
	(begin
	  (debug:print 0 "ERROR: MT_CMDINFO env var not set, commands -test-status, -runstep and -setlog must be called *inside* a megatest environment!")
	  (exit 5))
	(let* ((startingdir (current-directory))
	       (cmdinfo   (common:read-encoded-string (getenv "MT_CMDINFO")))
	       (transport (assoc/default 'transport cmdinfo))
	       (testpath  (assoc/default 'testpath  cmdinfo))
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (db-host   (assoc/default 'db-host   cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))

Added mlaunch.scm version [4f4e7034c8].



























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; Copyright 2006-2014, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

;;======================================================================
;; MLAUNCH
;;
;;   take jobs from the given queue and keep launching them keeping
;;   the cpu load at the targeted level
;;
;;======================================================================

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
(import (prefix sqlite3 sqlite3:))

(declare (unit mlaunch))
(declare (uses db))
(declare (uses common))

Modified newdashboard.scm from [24924c0cda] to [c632e597af].

76
77
78
79
80
81
82




83
84
85
86
87
88
89
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93







+
+
+
+








;; (if (args:get-arg "-host")
;;     (begin
;;       (set! *runremote* (string-split (args:get-arg "-host" ":")))
;;       (client:launch))
;;     (client:launch))

;; ease debugging by loading ~/.dashboardrc
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.dashboardrc")))
  (if (file-exists? debugcontrolf)
      (load debugcontrolf)))

(define *dbdir* (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))
(define *dbstruct-local*  (make-dbr:dbstruct path:  *dbdir*
					     local: #t))
(define *db-file-path* (db:dbfile-path 0))

;; HACK ALERT: this is a hack, please fix.

Added nmsg-transport.scm version [c28712df60].







































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

;; (use nanomsg)

(declare (unit nmsg-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))

(include "common_records.scm")
(include "db_records.scm")

;; Transition to pub --> sub with pull <-- push
;;
;;   1. client sends request to server via push to the pull port
;;   2. server puts request in queue or processes immediately as appropriate
;;   3. server puts responses from completed requests into pub port 
;;
;; TODO
;;
;; Done Tested
;; [x]  [ ]    1. Add columns pullport pubport to servers table
;; [x]  [ ]    2. Add rm of monitor.db if older than 11/12/2012 
;; [x]  [ ]    3. Add create of pullport and pubport with finding of available ports
;; [x]  [ ]    4. Add client compose of request
;; [x]  [ ]        - name of client: testname/itempath-test_id-hostname 
;; [x]  [ ]        - name of request: callname, params
;; [x]  [ ]        - request key: f(clientname, callname, params)
;; [x]  [ ]    5. Add processing of subscription hits
;; [x]  [ ]        - done when get key 
;; [x]  [ ]        - return results
;; [x]  [ ]    6. Add timeout processing
;; [x]  [ ]        - after 60 seconds
;; [ ]  [ ]            i. check server alive, connect to new if necessary
;; [ ]  [ ]           ii. resend request
;; [ ]  [ ]    7. Turn self ping back on

(define (nmsg-transport:make-server-url hostport #!key (bindall #f))
  (if (not hostport)
      #f
      (conc "tcp://" (if bindall "*" (car hostport)) ":" (cadr hostport))))

(define *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

(define (nmsg-transport:run dbstruct hostn run-id server-id #!key (retrynum 1000))
  (debug:print 2 "Attempting to start the server ...")
  (let* ((start-port      (portlogger:open-run-close portlogger:find-port))
	 (server-thread   (make-thread (lambda ()
					 (nmsg-transport:try-start-server dbstruct run-id start-port server-id))
				       "server thread"))
	 (tdbdat          (tasks:open-db)))
    (thread-start! server-thread)
    (thread-sleep! 0.1)
    (if (nmsg-transport:ping hostn start-port timeout: 2 expected-key: (current-process-id))
	(let ((interface (if (equal? hostn "-")(get-host-name) hostn)))
	  (tasks:server-set-interface-port (db:delay-if-busy tdbdat) server-id interface start-port)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
	  (set! *server-info* (list hostn start-port)) ;; probably not needed anymore? currently used by keep-running
	  (thread-sleep! 3) ;; give some margin for queries to complete before switching from file based access to server based access
	  ;; (set! *inmemdb*  dbstruct)
	  (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
	  (thread-start! (make-thread
			  (lambda ()(nmsg-transport:keep-running server-id run-id))
			  "keep running"))
	  (thread-join! server-thread))
	(if (> retrynum 0)
	    (begin
	      (debug:print 0 "WARNING: Failed to connect to server (self) on host " hostn ":" start-port ", trying again.")
	      (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id "failed to start, never received server alive signature")
	      (portlogger:open-run-close portlogger:set-failed start-port)
	      (nmsg-transport:run dbstruct hostn run-id server-id))
	    (begin
	      (debug:print 0 "ERROR: could not find an open port to start server on. Giving up")
	      (exit 1))))))

(define (nmsg-transport:try-start-server dbstruct run-id portnum server-id)
  (let ((repsoc (nn-socket 'rep)))
    (nn-bind repsoc (conc "tcp://*:" portnum))
    (let loop ((msg-in (nn-recv repsoc)))
      (let* ((dat    (db:string->obj msg-in transport: 'nmsg)))
	(debug:print 0 "server, received: " dat)
	(let ((result (api:execute-requests dbstruct dat)))
	  (debug:print 0 "server, sending: " result)
	  (nn-send repsoc (db:obj->string result  transport: 'nmsg)))
	(loop (nn-recv repsoc))))))

;; all routes though here end in exit ...
;;
(define (nmsg-transport:launch run-id)
  (let* ((tdbdat   (tasks:open-db))
	 (dbstruct (db:setup run-id))
	 (hostn    (or (args:get-arg "-server") "-")))
    (set! *run-id*   run-id)
    (set! *inmemdb* dbstruct)
    ;; with nbfake daemonize isn't really needed
    ;;
    ;; (if (args:get-arg "-daemonize")
    ;;     (begin
    ;;       (daemon:ize)
    ;;       (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
    ;;           (begin
    ;;     	(current-error-port *alt-log-file*)
    ;;     	(current-output-port *alt-log-file*)))))
    (if (server:check-if-running run-id)
	(begin
	  (debug:print-info 0 "Server for run-id " run-id " already running")
	  (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
	       (remtries  4))
      (if (not server-id)
	  (if (> remtries 0)
	      (begin
		(thread-sleep! 2)
		(if (not (server:check-if-running run-id))
		    (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
			  (- remtries 1))
		    (begin
		      (debug:print-info 0 "Another server took the slot, exiting")
		      (exit 0))))
	      (begin
		;; since we didn't get the server lock we are going to clean up and bail out
		(debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
		(tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch")
		))
	  ;; locked in a server id, try to start up
	  (nmsg-transport:run dbstruct hostn run-id server-id))
      (set! *didsomething* #t)
      (exit))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

(define (nmsg-transport:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

;; ping the server at host:port
;;   return the open socket if successful (return-socket == #t)
;;   expect the key expected-key returned in payload
;;   send our-key or #f as payload
;;
(define (nmsg-transport:ping hostn port #!key (timeout 3)(return-socket #t)(expected-key #f)(our-key #f)(socket #f))
  ;; send a random number along with pid and check that we get it back
  (let* ((host    (if (or (not hostn)
			  (equal? hostn "-")) ;; use localhost
		      (get-host-name)
		      hostn))
	 (req     (or socket
		      (let ((soc (nn-socket 'req)))
			(nn-connect soc (conc "tcp://" host ":" port))
			soc)))
	 (success #t)
	 (dat     (vector "ping" our-key))
	 (result  (condition-case 
		   (nmsg-transport:client-api-send-receive-raw req dat timeout: timeout)
		   ((timeout)(set! success #f) #f)))
	 (key     (if success 
		      (vector-ref result 1)
		      #f)))
    (debug:print 0 "success=" success ", key=" key ", expected-key=" expected-key ", equal? " (equal? key expected-key))
    (if (and success
	     (or (not expected-key) ;; just getting a reply is good enough then
		 (equal? key expected-key)))
	(if return-socket
	    req
	    (begin
	      (if (not socket)(nn-close req)) ;; don't want a side effect of closing socket if handed it
	      #t))
	(begin
	  (if (not socket)(nn-close req)) ;; failed to ping, close socket as side effect
	  #f))))

;; send data to server, wait max of timeout seconds for a response.
;; return #( success/fail result )
;;
;; for effiency it is easier to do the obj->string and string->obj here.
;;
(define (nmsg-transport:client-api-send-receive-raw socreq indat #!key (enable-send #t)(timeout 25))
  (let* ((success     #f)
	 (result      #f)
	 (keepwaiting #t)
	 (dat         (db:obj->string indat transport: 'nmsg))
	 (send-recv   (make-thread
		       (lambda ()
			 (nn-send socreq dat)
			 (let* ((res (nn-recv socreq)))
			   (set! success #t)
			   (set! result (db:string->obj res transport: 'nmsg))))
		       "send-recv"))
	 (timeout     (make-thread
		       (lambda ()
			 (let loop ((count 0))
			   (thread-sleep! 1)
			   (debug:print-info 1 "send-receive-raw, still waiting after " count " seconds...")
			   (if (and keepwaiting (< count timeout)) ;; yes, this is very aproximate
			       (loop (+ count 1))))
			 (if keepwaiting
			     (begin
			       (print "timeout waiting for ping")
			       (thread-terminate! send-recv))))
		       "timeout")))
    ;; replace with condition-case?
    (handle-exceptions
     exn
     (set! result "timeout")
     (thread-start! timeout)
     (thread-start! send-recv)
     (thread-join! send-recv)
     (if success (thread-terminate! timeout)))
    ;; raise timeout error if timed out
    (if success
	(if (and (vector? result)
		 (vector-ref result 0)) ;; did it fail at the server?
	    result                ;; nope, all good
	    (begin
	      (debug:print 0 "ERROR: error occured at server, info=" (vector-ref result 2))
	      (debug:print 0 " client call chain:")
	      (print-call-chain (current-error-port))
	      (debug:print 0 " server call chain:")
	      (pp (vector-ref result 1) (current-error-port))
	      (signal (vector-ref result 0))))
	(signal (make-composite-condition
		 (make-property-condition 'timeout 'message "nmsg-transport:client-api-send-receive-raw timed out talking to server"))))))

;; run nmsg-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (nmsg-transport:keep-running server-id run-id)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *server-info*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat 
			      (begin
				(debug:print-info 0 "keep-running got sdat=" sdat)
				sdat)
                              (begin
                                (thread-sleep! 0.5)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdbdat      (tasks:open-db))
	 (server-timeout (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
			   (if (and (string? tmo)
				    (string->number tmo))
			       (* 60 60 (string->number tmo))
			       ;; (* 3 24 60 60) ;; default to three days
			       (* 60 1)         ;; default to one minute
			       ;; (* 60 60 25)      ;; default to 25 hours
			       ))))
    (print "Keep-running got server pid " server-id ", using iface " iface " and port " port)
    (let loop ((count 0))
      (thread-sleep! 4) ;; no need to do this very often
      ;; NB// sync currently does NOT return queue-length
      (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
      ;; (print "Server running, count is " count)
        (if (< count 1) ;; 3x3 = 9 secs aprox
            (loop (+ count 1)))
        
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
	(db:sync-touched *inmemdb* run-id force-sync: #t)
        (if (and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
            (begin
              (debug:print-info 0 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              (set! *time-to-exit* #t)
	      (db:sync-touched *inmemdb* run-id force-sync: #t)
              (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running")
              (debug:print-info 0 "Server shutdown complete. Exiting")
              (exit)
	      ))))))

;;======================================================================
;; C L I E N T S
;;======================================================================

(define (nmsg-transport:client-connect iface portnum)
  (let* ((reqsoc      (nmsg-transport:ping iface portnum return-socket: #t)))
    (vector iface portnum #f #f #f (current-seconds) reqsoc)))

;; returns result, there is no sucess/fail flag - handled via excpections
;;
(define (nmsg-transport:client-api-send-receive run-id connection-info cmd param #!key (remtries 5))
  ;; NB// In the html version of this routine there is a call to 
  ;;      tasks:kill-server-run-id when there is an exception
  (mutex-lock! *http-mutex*)
  (let* ((packet  (vector cmd param))
	 (reqsoc  (http-transport:server-dat-get-socket connection-info))
	 (res     (nmsg-transport:client-api-send-receive-raw reqsoc packet)))
;;	 (status  (vector-ref rawres 0))
;;	 (result  (vector-ref rawres 1)))
    (mutex-unlock! *http-mutex*)
    res)) ;; (vector status (if status (db:string->obj result transport: 'nmsg) result))))
	
;;======================================================================
;; J U N K 
;;======================================================================

;; DO NOT USE
;;
(define (nmsg-transport:client-signal-handler signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (if (not *received-response*)
				 (receive-message* *runremote*))) ;; flush out last call if applicable
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 "       Done.")
			     (exit 4))
			   "exit on ^C timer")))
     (thread-start! th2)
     (thread-start! th1)
     (thread-join! th2))))

Modified process.scm from [781c177a90] to [13bb37a3d1].

99
100
101
102
103
104
105
106
107













108
109
110
111
112
113
114
99
100
101
102
103
104
105


106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125







-
-
+
+
+
+
+
+
+
+
+
+
+
+
+







            (loop (let ((l (read-line fh)))
                    (if (eof-object? l) l (proc l)))
                  (append result (list curr)))
            result))))

;; here is an example line where the shell is sh or bash
;; "find / -print 2&>1 > findall.log"
(define (run-n-wait cmdline)
  (let ((pid (process-run cmdline)))
(define (run-n-wait cmdline #!key (params #f)(print-cmd #f))
  (if print-cmd 
      (debug:print 0 
		   (if (string? print-cmd)
		       print-cmd
		       "")
		   cmdline
		   (if params
		       (string-intersperse params " ")
		       "")))
  (let ((pid (if params
		 (process-run cmdline params)
		 (process-run cmdline))))
    (let loop ((i 0))
      (let-values (((pid-val exit-status exit-code) (process-wait pid #t)))
         (if (eq? pid-val 0)
	     (begin
	       (thread-sleep! 2)
	       (loop (+ i 1)))
	     (values pid-val exit-status exit-code))))))

Modified rmt.scm from [0582204ff9] to [6e9129f161].

11
12
13
14
15
16
17

18
19
20
21
22
23
24

25
26
27
28
29
30
31




32
33
34
35
36









37

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57



58
59
60
61
62
63
64
65
66
67
68

69
70
71

72
73
74

75
76
77
78
79
80
81
82









83
84

85
86

87

88
89


90
91
92
93










94
95
96
97

98
99
100
101



102
103
104

105
106



107






108
109
110
111
112
113


114





115

116

117


118


119
120







121
122
123
124
125
126
127
128
129
130
131
132
133

134
135
136
137
138
139
140
11
12
13
14
15
16
17
18
19
20
21
22
23
24

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50

51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

85



86
87
88

89
90
91
92





93
94
95
96
97
98
99
100
101
102
103
104
105

106

107
108
109
110
111




112
113
114
115
116
117
118
119
120
121
122



123




124
125
126



127
128

129
130
131
132
133
134
135
136
137
138
139
140
141
142


143
144
145
146
147
148
149
150

151

152
153
154
155
156
157
158


159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177

178
179
180
181
182
183
184
185







+






-
+







+
+
+
+





+
+
+
+
+
+
+
+
+
-
+




















+
+
+










-
+
-
-
-
+


-
+



-
-
-
-
-
+
+
+
+
+
+
+
+
+


+

-
+
-
+


+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+

-
-
-
+
-
-
-
-
+
+
+
-
-
-
+

-
+
+
+

+
+
+
+
+
+




-
-
+
+

+
+
+
+
+
-
+
-
+

+
+

+
+
-
-
+
+
+
+
+
+
+












-
+








(use json format)

(declare (unit rmt))
(declare (uses api))
(declare (uses tdb))
(declare (uses http-transport))
(declare (uses nmsg-transport))

;;
;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; ;; For debugging add the following to ~/.megatestrc
;;
;;a
;; (require-library trace)
;; (import trace)
;; (trace
;; rmt:send-receive
;; api:execute-requests
;; )

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u


;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; NOT USED?
;;
;; (define (rmt:call-transport run-id connection-info cmd jparams)
;;   (case (server:get-transport)
;;     ((rpc)  ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))
;;     ((http) (http-transport:client-api-send-receive run-id connection-info cmd jparams))
;;     ((fs)   ( fs-transport:client-api-send-receive run-id connection-info cmd jparams))
;;     ((zmq)  (zmq-transport:client-api-send-receive run-id connection-info cmd jparams))
;;     (else   ( rpc-transport:client-api-send-receive run-id connection-info cmd jparams))))
;; #t means - please start a server!

;;
(define (rmt:write-frequency-over-limit? cmd run-id)
  (and (not (member cmd api:read-only-queries))
       (let* ((tmprec (hash-table-ref/default *write-frequency* run-id #f))
	      (record (if tmprec tmprec 
			  (let ((v (vector (current-seconds) 0)))
			    (hash-table-set! *write-frequency* run-id v)
			    v)))
	      (count  (+ 1 (vector-ref record 1)))
	      (start  (vector-ref record 0))
	      (queries-per-second (/ (* count 1.0)
				     (max (- (current-seconds) start) 1))))
	 (vector-set! record 1 count)
	 (if (and (> count 10)
		  (> queries-per-second 10))
	     (begin
	       (debug:print-info 1 "db write rate too high, starting a server, count=" count " start=" start " run-id=" run-id " queries-per-second=" queries-per-second)
	       #t)
	     #f))))

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info run-id)
  (let ((cinfo (hash-table-ref/default *runremote* run-id #f)))
    (if cinfo
	cinfo
	;; NB// can cache the answer for server running for 10 seconds ...
	;;  ;; (and (not (rmt:write-frequency-over-limit? cmd run-id))
	(if (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id)
	    (client:setup run-id)
	    #f))))

;; cmd is a symbol
(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id
;; vars is a json string encoding the parameters for the call
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 0))
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)) ;; start attemptnum at 1 so the modulo below works as expected
  ;; clean out old connections
  (mutex-lock! *db-multi-sync-mutex*)
  (let ((expire-time (- (current-seconds) 60)))
  (let ((expire-time (- (current-seconds) (server:get-timeout) 10))) ;; don't forget the 10 second margin
    (for-each 
     (lambda (run-id)
       (let ((connection (hash-table-ref/default *runremote* run-id #f)))
	 (if (and connection 
		  (< (http-transport:server-dat-get-last-access connection) expire-time))
	     (begin
	       (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
	       (hash-table-delete! *runremote* run-id)))))
         (if (and (vector? connection)
        	  (< (http-transport:server-dat-get-last-access connection) expire-time))
             (begin
               (debug:print-info 0 "Discarding connection to server for run-id " run-id ", too long between accesses")
               ;; SHOULD CLOSE THE CONNECTION HERE
	       (case *transport-type*
		 ((nmsg)(nn-close (http-transport:server-dat-get-socket 
				   (hash-table-ref *runremote* run-id)))))
               (hash-table-delete! *runremote* run-id)))))
     (hash-table-keys *runremote*)))
  (mutex-unlock! *db-multi-sync-mutex*)
  ;; (mutex-lock! *send-receive-mutex*)
  (let* ((run-id          (if rid rid 0))
	 (connection-info (rmt:get-connection-info run-id))
	 (connection-info (rmt:get-connection-info run-id)))
	 (jparams         (db:obj->string params)))
    ;; the nmsg method does the encoding under the hood (the http method should be changed to do this also)
    (if connection-info
	;; use the server if have connection info
	(let* ((dat     (case *transport-type*
			  ((http)(condition-case
	(let* ((dat     (http-transport:client-api-send-receive run-id connection-info cmd jparams))
	       (res     (if (and dat (vector? dat)) (vector-ref dat 1) #f))
	       (success (if (and dat (vector? dat)) (vector-ref dat 0) #f)))
	  (http-transport:server-dat-update-last-access connection-info)
				  (http-transport:client-api-send-receive run-id connection-info cmd params)
				  ((commfail)(vector #f "communications fail"))
				  ((exn)(vector #f "other fail"))))
			  ((nmsg)(condition-case
				  (nmsg-transport:client-api-send-receive run-id connection-info cmd params)
				  ((timeout)(vector #f "timeout talking to server"))))
			  (else  (exit))))
	       (success (if (vector? dat) (vector-ref dat 0) #f))
	       (res     (if (vector? dat) (vector-ref dat 1) #f)))
	  (if (vector? connection-info)(http-transport:server-dat-update-last-access connection-info))
	  (if success
	      (db:string->obj res)
	      ;; (if (< attemptnum 100)
	      ;;     (begin
	      (begin
	      ;;       (hash-table-delete! *runremote* run-id)
	      ;;       (thread-sleep! 0.5)
	      ;;       (rmt:send-receive cmd rid params attempnum: (+ attemptnum 1)))
	      ;;     (begin
		;; (mutex-unlock! *send-receive-mutex*)
		(case *transport-type* 
		  ((http) res) ;; (db:string->obj res))
	      ;;       (print-call-chain (current-error-port))
	      ;;       (debug:print 0 "ERROR: too many attempts to communicate have failed. Giving up. Kill your mtest processes and start over")
	      ;;       (exit 1)))))
		  ((nmsg) res))) ;; (vector-ref res 1)))
	      (begin ;; let ((new-connection-info (client:setup run-id)))
		(debug:print 0 "WARNING: Communication failed, trying call to http-transport:client-api-send-receive again.")
		(debug:print 0 "WARNING: Communication failed, trying call to rmt:send-receive again.")
		;; (case *transport-type*
		;;   ((nmsg)(nn-close (http-transport:server-dat-get-socket connection-info))))
		(hash-table-delete! *runremote* run-id) ;; don't keep using the same connection
		;; NOTE: killing server causes this process to block forever. No idea why. Dec 2. 
		;; (if (eq? (modulo attemptnum 5) 0)
		;;     (tasks:kill-server-run-id run-id tag: "api-send-receive-failed"))
		;; (mutex-unlock! *send-receive-mutex*) ;; close the mutex here to allow other threads access to communications
		(tasks:start-and-wait-for-server (tasks:open-db) run-id 15)
		;; (nmsg-transport:client-api-send-receive run-id connection-info cmd param remtries: (- remtries 1))))))

		;; no longer killing the server in http-transport:client-api-send-receive
		;; may kill it here but what are the criteria?
		;; start with three calls then kill server
		(if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		(thread-sleep! 2)
		;; (if (eq? attemptnum 3)(tasks:kill-server-run-id run-id))
		;; (thread-sleep! 2)
		(rmt:send-receive cmd run-id params attemptnum: (+ attemptnum 1)))))
	;; no connection info? try to start a server, or access locally if no
	;; server and the query is read-only
	;;
	;; Note: The tasks db was checked for a server in starting mode in the rmt:get-connection-info call
	;;
	(if (and (< attemptnum 10)
	(if (and (< attemptnum 15)
		 (tasks:need-server run-id))
		 (member cmd api:write-queries))
	    (begin
	      (hash-table-delete! *runremote* run-id)
	      ;; (mutex-unlock! *send-receive-mutex*)
	      (tasks:start-and-wait-for-server (db:delay-if-busy (tasks:open-db)) run-id 10)
	      ;; (client:setup run-id) ;; client setup happens in rmt:get-connection-info
	      (thread-sleep! (random 5)) ;; give some time to settle and minimize collison?
	      (rmt:send-receive cmd rid params (+ attemptnum 1)))
	    (rmt:open-qry-close-locally cmd run-id params)))))
	      (rmt:send-receive cmd rid params attemptnum: (+ attemptnum 1)))
	    (begin
	      ;; (debug:print 0 "ERROR: Communication failed!")
	      ;; (mutex-unlock! *send-receive-mutex*)
	      ;; (exit)
	      (rmt:open-qry-close-locally cmd run-id params)
	      )))))

(define (rmt:update-db-stats run-id rawcmd params duration)
  (mutex-lock! *db-stats-mutex*)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 "WARNING: stats collection failed in update-db-stats")
     (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
     (print "exn=" (condition->list exn))
     #f) ;; if this fails we don't care, it is just stats
   (let* ((cmd      (conc "run-id=" run-id " " (if (eq? rawcmd 'general-call) (car params) rawcmd)))
	  (stat-vec (hash-table-ref/default *db-stats* cmd #f)))
     (if (not stat-vec)
     (if (not (vector? stat-vec))
	 (let ((newvec (vector 0 0)))
	   (hash-table-set! *db-stats* cmd newvec)
	   (set! stat-vec newvec)))
     (vector-set! stat-vec 0 (+ (vector-ref stat-vec 0) 1))
     (vector-set! stat-vec 1 (+ (vector-ref stat-vec 1) duration))))
  (mutex-unlock! *db-stats-mutex*))

172
173
174
175
176
177
178
179

180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200



























201
202
203
204
205
206
207
208
209
210













211
212
213
214
215
216
217
217
218
219
220
221
222
223

224
225
226
227
228
229
230















231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260







261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280







-
+






-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+







			     (if (> tot 10)
				 (cons newmax-cmd currmax)
				 (cons 'none 0))
			     (loop (car tal)(cdr tal) newmax-cmd currmax)))))))
    (mutex-unlock! *db-stats-mutex*)
    res))
	  
(define (rmt:open-qry-close-locally cmd run-id params)
(define (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
  (let* ((dbstruct-local (if *dbstruct-db*
			     *dbstruct-db*
			     (let* ((dbdir (conc    (configf:lookup *configdat* "setup" "linktree") "/.db"))
				    (db (make-dbr:dbstruct path:  dbdir local: #t)))
			       (set! *dbstruct-db* db)
			       db)))
	 (db-file-path   (db:dbfile-path 0)))
    ;; (read-only      (not (file-read-access? db-file-path)))
    (let* ((start         (current-milliseconds))
	   (res           (api:execute-requests dbstruct-local (symbol->string cmd) params))
	   (duration      (- (current-milliseconds) start)))
      (rmt:update-db-stats run-id cmd params duration)
      ;; mark this run as dirty if this was a write
      (if (not (member cmd api:read-only-queries))
	  (let ((start-time (current-seconds)))
	    (mutex-lock! *db-multi-sync-mutex*)
	    ;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))
	    ;; just set it every time. Is a write more expensive than a read and does it matter?
	    (hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
	    (mutex-unlock! *db-multi-sync-mutex*)))
      res)))
	 (db-file-path   (db:dbfile-path 0))
	 ;; (read-only      (not (file-read-access? db-file-path)))
	 (start          (current-milliseconds))
	 (resdat         (api:execute-requests dbstruct-local (vector (symbol->string cmd) params)))
	 (success        (vector-ref resdat 0))
	 (res            (vector-ref resdat 1))
	 (duration       (- (current-milliseconds) start)))
    (if (not success)
	(if (> remretries 0)
	    (begin
	      (debug:print 0 "ERROR: local query failed. Trying again.")
	      (thread-sleep! (/ (random 5000) 1000)) ;; some random delay 
	      (rmt:open-qry-close-locally cmd run-id params remretries: (- remretries 1)))
	    (begin
	      (debug:print 0 "ERROR: too many retries in rmt:open-qry-close-locally, giving up")
	      #f))
	(begin
	  (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write
	  (if (not (member cmd api:read-only-queries))
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		;; (if (not (hash-table-ref/default *db-local-sync* run-id #f))
		;; just set it every time. Is a write more expensive than a read and does it matter?
		(hash-table-set! *db-local-sync* (or run-id 0) start-time) ;; the oldest "write"
		(mutex-unlock! *db-multi-sync-mutex*)))
	  res))))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (dat      (http-transport:client-api-send-receive run-id connection-info cmd jparams)))
    (if (and dat (vector-ref dat 0))
	(db:string->obj (vector-ref dat 1))
	(begin
	  (debug:print 0 "ERROR: rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
	  dat))))
	 ;; (jparams  (db:obj->string params)) ;; (rmt:dat->json-str params))
	 (res  	   (handle-exceptions
		    exn
		    #f
		    (http-transport:client-api-send-receive run-id connection-info cmd params))))
;;		    ((commfail) (vector #f "communications fail")))))
    (if (and res (vector-ref res 0))
	(vector-ref res 1) ;;; YES!! THIS IS CORRECT!! CHANGE IT HERE, THEN CHANGE rmt:send-receive ALSO!!!
	#f)))
;; 	(db:string->obj (vector-ref dat 1))
;; 	(begin
;; 	  (debug:print 0 "ERROR: rmt:send-receive-no-auto-client-setup failed, attempting to continue. Got " dat)
;; 	  dat))))

;; Wrap json library for strings (why the ports crap in the first place?)
(define (rmt:dat->json-str dat)
  (with-output-to-string 
    (lambda ()
      (json-write dat))))

240
241
242
243
244
245
246

247
248

249
250



251
252
253
254
255
256
257
303
304
305
306
307
308
309
310
311
312
313


314
315
316
317
318
319
320
321
322
323







+


+
-
-
+
+
+







;;  M I S C
;;======================================================================

(define (rmt:login run-id)
  (rmt:send-receive 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))

;; This login does no retries under the hood - it acts a bit like a ping.
;; Deprecated for nmsg-transport.
;;
(define (rmt:login-no-auto-client-setup connection-info run-id)
  (case *transport-type*
  (rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
  
    ((http)(rmt:send-receive-no-auto-client-setup connection-info 'login run-id (list *toppath* megatest-version run-id *my-client-signature*)))
    ((nmsg)(nmsg-transport:client-api-send-receive run-id connection-info 'login (list *toppath* megatest-version run-id *my-client-signature*)))))

;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))

(define (rmt:sync-inmem->db run-id)
313
314
315
316
317
318
319




320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352





















353
354
355
356
357
358
359
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399























400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427







+
+
+
+










-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







  (if (number? run-id)
      (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals))
      (begin
	(debug:print "ERROR: rmt:get-tests-for-run called with bad run-id=" run-id)
	(print-call-chain (current-error-port))
	'())))

;; get stuff via synchash 
(define (rmt:synchash-get run-id proc synckey keynum params)
  (rmt:send-receive 'synchash-get run-id (list run-id proc synckey keynum params)))

;; IDEA: Threadify these - they spend a lot of time waiting ...
;;
(define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
  (let ((multi-run-mutex (make-mutex))
	(run-id-list (if run-ids
			 run-ids
			 (rmt:get-all-run-ids)))
	(result      '()))
    (if (null? run-id-list)
	'()
	(for-each 
	 (lambda (th)

	   (thread-join! th)) ;; I assume that joining completed threads just moves on
	 (let loop ((hed     (car run-id-list))
		    (tal     (cdr run-id-list))
		    (threads '()))
	   (let* ((newthread (make-thread
			      (lambda ()
				(let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
				  (if (list? res)
				      (begin
					(mutex-lock! multi-run-mutex)
					(set! result (append result res))
					(mutex-unlock! multi-run-mutex))
				      (debug:print 0 "ERROR: get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
			      (conc "multi-run-thread for run-id " hed)))
		  (newthreads (cons newthread threads)))
	     (thread-start! newthread)
	     (thread-sleep! 0.5) ;; give that thread some time to start
	     (if (null? tal)
		 newthreads
		 (loop (car tal)(cdr tal) newthreads))))))
	(let loop ((hed     (car run-id-list))
		   (tal     (cdr run-id-list))
		   (threads '()))
	  (if (> (length threads) 5)
	      (loop hed tal (filter (lambda (th)(not (member (thread-state th) '(terminated dead)))) threads))
	      (let* ((newthread (make-thread
				 (lambda ()
				   (let ((res (rmt:send-receive 'get-tests-for-run-mindata hed (list hed testpatt states status not-in))))
				     (if (list? res)
					 (begin
					   (mutex-lock! multi-run-mutex)
					   (set! result (append result res))
					   (mutex-unlock! multi-run-mutex))
					 (debug:print 0 "ERROR: get-tests-for-run-mindata failed for run-id " hed ", testpatt " testpatt ", states " states ", status " status ", not-in " not-in))))
				 (conc "multi-run-thread for run-id " hed)))
		     (newthreads (cons newthread threads)))
		(thread-start! newthread)
		(thread-sleep! 0.5) ;; give that thread some time to start
		(if (null? tal)
		    newthreads
		    (loop (car tal)(cdr tal) newthreads))))))
    result))

;; ;; IDEA: Threadify these - they spend a lot of time waiting ...
;; ;;
;; (define (rmt:get-tests-for-runs-mindata run-ids testpatt states status not-in)
;;   (let ((run-id-list (if run-ids
;; 			 run-ids
480
481
482
483
484
485
486

487

488
489
490
491
492
493
494
548
549
550
551
552
553
554
555

556
557
558
559
560
561
562
563







+
-
+







(define (rmt:update-run-event_time run-id)
  (rmt:send-receive 'update-run-event_time #f (list run-id)))

(define (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit)
  (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit)))

(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
  (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
  (rmt:send-receive 'find-and-mark-incomplete run-id (list run-id ovr-deadtime)))
      (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))))

;;======================================================================
;; M U L T I R U N   Q U E R I E S
;;======================================================================

;; Need to move this to multi-run section and make associated changes
(define (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
571
572
573
574
575
576
577
































640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
  (rmt:send-receive 'testmeta-update-field #f (list test-name fld val)))

(define (rmt:test-data-rollup run-id test-id status)
  (rmt:send-receive 'test-data-rollup run-id (list run-id test-id status)))

(define (rmt:csv->test-data run-id test-id csvdata)
  (rmt:send-receive 'csv->test-data run-id (list run-id test-id csvdata)))

;;======================================================================
;;  T A S K S
;;======================================================================

(define (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
  (rmt:send-receive 'find-task-queue-records #f (list target run-name test-patt state-patt action-patt)))

(define (rmt:tasks-add action owner target runname testpatt params)
  (rmt:send-receive 'tasks-add #f (list action owner target runname testpatt params)))

(define (rmt:tasks-set-state-given-param-key param-key new-state)
  (rmt:send-receive 'tasks-set-state-given-param-key #f (list  param-key new-state)))

;;======================================================================
;; A R C H I V E S
;;======================================================================

(define (rmt:archive-get-allocations  testname itempath dneeded)
  (rmt:send-receive 'archive-get-allocations #f (list testname itempath dneeded)))

(define (rmt:archive-register-block-name bdisk-id archive-path)
  (rmt:send-receive 'archive-register-block-name #f (list bdisk-id archive-path)))

(define (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
  (rmt:send-receive 'archive-allocate-test-to-block #f (list  block-id testsuite-name areakey)))

(define (rmt:archive-register-disk bdisk-name bdisk-path df)
  (rmt:send-receive 'archive-register-disk #f (list bdisk-name bdisk-path df)))

(define (rmt:test-set-archive-block-id run-id test-id archive-block-id)
  (rmt:send-receive 'test-set-archive-block-id run-id (list run-id test-id archive-block-id)))

Added rpc-transport.scm version [1e1f685d67].



































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n rpc)
(import (prefix rpc rpc:))

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(declare (unit rpc-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.

(include "common_records.scm")
(include "db_records.scm")

;; procstr is the name of the procedure to be called as a string
(define (rpc-transport:autoremote procstr params)
  (handle-exceptions
   exn
   (begin
     (debug:print 1 "Remote failed for " proc " " params)
     (apply (eval (string->symbol procstr)) params))
   ;; (if *runremote*
   ;;    (apply (eval (string->symbol (conc "remote:" procstr))) params)
   (apply (eval (string->symbol procstr)) params)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (rpc-transport:launch run-id)
  (set! *run-id*   run-id)
  (if (args:get-arg "-daemonize")
      (daemon:ize))
  (if (server:check-if-running run-id)
      (begin
	(debug:print 0 "INFO: Server for run-id " run-id " already running")
	(exit 0)))
  (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))
	     (remtries  4))
    (if (not server-id)
	(if (> remtries 0)
	    (begin
	      (thread-sleep! 2)
	      (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id)
		    (- remtries 1)))
	    (begin
	      ;; since we didn't get the server lock we are going to clean up and bail out
	      (debug:print-info 2 "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
	      (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " rpc-transport:launch")))
	(begin
	  (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id)
	  (exit)))))

(define (rpc-transport:run hostn run-id server-id)
  (debug:print 2 "Attempting to start the rpc server ...")
   ;; (trace rpc:publish-procedure!)

  (rpc:publish-procedure! 'server:login server:login)
  (rpc:publish-procedure! 'testing (lambda () "Just testing"))

  (let* ((db              #f)
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (open-run-close tasks:server-get-next-port tasks:open-db))
	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree"))
	 (rpc:listener    (rpc-transport:find-free-port-and-open (rpc:default-server-port)))
	 (th1             (make-thread
			   (lambda ()
			     ((rpc:make-server rpc:listener) #t))
			   "rpc:server"))
			   ;; (cute (rpc:make-server rpc:listener) "rpc:server")
			   ;; 'rpc:server))
	 (hostname        (if (string=? "-" hostn)
			      (get-host-name) 
			      hostn))
	 (ipaddrstr       (if (string=? "-" hostn)
			      (server:get-best-guess-address hostname) ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
			      #f))
	 (portnum         (rpc:default-server-port))
	 (host:port       (conc (if ipaddrstr ipaddrstr hostname) ":" portnum))
	 (tdb             (tasks:open-db)))
    (thread-start! th1)
    (set! db *inmemdb*)
    (open-run-close tasks:server-set-interface-port 
		    tasks:open-db 
		    server-id 
		    ipaddrstr portnum)
    (debug:print 0 "Server started on " host:port)
    
    ;; (trace rpc:publish-procedure!)
    ;; (rpc:publish-procedure! 'server:login server:login)
    ;; (rpc:publish-procedure! 'testing (lambda () "Just testing"))

    ;;======================================================================
    ;;	  ;; end of publish-procedure section
    ;;======================================================================
    ;;
    (on-exit (lambda ()
	       (open-run-close tasks:server-set-state! tasks:open-db server-id "stopped")))

    (set! *rpc:listener* rpc:listener)
    (tasks:server-set-state! tdb server-id "running")
    (set! *inmemdb*  (db:setup run-id))
    ;; if none running or if > 20 seconds since 
    ;; server last used then start shutdown
    (let loop ((count 0))
      (thread-sleep! 5) ;; no need to do this very often
      (let ((numrunning -1)) ;; (db:get-count-tests-running db)))
	(if (or (> numrunning 0)
		(> (+ *last-db-access* 60)(current-seconds)))
	    (begin
	      (debug:print-info 0 "Server continuing, tests running: " numrunning ", seconds since last db access: " (- (current-seconds) *last-db-access*))
	      (loop (+ 1 count)))
	    (begin
	      (debug:print-info 0 "Starting to shutdown the server side")
	      (open-run-close tasks:server-delete-record tasks:open-db server-id " rpc-transport:try-start-server stop")
	      (thread-sleep! 10)
	      (debug:print-info 0 "Max cached queries was " *max-cache-size*)
	      (debug:print-info 0 "Server shutdown complete. Exiting")
	      ))))))

(define (rpc-transport:find-free-port-and-open port)
  (handle-exceptions
   exn
	  (begin
     (print "Failed to bind to port " (rpc:default-server-port) ", trying next port")
     (rpc-transport:find-free-port-and-open (+ port 1)))
   (rpc:default-server-port port)
   (tcp-read-timeout 240000)
   (tcp-listen (rpc:default-server-port) 10000)))

(define (rpc-transport:ping run-id host port)
  (handle-exceptions
   exn
   (begin
     (print "SERVER_NOT_FOUND")
     (exit 1))
   (let ((login-res ((rpc:procedure 'server:login host port) *toppath*)))
     (if (and (list? login-res)
	      (car login-res))
	 (begin
	   (print "LOGIN_OK")
	   (exit 0))
	 (begin
	   (print "LOGIN_FAILED")
	   (exit 1))))))

(define (rpc-transport:client-setup run-id #!key (remtries 10))
  (if *runremote*
      (begin
	(debug:print 0 "ERROR: Attempt to connect to server but already connected")
	#f)
      (let* ((host-info (hash-table-ref/default *runremote* run-id #f))) ;; (open-run-close db:get-var #f "SERVER"))
	(if host-info
	    (let ((iface    (car host-info))
		  (port     (cadr host-info))
		  (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
	      (if ping-res
		  (let ((server-dat (list iface port #f #f #f)))
		    (hash-table-set! *runremote* run-id server-dat)
		    server-dat)
		  (begin
		    (server:try-running run-id)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))
 	    (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id)))
 	      (debug:print-info 0 "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if server-db-info
 		  (let* ((iface     (tasks:hostinfo-get-interface server-db-info))
 			 (port      (tasks:hostinfo-get-port      server-db-info))
			 (server-dat (list iface port #f #f #f))
 			 (ping-res  ((rpc:procedure 'server:login host port) *toppath*)))
 		    (if start-res
 			(begin
 			  (hash-table-set! *runremote* run-id server-dat)
			  server-dat)
			(begin
			  (server:try-running run-id)
			  (thread-sleep! 2)
			  (rpc-transport:client-setup run-id (- remtries 1)))))
		  (begin
		    (server:try-running run-id)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))))))
;; 
;; 	     (port     (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f)))
;; 	(if (and port
;; 		 (string->number port))
;; 	    (let ((portn (string->number port)))
;; 	      (debug:print-info 2 "Setting up to connect to host " host ":" port)
;; 	      (handle-exceptions
;; 	       exn
;; 	       (begin
;; 		 (debug:print 0 "ERROR: Failed to open a connection to the server at host: " host " port: " port)
;; 		 (debug:print 0 "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
;; 		 ;; (open-run-close 
;; 		 ;;  (lambda (db . param) 
;; 		 ;;    (sqlite3:execute db "DELETE FROM metadat WHERE var='SERVER'"))
;; 		 ;;  #f)
;; 		 (set! *runremote* #f))
;; 	       (if (and (not (args:get-arg "-server")) ;; no point in the server using the server using the server
;; 			((rpc:procedure 'server:login host portn) *toppath*))
;; 		   (begin
;; 		     (debug:print-info 2 "Logged in and connected to " host ":" port)
;; 		     (set! *runremote* (vector host portn)))
;; 		   (begin
;; 		     (debug:print-info 2 "Failed to login or connect to " host ":" port)
;; 		     (set! *runremote* #f)))))
;; 	    (debug:print-info 2 "no server available")))))

Modified runs.scm from [396462afab] to [da9f606770].

17
18
19
20
21
22
23

24
25
26
27
28
29
30
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31







+







(declare (uses db))
(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
158
159
160
161
162
163
164
165
166
167



168
169
170
171
172
173
174
159
160
161
162
163
164
165



166
167
168
169
170
171
172
173
174
175







-
-
-
+
+
+







    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *runs:denoise* key currtime)
	  #t)
	#f)))

(define (runs:can-run-more-tests run-id jobgroup max-concurrent-jobs)
  ;;(thread-sleep! (cond
  ;;      	  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
  ;;      	  (else 0)))
  (thread-sleep! (cond
        	  ((> *runs:can-run-more-tests-count* 20) 2);; obviously haven't had any work to do for a while
        	  (else 0)))
  (let* ((num-running             (rmt:get-count-tests-running run-id))
	 (num-running-in-jobgroup (rmt:get-count-tests-running-in-jobgroup run-id jobgroup))
	 (job-group-limit         (let ((jobg-count (config-lookup *configdat* "jobgroups" jobgroup)))
				    (if (string? jobg-count)
					(string->number jobg-count)
					jobg-count))))
    (if (> (+ num-running num-running-in-jobgroup) 0)
221
222
223
224
225
226
227
228

229
230
231
232
233
234


235
236
237
238
239
240
241
222
223
224
225
226
227
228

229
230
231
232
233


234
235
236
237
238
239
240
241
242







-
+




-
-
+
+







    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (set-signal-handler! signal/int
			 (lambda (signum)
			   (signal-mask! signum)
			   (print "Received signal " signum ", cleaning up before exit. Please wait...")
			   (let ((tdbdat (tasks:open-db)))
			     (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "killed"))
			     (rmt:tasks-set-state-given-param-key task-key "killed"))
			   (print "Killed by signal " signum ". Exiting")
			   (exit)))

    ;; register this run in monitor.db
    (tasks:add (db:delay-if-busy tdbdat) "run-tests" user target runname test-patts task-key) ;; params)
    (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "running")
    (rmt:tasks-add "run-tests" user target runname test-patts task-key) ;; params)
    (rmt:tasks-set-state-given-param-key task-key "running")
    (runs:set-megatest-env-vars run-id inkeys: keys inrunname: runname) ;; these may be needed by the launching process
    (if (file-exists? runconfigf)
	(setup-env-defaults runconfigf run-id *already-seen-runconfig-info* keyvals target)
	(debug:print 0 "WARNING: You do not have a run config file: " runconfigf))

    ;; Now generate all the tests lists
    (set! all-tests-registry (tests:get-all))
365
366
367
368
369
370
371
372


373








374


375
376
377
378
379
380



381

382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399

400
401
402
403
404
405
406
366
367
368
369
370
371
372

373
374
375
376
377
378
379
380
381
382
383

384
385
386
387
388
389
390
391
392
393
394

395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412

413
414
415
416
417
418
419
420







-
+
+

+
+
+
+
+
+
+
+
-
+
+






+
+
+
-
+

















-
+








    (if (not (null? required-tests))
	(debug:print-info 1 "Adding " required-tests " to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going #t)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		 (th1        (make-thread (lambda ()
					    (handle-exceptions
					     exn
					     (begin
					       (print-call-chain (current-error-port))
					       (debug:print 0 "ERROR: failure in runs:run-tests-queue thread, error: " ((condition-property-accessor 'exn 'message) exn))
					       (if (> run-queue-retries 0)
						   (begin
						     (set! run-queue-retries (- run-queue-retries 1))
					    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))
						     (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry))))
					     (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests (any->number reglen) all-tests-registry)))
					  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()				    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 "error in calling find-and-mark-incomplete for run-id " run-id)
							      (rmt:find-and-mark-incomplete run-id #f))) ;; ovr-deadtime)))
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime)))
							run-ids)))
					  "runs: mark-incompletes")))
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th1)
	    (set! keep-going #f)
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0)
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "STUCK/DEAD,n/a,ZERO_ITEMS"))
		  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))))
	  (debug:print-info 0 "No tests to run")))
    (debug:print-info 4 "All done by here")
    (tasks:set-state-given-param-key (db:delay-if-busy tdbdat) task-key "done")
    (rmt:tasks-set-state-given-param-key task-key "done")
    ;; (sqlite3:finalize! tasks-db)
    ))


;; loop logic. These are used in runs:run-tests-queue to make it a bit more readable.
;;
;; If reg not full and have items in tal then loop with (car tal)(cdr tal) reg reruns
942
943
944
945
946
947
948
949

950
951
952
953
954
955
956
956
957
958
959
960
961
962

963
964
965
966
967
968
969
970







-
+







	     (newtal      (append tal (list hed)))
	     (regfull     (>= (length reg) reglen))
	     (num-running (rmt:get-count-tests-running-for-run-id run-id)))

	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)
		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10))
	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running 240))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 "max-tries-hash: " (hash-table->alist *max-tries-hash*))
1399
1400
1401
1402
1403
1404
1405
1406

1407
1408
1409
1410
1411
1412
1413
1413
1414
1415
1416
1417
1418
1419

1420
1421
1422
1423
1424
1425
1426
1427







-
+







;; fields are passing in through 
;; action:
;;    'remove-runs
;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(remove-data-only #f))
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
1433
1434
1435
1436
1437
1438
1439
1440


1441
1442
1443
1444
1445
1446
1447
1448
1449

1450
1451
1452
1453
1454
1455
1456
1457
1458
1459









1460
1461



1462
1463
1464
1465
1466
1467
1468
1447
1448
1449
1450
1451
1452
1453

1454
1455
1456
1457
1458
1459
1460
1461
1462
1463

1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495







-
+
+








-
+










+
+
+
+
+
+
+
+
+


+
+
+







							       (else          'event_time))))))
	 (let* ((run-id    (db:get-value-by-header run header "id"))
		(run-state (db:get-value-by-header run header "state"))
		(run-name  (db:get-value-by-header run header "runname"))
		(tests     (if (not (equal? run-state "locked"))
			       (proc-get-tests run-id)
			       '()))
		(lasttpath "/does/not/exist/I/hope"))
		(lasttpath "/does/not/exist/I/hope")
		(worker-thread #f))
	   (debug:print-info 4 "runs:operate-on run=" run ", header=" header)
	   (if (not (null? tests))
	       (begin
		 (case action
		   ((remove-runs)
		    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    ;; seek and kill in flight -runtests with % as testpatt here
		    (if (equal? testpatt "%")
			(tasks:kill-runner (db:delay-if-busy tdbdat) target run-name)
			(tasks:kill-runner target run-name)
			(debug:print 0 "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)
		    (debug:print 1 "Archiving data for run: " runkey " " (db:get-value-by-header run header "runname"))
		    (set! worker-thread (make-thread (lambda ()
						       (case (string->symbol (args:get-arg "-archive"))
							 ((save save-remove keep-html)(archive:run-bup (args:get-arg "-archive") run-id run-name tests))
							 ((restore)(archive:bup-restore (args:get-arg "-archive") run-id run-name tests))
							 (else (debug:print 0 "ERROR: unrecognised sub command to -archive. Run \"megatest\" to see help"))))
						     "archive-bup-thread"))
		    (thread-start! worker-thread))
		   (else
		    (debug:print-info 0 "action not recognised " action)))
		 
		 ;; actions that operate on one test at a time can be handled below
		 ;;
		 (let ((sorted-tests     (sort tests (lambda (a b)(let ((dira ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir a)) ;; )  ;; (filedb:get-path *fdb* (db:test-get-rundir a)))
									(dirb ;; (rmt:sdb-qry 'getstr 
									 (db:test-get-rundir b))) ;; ) ;; ((filedb:get-path *fdb* (db:test-get-rundir b))))
								    (if (and (string? dira)(string? dirb))
									(> (string-length dira)(string-length dirb))
									#f)))))
1520
1521
1522
1523
1524
1525
1526
1527

1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542











1543
1544
1545
1546
1547
1548
1549
1547
1548
1549
1550
1551
1552
1553

1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567


1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585







-
+













-
-
+
+
+
+
+
+
+
+
+
+
+







					    (mt:test-set-state-status-by-id run-id (db:test-get-id test) "KILLREQ" "n/a" #f)
						  (thread-sleep! 1)))
					    ;; NOTE: This is suboptimal as the testdata will be used later and the state/status may have changed ...
					    (if (null? tal)
						(loop new-test-dat tal)
						(loop (car tal)(append tal (list new-test-dat)))))
					  (begin
					    (runs:remove-test-directory db new-test-dat remove-data-only)
					    (runs:remove-test-directory new-test-dat mode) ;; 'remove-all)
					    (if (not (null? tal))
						(loop (car tal)(cdr tal))))))))
			       ((set-state-status)
				(debug:print-info 2 "new state " (car state-status) ", new status " (cadr state-status))
				(mt:test-set-state-status-by-id run-id (db:test-get-id test) (car state-status)(cadr state-status) #f)
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       ((run-wait)
				(debug:print-info 2 "still waiting, " (length tests) " tests still running")
				(thread-sleep! 10)
				(let ((new-tests (proc-get-tests run-id)))
				  (if (null? new-tests)
				      (debug:print-info 1 "Run completed according to zero tests matching provided criteria.")
				      (loop (car new-tests)(cdr new-tests))))))))
		       )))))
				      (loop (car new-tests)(cdr new-tests)))))
			       ((archive)
				(if (not toplevel-with-children)
				    (begin
				      (debug:print-info 0 "Estimating disk space usage for " test-fulln)
				      (debug:print-info 0 "   " (common:get-disk-space-used (conc run-dir "/")))))
				(if (not (null? tal))
				    (loop (car tal)(cdr tal))))
			       )))
		       )
		     (if worker-thread (thread-join! worker-thread))))))
	   ;; remove the run if zero tests remain
	   (if (eq? action 'remove-runs)
	       (let ((remtests (mt:get-tests-for-run (db:get-value-by-header run header "id") #f '("DELETED") '("n/a") not-in: #t)))
		 (if (null? remtests) ;; no more tests remaining
		     (let* ((dparts  (string-split lasttpath "/"))
			    (runpath (conc "/" (string-intersperse 
						(take dparts (- (length dparts) 1))
1560
1561
1562
1563
1564
1565
1566
1567

1568
1569
1570
1571
1572
1573
1574




1575
1576
1577
1578
1579
1580
1581
1596
1597
1598
1599
1600
1601
1602

1603
1604
1605
1606
1607



1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618







-
+




-
-
-
+
+
+
+







		       )))))
	 ))
     runs)
    ;; (sqlite3:finalize! (db:delay-if-busy tdbdat))
    )
  #t)

(define (runs:remove-test-directory db test remove-data-only)
(define (runs:remove-test-directory test mode) ;; remove-data-only)
  (let* ((run-dir       (db:test-get-rundir test))    ;; run dir is from the link tree
	 (real-dir      (if (file-exists? run-dir)
			    (resolve-pathname run-dir)
			    #f)))
    (if remove-data-only
	(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f)
	(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
    (case mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "CLEANING" "LOCKED" #f))
      ((remove-all)      (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "REMOVING" "LOCKED" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVE_REMOVING" #f #f)))
    (debug:print-info 1 "Attempting to remove " (if real-dir (conc " dir " real-dir " and ") "") " link " run-dir)
    (if (and real-dir 
	     (> (string-length real-dir) 5)
	     (file-exists? real-dir)) ;; bad heuristic but should prevent /tmp /home etc.
	(begin ;; let* ((realpath (resolve-pathname run-dir)))
	  (debug:print-info 1 "Recursively removing " real-dir)
	  (if (file-exists? real-dir)
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609




1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628

1629
1630


1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647



1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658

1659
1660
1661
1662
1663
1664
1665
1637
1638
1639
1640
1641
1642
1643



1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665

1666
1667

1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684


1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697

1698
1699
1700
1701
1702
1703
1704
1705







-
-
-
+
+
+
+


















-
+

-
+
+















-
-
+
+
+










-
+







		 (delete-directory run-dir)))
	    (if (and run-dir
		     (not (member run-dir (list "n/a" "/tmp/badname"))))
		(debug:print 0 "WARNING: not removing " run-dir " as it either doesn't exist or is not a symlink")
		(debug:print 0 "NOTE: the run dir for this test is undefined. Test may have already been deleted."))
	    ))
    ;; Only delete the records *after* removing the directory. If things fail we have a record 
    (if remove-data-only
	(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f)
	(rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test)))))
    (case mode
      ((remove-data-only)(mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "NOT_STARTED" "n/a" #f))
      ((archive-remove)  (mt:test-set-state-status-by-id (db:test-get-run_id test)(db:test-get-id test) "ARCHIVED" #f #f))
      (else (rmt:delete-test-records (db:test-get-run_id test) (db:test-get-id test))))))

;;======================================================================
;; Routines for manipulating runs
;;======================================================================

;; Since many calls to a run require pretty much the same setup 
;; this wrapper is used to reduce the replication of code
(define (general-run-call switchname action-desc proc)
  (let ((runname (or (args:get-arg "-runname")(args:get-arg ":runname")))
	(target  (common:args-get-target)))
    (cond
     ((not target)
      (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the target with -target")
      (exit 3))
     ((not runname)
      (debug:print 0 "ERROR: Missing required parameter for " switchname ", you must specify the run name with -runname runname")
      (exit 3))
     (else
      (let ((db   #f)
      (let (;; (db   #f)
	    (keys #f))
	(if (not (launch:setup-for-run))
	(if (launch:setup-for-run)
	    (launch:cache-config)
	    (begin 
	      (debug:print 0 "Failed to setup, exiting")
	      (exit 1)))
	;; (if (args:get-arg "-server")
	;;     (cdb:remote-run server:start db (args:get-arg "-server")))
	(set! keys (keys:config-get-fields *configdat*))
	;; have enough to process -target or -reqtarg here
	(if (args:get-arg "-reqtarg")
	    (let* ((runconfigf (conc  *toppath* "/runconfigs.config")) ;; DO NOT EVALUATE ALL 
		   (runconfig  (read-config runconfigf #f #t environ-patt: #f)))
	      (if (hash-table-ref/default runconfig (args:get-arg "-reqtarg") #f)
		  (keys:target-set-args keys (args:get-arg "-reqtarg") args:arg-hash)
		    
		  (begin
		    (debug:print 0 "ERROR: [" (args:get-arg "-reqtarg") "] not found in " runconfigf)
		    (if db (sqlite3:finalize! db))
		    (exit 1))))
		    ;; (if db (sqlite3:finalize! db))
		    (exit 1)
		    )))
	    (if (args:get-arg "-target")
		(keys:target-set-args keys (args:get-arg "-target" args:arg-hash) args:arg-hash)))
	(if (not (car *configinfo*))
	    (begin
	      (debug:print 0 "ERROR: Attempted to " action-desc " but run area config file not found")
	      (exit 1))
	    ;; Extract out stuff needed in most or many calls
	    ;; here then call proc
	    (let* ((keyvals    (keys:target->keyval keys target)))
	      (proc target runname keys keyvals)))
	(if db (sqlite3:finalize! db))
	;; (if db (sqlite3:finalize! db))
	(set! *didsomething* #t))))))

;;======================================================================
;; Lock/unlock runs
;;======================================================================

(define (runs:handle-locking target keys runname lock unlock user)

Modified server.scm from [f2b9d5f3d9] to [d9e8792ebb].

18
19
20
21
22
23
24
25
26



27
28
29
30
31
32
33
18
19
20
21
22
23
24


25
26
27
28
29
30
31
32
33
34







-
-
+
+
+







(declare (unit server))

(declare (uses common))
(declare (uses db))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses synchash))
(declare (uses http-transport))
(declare (uses launch))
;; (declare (uses zmq-transport))
(declare (uses rpc-transport))
(declare (uses nmsg-transport))
(declare (uses launch))
(declare (uses daemon))

(include "common_records.scm")
(include "db_records.scm")

(define (server:make-server-url hostport)
  (if (not hostport)
45
46
47
48
49
50
51

52
53


54
55

56
57
58



59
60
61
62
63
64











65
66
67
68
69
70
71
72
73
74
75
76
77




78










79
80
81
82
83
84
85
46
47
48
49
50
51
52
53


54
55


56



57
58
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83

84
85
86
87
88
89
90
91

92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108







+
-
-
+
+
-
-
+
-
-
-
+
+
+
-





+
+
+
+
+
+
+
+
+
+
+








-




+
+
+
+
-
+
+
+
+
+
+
+
+
+
+







;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  (case *transport-type*
  (http-transport:launch run-id))

    ((http)(http-transport:launch run-id))
    ((nmsg)(nmsg-transport:launch run-id))
;;======================================================================
;; Q U E U E   M A N A G E M E N T
    ((rpc)  (rpc-transport:launch run-id))
;;======================================================================

;; We don't want to flush the queue if it was just flushed
    (else (debug:print 0 "ERROR: unknown server type " *transport-type*))))
;;       (else   (debug:print 0 "ERROR: No known transport set, transport=" transport ", using rpc")
;; 	      (rpc-transport:launch run-id)))))
(define *server:last-write-flush* (current-milliseconds))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;; Get the transport
(define (server:get-transport)
  (if *transport-type*
      *transport-type*
      (let ((ttype (string->symbol
		    (or (args:get-arg "-transport")
			(configf:lookup *configdat* "server" "transport")
			"rpc"))))
	(set! *transport-type* ttype)
	ttype)))
	    
;; Generate a unique signature for this server
(define (server:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))


;; When using zmq this would send the message back (two step process)
;; with spiffy or rpc this simply returns the return data to be returned
;; 
(define (server:reply return-addr query-sig success/fail result)
  (debug:print-info 11 "server:reply return-addr=" return-addr ", result=" result)
  ;; (send-message pubsock target send-more: #t)
  ;; (send-message pubsock 
  (case (server:get-transport)
  (db:obj->string (vector success/fail query-sig result)))
    ((rpc)  (db:obj->string (vector success/fail query-sig result)))
    ((http) (db:obj->string (vector success/fail query-sig result)))
    ((zmq)
     (let ((pub-socket (vector-ref *runremote* 1)))
       (send-message pub-socket return-addr send-more: #t)
       (send-message pub-socket (db:obj->string (vector success/fail query-sig result)))))
    ((fs)   result)
    (else 
     (debug:print 0 "ERROR: unrecognised transport type: " *transport-type*)
     result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;
(define  (server:run run-id)
  (let* ((curr-host   (get-host-name))
106
107
108
109
110
111
112






113
114
115
116
117
118
119
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148







+
+
+
+
+
+







	  (setenv "TARGETHOST" target-host)))
    (setenv "TARGETHOST_LOGF" logfile)
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    ;; (system cmdln)
    (pop-directory)))

(define (server:get-client-signature)
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
	(set! *my-client-signature* sig)
	*my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run run-id)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* run-id #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
134
135
136
137
138
139
140

141
142
143






144
145
146
147
148
149
150
163
164
165
166
167
168
169
170



171
172
173
174
175
176
177
178
179
180
181
182
183







+
-
-
-
+
+
+
+
+
+







	       (trycount 0))
    (if server
	;; note: client:start will set *runremote*. this needs to be changed
	;;       also, client:start will login to the server, also need to change that.
	;;
	;; client:start returns #t if login was successful.
	;;
	(let ((res (case *transport-type*
	(let ((res (server:ping-server run-id 
				       (tasks:hostinfo-get-interface server)
				       (tasks:hostinfo-get-port      server))))
		     ((http)(server:ping-server run-id 
						(tasks:hostinfo-get-interface server)
						(tasks:hostinfo-get-port      server)))
		     ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
						 (tasks:hostinfo-get-port      server)
						 timeout: 2)))))
	  ;; if the server didn't respond we must remove the record
	  (if res
	      #t
	      (begin
		(debug:print-info 0 "server at " server " not responding, removing record")
		(tasks:server-force-clean-running-records-for-run-id (db:delay-if-busy tdbdat) run-id 
				" server:check-if-running")
194
195
196
197
198
199
200






















227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
		(res "NOREPLY"))
       (if (eof-object? inl)
	   (case (string->symbol res)
	     ((NOREPLY)  #f)
	     ((LOGIN_OK) #t)
	     (else       #f))
	   (loop (read-line) inl))))))

(define (server:login toppath)
  (lambda (toppath)
    (set! *last-db-access* (current-seconds))
    (if (equal? *toppath* toppath)
	(begin
	  ;; (debug:print-info 2 "login successful")
	  #t)
	(begin
	  ;; (debug:print-info 2 "login failed")
	  #f))))

(define (server:get-timeout)
  (let ((tmo (configf:lookup  *configdat* "server" "timeout")))
    (if (and (string? tmo)
	     (string->number tmo))
	(* 60 60 (string->number tmo))
	;; (* 3 24 60 60) ;; default to three days
	(* 60 1)         ;; default to one minute
	;; (* 60 60 25)      ;; default to 25 hours
	)))

Modified synchash.scm from [9881f5a738] to [67fbbc1d0f].

60
61
62
63
64
65
66
67

68
69

70
71
72
73
74
75
76
60
61
62
63
64
65
66

67


68
69
70
71
72
73
74
75







-
+
-
-
+







    
;; (cdb:remote-run db:get-keys #f)  
;; (cdb:remote-run db:get-num-runs #f "%")
;; (cdb:remote-run db:get-runs #f runnamepatt numruns *start-run-offset* keypatts)
;;
;; keynum => the field to use as the unique key (usually 0 but can be other field)
;;
(define (synchash:client-get proc synckey keynum synchash . params)
(define (synchash:client-get proc synckey keynum synchash run-id . params)
  (let* ((data   ;; (apply cdb:remote-run synchash:server-get #f proc synckey keynum params))
	  (apply synchash:server-get #f proc synckey keynum params))
  (let* ((data   (rmt:synchash-get run-id proc synckey keynum params))
	 (newdat (car data))
	 (removs (cadr data))
	 (myhash (hash-table-ref/default synchash synckey #f)))
    (if (not myhash)
	(begin
	  (set! myhash (make-hash-table))
	  (hash-table-set! synchash synckey myhash)))
87
88
89
90
91
92
93
94

95

96

97
98
99
100

101
102
103
104
105
106
107
86
87
88
89
90
91
92

93
94
95

96
97
98
99

100
101
102
103
104
105
106
107







-
+

+
-
+



-
+







     removs)
    ;; WHICH ONE!?
    ;; data)) ;; return the changed and deleted list
    (list newdat removs))) ;; synchash))

(define *synchashes* (make-hash-table))

(define (synchash:server-get indb proc synckey keynum . params)
(define (synchash:server-get dbstruct run-id proc synckey keynum params)
  ;; (debug:print-info 2 "synckey: " synckey ", keynum: " keynum ", params: " params)
  (let* ((dbdat     (db:get-db dbstruct run-id))
  (let* ((db        (if indb indb (db:open-megatest-db)))
	 (db        (db:dbdat-get-db dbdat))
	 (synchash  (hash-table-ref/default *synchashes* synckey #f))
	 (newdat    (apply (case proc
			     ((db:get-runs)                   db:get-runs)
			     ((db:get-tests-for-runs-mindata) db:get-tests-for-runs-mindata)
			     ((db:get-tests-for-run-mindata)  db:get-tests-for-run-mindata)
			     ((db:get-test-info-by-ids)       db:get-test-info-by-ids)
			     (else
			      (print "ERROR: sync for hash " proc " not setup! Edits needed in synchash.scm")
			      print))
			   db params))
	 (postdat  #f)
	 (make-indexed (lambda (x)

Modified tasks.scm from [af4bc3dbb1] to [1ea4ffe4d5].

10
11
12
13
14
15
16

17
18
19
20
21
22
23
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24







+







;;  strftime('%m/%d/%Y %H:%M:%S','now','localtime')

(use sqlite3 srfi-1 posix regex regex-case srfi-69 dot-locking format)
(import (prefix sqlite3 sqlite3:))

(declare (unit tasks))
(declare (uses db))
(declare (uses rmt))
(declare (uses common))

(include "task_records.scm")

;;======================================================================
;; Tasks db
;;======================================================================
101
102
103
104
105
106
107



108
109
110
111
112
113
114
115
116
117
118











119
120
121
122
123
124
125
102
103
104
105
106
107
108
109
110
111











112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129







+
+
+
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+







	     (set! *db-write-access* write-access)) ;; only unset so other db's also can use this control
	 (sqlite3:set-busy-handler! mdb handler)
	 (db:set-sync mdb) ;; (sqlite3:execute mdb (conc "PRAGMA synchronous = 0;"))
	 ;;  (if (or (and (not exists)
	 ;; 	      (file-write-access? *toppath*))
	 ;; 	 (not (file-read-access? dbpath)))
	 ;;      (begin
	 ;; 
	 ;; TASKS QUEUE MOVED TO main.db
	 ;;
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
                                action TEXT DEFAULT '',
                                owner TEXT,
                                state TEXT DEFAULT 'new',
                                target TEXT DEFAULT '',
                                name TEXT DEFAULT '',
                                testpatt TEXT DEFAULT '',
                                keylock TEXT,
                                params TEXT,
                                creation_time TIMESTAMP,
                                execution_time TIMESTAMP);")
	 ;; (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS tasks_queue (id INTEGER PRIMARY KEY,
         ;;                        action TEXT DEFAULT '',
         ;;                        owner TEXT,
         ;;                        state TEXT DEFAULT 'new',
         ;;                        target TEXT DEFAULT '',
         ;;                        name TEXT DEFAULT '',
         ;;                        testpatt TEXT DEFAULT '',
         ;;                        keylock TEXT,
         ;;                        params TEXT,
         ;;                        creation_time TIMESTAMP,
         ;;                        execution_time TIMESTAMP);")
	 (sqlite3:execute mdb "CREATE TABLE IF NOT EXISTS monitors (id INTEGER PRIMARY KEY,
                                pid INTEGER,
                                start_time TIMESTAMP,
                                last_update TIMESTAMP,
                                hostname TEXT,
                                username TEXT,
                               CONSTRAINT monitors_constraint UNIQUE (pid,hostname));")
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
147
148
149
150
151
152
153

154
155
156
157
158
159
160







-







                                  hostname TEXT,
                                  cmdline TEXT,
                                  login_time TIMESTAMP,
                                  logout_time TIMESTAMP DEFAULT -1,
                                CONSTRAINT clients_constraint UNIQUE (pid,hostname));")
	       
	       ;))
	 (sqlite3:execute mdb "DELETE FROM tasks_queue WHERE state='done' AND creation_time < ?;" (- (current-seconds)(* 24 60 60))) ;; remove older than 24 hrs
	 (set! *task-db* (cons mdb dbpath))
	 *task-db*))))

;;======================================================================
;; Server and client management
;;======================================================================

165
166
167
168
169
170
171
172

173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190










191
192
193
194
195
196
197
168
169
170
171
172
173
174

175
176
177
178
179
180
181
182
183
184









185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201







-
+









-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+







(define (tasks:hostinfo-get-hostname    vec)    (vector-ref  vec 6))

(define (tasks:server-lock-slot mdb run-id)
  (tasks:server-clean-out-old-records-for-run-id mdb run-id " tasks:server-lock-slot")
  (if (< (tasks:num-in-available-state mdb run-id) 4)
      (begin 
	(tasks:server-set-available mdb run-id)
	;; (thread-sleep! 2) ;; Try removing this. It may not be needed.
	(thread-sleep! (/ (random 1500) 1000)) ;; (thread-sleep! 2) ;; Try removing this. It may not be needed.
	(tasks:server-am-i-the-server? mdb run-id))
      #f))
	
;; register that this server may come online (first to register goes though with the process)
(define (tasks:server-set-available mdb run-id)
  (sqlite3:execute 
   mdb 
   "INSERT INTO servers (pid,hostname,port,pubport,start_time,      priority,state,mt_version,heartbeat,   interface,transport,run_id)
                   VALUES(?, ?,       ?,   ?, strftime('%s','now'), ?,       ?,    ?,-1,?,        ?,        ?);"
   (current-process-id)       ;; pid
   (get-host-name)            ;; hostname
   -1                         ;; port
   -1                         ;; pubport
   (random 1000)              ;; priority (used a tiebreaker on get-available)
   "available"                ;; state
   (common:version-signature) ;; mt_version
   -1                         ;; interface
   "http"                     ;; transport
   (current-process-id)          ;; pid
   (get-host-name)               ;; hostname
   -1                            ;; port
   -1                            ;; pubport
   (random 1000)                 ;; priority (used a tiebreaker on get-available)
   "available"                   ;; state
   (common:version-signature)    ;; mt_version
   -1                            ;; interface
   ;; (conc (server:get-transport)) ;; transport
   (conc *transport-type*)    ;; transport
   run-id
   ))

(define (tasks:num-in-available-state mdb run-id)
  (let ((res 0))
    (sqlite3:for-each-row
     (lambda (num-in-queue)
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
















296
297
298
299
300
301
302
279
280
281
282
283
284
285














286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308







-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		(loop (get-rand-port)(- remtries 1))
		(get-rand-port))
	    port))))))

(define (tasks:server-am-i-the-server? mdb run-id)
  (let* ((all    (tasks:server-get-servers-vying-for-run-id mdb run-id))
	 (first  (if (null? all)
		     (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") 
			    (sqlite3:finalize! mdb)
			    (exit 1))
		     (car (db:get-rows all))))
	 (header   (db:get-header all))
	 (id       (db:get-value-by-header first header "id"))
	 (hostname (db:get-value-by-header first header "hostname"))
	 (pid      (db:get-value-by-header first header "pid"))
	 (priority (db:get-value-by-header first header "priority")))
    (debug:print 0 "INFO: am-i-the-server got record " first)
    ;; for now a basic check. add tiebreaking by priority later
    (if (and (equal? hostname (get-host-name))
	     (equal? pid      (current-process-id)))
	id
		     #f;; (begin (debug:print 0 "ERROR: no servers listed, should be at least one by now.") 
		       ;;      (sqlite3:finalize! mdb)
		       ;;      (exit 1))
		     (car (db:get-rows all)))))
    (if first
	(let* ((header   (db:get-header all))
	       (id       (db:get-value-by-header first header "id"))
	       (hostname (db:get-value-by-header first header "hostname"))
	       (pid      (db:get-value-by-header first header "pid"))
	       (priority (db:get-value-by-header first header "priority")))
	  ;; (debug:print 0 "INFO: am-i-the-server got record " first)
	  ;; for now a basic check. add tiebreaking by priority later
	  (if (and (equal? hostname (get-host-name))
		   (equal? pid      (current-process-id)))
	      id
	      #f))
	#f)))
	     
;; Use: (db:get-value-by-header (car (db:get-rows dat)) (db:get-header dat) "fieldname")
;;  to extract info from the structure returned
;;
(define (tasks:server-get-servers-vying-for-run-id mdb run-id)
   (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time"))
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373















374
375
376
377
378
379
380
381
382
383
384
385

386
387

388
389
390
391
392
393
394
395
396
397


398












399
400
401
402
403
404
405
359
360
361
362
363
364
365














366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394

395
396
397
398
399
400
401
402
403
404

405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426







-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+












+

-
+









-
+
+

+
+
+
+
+
+
+
+
+
+
+
+







     (lambda (id)
       (set! res id))
     mdb ;; NEEDS dbprep ADDED
     "SELECT id FROM servers WHERE run_id=? AND state = 'running';" run-id)
    res))

(define (tasks:need-server run-id)
  (let ((forced (configf:lookup *configdat* "server" "required"))
	(maxqry (cdr (rmt:get-max-query-average run-id)))
	(threshold   (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
    (cond
     (forced 
      (if (common:low-noise-print 60 run-id "server required is set")
	  (debug:print-info 0 "Server required is set, starting server."))
      #t)
     ((> maxqry threshold)
      (if (common:low-noise-print 60 run-id "Max query time execeeded")
	  (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, starting server."))
      #t)
     (else
      #f))))
  (configf:lookup *configdat* "server" "required"))

;; 	(maxqry (cdr (rmt:get-max-query-average run-id)))
;; 	(threshold   (string->number (or (configf:lookup *configdat* "server" "server-query-threshold") "10"))))
;;     (cond
;;      (forced 
;;       (if (common:low-noise-print 60 run-id "server required is set")
;; 	  (debug:print-info 0 "Server required is set, starting server for run-id " run-id "."))
;;       #t)
;;      ((> maxqry threshold)
;;       (if (common:low-noise-print 60 run-id "Max query time execeeded")
;; 	  (debug:print-info 0 "Max avg query time of " maxqry "ms exceeds limit of " threshold "ms, server needed for run-id " run-id "."))
;;       #t)
;;      (else
;;       #f))))

;; try to start a server and wait for it to be available
;;
(define (tasks:start-and-wait-for-server tdbdat run-id delay-max-tries)
  ;; ensure a server is running for this run
  (let loop ((server-dat (tasks:get-server (db:delay-if-busy tdbdat) run-id))
	     (delay-time 0))
      (if (and (not server-dat)
	       (< delay-time delay-max-tries))
	  (begin
	    (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id)
		(debug:print 0 "Try starting server for run-id " run-id))
	    (thread-sleep! (/ (random 2000) 1000))
	    (server:kind-run run-id)
	    (thread-sleep! (min delay-time 5))
	    (thread-sleep! (min delay-time 1))
	    (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1))))))

(define (tasks:get-all-servers mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE state NOT LIKE 'defunct%' ORDER BY start_time DESC;")
    res))

(define (tasks:get-server-records mdb run-id)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id)
       ;;                       0  1     2         3      4     5          6        7     8          9          10        11     12
       (set! res (cons (vector id pid hostname interface port pubport start-time priority state mt-version last-update transport run-id) res)))
     mdb
     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;"
     run-id)
    (reverse res)))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
  (debug:print-info 0 "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
420
421
422
423
424
425
426
427
428
429
430
431

432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546

547
548

549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565

566
567
568
569
570

571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
441
442
443
444
445
446
447

448



449

450







































































451









































452


453

















454





455













456
457
458
459
460
461
462
463
464
465
466

















467
468
469
470
471
472
473







-

-
-
-
+
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-











-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-







	  (debug:print-info 0 "Killing server " server-id " for run-id " run-id " on host " hostname " with pid " pid)
	  (tasks:kill-server hostname pid)
	  (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id tag) )
	(debug:print-info 0 "No server found for run-id " run-id ", nothing to kill"))
    ;; (sqlite3:finalize! tdb)
    ))
    

;;======================================================================
;; Tasks and Task monitors
;;======================================================================

;; M O N I T O R S

;;======================================================================
;; Tasks
;;======================================================================



;;======================================================================
;; Task Monitors
;;======================================================================

(define (tasks:register-monitor db mdb)
  (let* ((pid (current-process-id))
	 (hostname (get-host-name))
	 (userinfo (user-information (current-user-id)))
	 (username (car userinfo)))
    (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username)
    (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
		     pid hostname username)))

(define (tasks:get-num-alive-monitors mdb)
  (let ((res 0))
    (sqlite3:for-each-row 
     (lambda (count)
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; register a task
(define (tasks:add mdb action owner target runname testpatt params)
  (sqlite3:execute mdb "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time)
                       VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" 
		   action
		   owner
		   target
		   runname
		   testpatt
		   (if params params "")))

(define (keys:key-vals-hash->target keys key-params)
  (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) "")))
    (if (> (length keys) 1)
	(for-each (lambda (key)
		    (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) ""))))
		  (cdr keys)))
    tmp))
								
;; for use from the gui
(define (tasks:add-from-params mdb action keys key-params var-params)
  (let ((target    (keys:key-vals-hash->target keys key-params))
	(owner     (car (user-information (current-user-id))))
	(runname   (hash-table-ref/default var-params "runname" #f))
	(testpatts (hash-table-ref/default var-params "testpatts" "%"))
	(params    (hash-table-ref/default var-params "params"    "")))
    (tasks:add mdb action owner target runname testpatts params)))

;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old
;;
(define (tasks:snag-a-task mdb)
  (let ((res    #f)
	(keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id))))))

    ;; first randomly set a new to pid-hostname-hostname
    (sqlite3:execute
     mdb 
     "UPDATE tasks_queue SET keylock=? WHERE id IN
        (SELECT id FROM tasks_queue 
           WHERE state='new' OR 
                 (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR
                 state='reset'
           ORDER BY RANDOM() LIMIT 1);" keytxt)

    (sqlite3:for-each-row
     (lambda (id . rem)
       (set! res (apply vector id rem)))
     mdb
     "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt)
    (if res ;; yep, have work to be done
	(begin
	  (sqlite3:execute mdb "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;"
			   (tasks:task-get-id res))
	  res)
	#f)))

(define (tasks:reset-stuck-tasks mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id delta)
       (set! res (cons id res)))
     mdb
     "SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;")
    (sqlite3:execute 
     mdb 
     (conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');"))))

;; return all tasks in the tasks_queue table
;;
(define (tasks:get-tasks mdb types states)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (id . rem)
       (set! res (cons (apply vector id rem) res)))
     mdb
     (conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time 
               FROM tasks_queue "
               ;; WHERE  
               ;;   state IN " statesstr " AND 
	       ;;   action IN " actionsstr 
	   " ORDER BY creation_time DESC;"))
    res))

;; remove tasks given by a string of numbers comma separated
(define (tasks:remove-queue-entries mdb task-ids)
(define (tasks:remove-monitor-record mdb)
  (sqlite3:execute mdb (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))

  (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;"
;; 
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (configf:lookup *configdat* "setup" "linktree") "/.db/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)
		(tasks:process-queue db mdb last-db-update megatestdb next-touch))
	    ;; WARNING: Possible race conditon here!!
	    ;; should this update be immediately after the task-get-action call above?
	    (if (> (current-seconds) next-touch)
		   (current-process-id)
		(begin
		  (tasks:monitors-update mdb)
		  (loop (+ count 1)(+ (current-seconds) 240)))
		(loop (+ count 1) next-touch)))))))
      
		   (get-host-name)))
(define (tasks:process-queue db mdb)
  (let* ((task   (tasks:snag-a-task mdb))
	 (action (if task (tasks:task-get-action task) #f)))
    (if action (print "tasks:process-queue task: " task))
    (if action
	(case (string->symbol action)
	  ((run)       (tasks:start-run   db mdb task))
	  ((remove)    (tasks:remove-runs db mdb task))
	  ((lock)      (tasks:lock-runs   db mdb task))
	  ;; ((monitor)   (tasks:start-monitor db task))
	  ((rollup)    (tasks:rollup-runs db mdb task))
	  ((updatemeta)(tasks:update-meta db mdb task))
	  ((kill)      (tasks:kill-monitors db mdb task))))))

(define (tasks:get-monitors mdb)
  (let ((res '()))
    (sqlite3:for-each-row
     (lambda (a . rem)
       (set! res (cons (apply vector a rem) res)))
     mdb
     "SELECT id,pid,strftime('%m/%d/%Y %H:%M',datetime(start_time,'unixepoch'),'localtime'),strftime('%m/%d/%Y %H:%M:%S',datetime(last_update,'unixepoch'),'localtime'),hostname,username FROM monitors ORDER BY last_update ASC;")
    (reverse res)
    ))

(define (tasks:tasks->text tasks)
  (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
    (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
	  (string-intersperse 
	   (map (lambda (task)
		  (format #f fmtstr
			  (tasks:task-get-id     task)
			  (tasks:task-get-action task)
			  (tasks:task-get-owner  task)
			  (tasks:task-get-state  task)
			  (tasks:task-get-target task)
			  (tasks:task-get-name   task)
			  (tasks:task-get-test   task)
			  ;; (tasks:task-get-item   task)
			  (tasks:task-get-params task)))
		tasks) "\n"))))
   
(define (tasks:monitors->text-table monitors)
  (let ((fmtstr "~4a~8a~20a~20a~10a~10a"))
    (conc (format #f fmtstr "id" "pid" "start time" "last update" "hostname" "user") "\n"
	  (string-intersperse 
	   (map (lambda (monitor)
		  (format #f fmtstr
			  (tasks:monitor-get-id          monitor)
635
636
637
638
639
640
641








642











643
644
645
646


















































































647




















































































648
649
650
651







652
653
654
655
656
657
658
659
660
661









662
663
664





665
666
667
668
669
670
671
672










673
674
675
676
677
678
679

680
681
682
683

684

685
686
687
688
689
690



691
692
693
694
695
696






697
698
699
700
701
702
703
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516




517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683




684
685
686
687
688
689
690
691
692
693
694
695





696
697
698
699
700
701
702
703
704
705


706
707
708
709
710
711







712
713
714
715
716
717
718
719
720
721
722
723





724
725
726
727
728
729

730
731
732
733



734
735
736
737
738
739



740
741
742
743
744
745
746
747
748
749
750
751
752







+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
+





-
-
-
-
-
+
+
+
+
+
+
+
+
+

-
-
+
+
+
+
+

-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+


-
-
-
-
-
+




+
-
+



-
-
-
+
+
+



-
-
-
+
+
+
+
+
+







     (lambda (id pid host last-update delta)
       (print "Going to delete stale record for monitor with pid " pid " on host " host " last updated " delta " seconds ago")
       (set! deadlist (cons id deadlist)))
     mdb 
     "SELECT id,pid,hostname,last_update,strftime('%s','now')-last_update AS delta FROM monitors WHERE delta > 700;")
    (sqlite3:execute mdb (conc "DELETE FROM monitors WHERE id IN ('" (string-intersperse (map conc deadlist) "','") "');")))
  )
(define (tasks:register-monitor db mdb)
  (let* ((pid (current-process-id))
	 (hostname (get-host-name))
	 (userinfo (user-information (current-user-id)))
	 (username (car userinfo)))
    (print "Register monitor, pid: " pid ", hostname: " hostname ", username: " username)
    (sqlite3:execute mdb "INSERT INTO monitors (pid,start_time,last_update,hostname,username) VALUES (?,strftime('%s','now'),strftime('%s','now'),?,?);"
		     pid hostname username)))

(define (tasks:get-num-alive-monitors mdb)
  (let ((res 0))
    (sqlite3:for-each-row 
     (lambda (count)
       (set! res count))
     mdb
     "SELECT count(id) FROM monitors WHERE last_update < (strftime('%s','now') - 300) AND username=?;"
     (car (user-information (current-user-id))))
    res))

;; 
(define (tasks:remove-monitor-record mdb)
  (sqlite3:execute mdb "DELETE FROM monitors WHERE pid=? AND hostname=?;"
		   (current-process-id)
		   (get-host-name)))
(define (tasks:start-monitor db mdb)
  (if (> (tasks:get-num-alive-monitors mdb) 2) ;; have two running, no need for more
      (debug:print-info 1 "Not starting monitor, already have more than two running")
      (let* ((megatestdb     (conc *toppath* "/megatest.db"))
	     (monitordbf     (conc (configf:lookup *configdat* "setup" "linktree") "/.db/monitor.db"))
	     (last-db-update 0)) ;; (file-modification-time megatestdb)))
	(task:register-monitor mdb)
	(let loop ((count      0)
		   (next-touch 0)) ;; next-touch is the time where we need to update last_update
	  ;; if the db has been modified we'd best look at the task queue
	  (let ((modtime (file-modification-time megatestdbpath )))
	    (if (> modtime last-db-update)
		(tasks:process-queue db mdb last-db-update megatestdb next-touch))
	    ;; WARNING: Possible race conditon here!!
	    ;; should this update be immediately after the task-get-action call above?
	    (if (> (current-seconds) next-touch)
		(begin
		  (tasks:monitors-update mdb)
		  (loop (+ count 1)(+ (current-seconds) 240)))
		(loop (+ count 1) next-touch)))))))
      
;;======================================================================
;; T A S K S   Q U E U E
;;
;;   NOTE:: These operate on task_queue which is in main.db
;;
;;======================================================================

;; NOTE: It might be good to add one more layer of checking to ensure
;;       that no task gets run in parallel.



;; register a task
(define (tasks:add dbstruct action owner target runname testpatt params)
  (db:with-db 
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "INSERT INTO tasks_queue (action,owner,state,target,name,testpatt,params,creation_time,execution_time)
                             VALUES (?,?,'new',?,?,?,?,strftime('%s','now'),0);" 
		      action
		      owner
		      target
		      runname
		      testpatt
		      (if params params "")))))

(define (keys:key-vals-hash->target keys key-params)
  (let ((tmp (hash-table-ref/default key-params (vector-ref (car keys) 0) "")))
    (if (> (length keys) 1)
	(for-each (lambda (key)
		    (set! tmp (conc tmp "/" (hash-table-ref/default key-params (vector-ref key 0) ""))))
		  (cdr keys)))
    tmp))
								
;; for use from the gui, not ported
;;
;; (define (tasks:add-from-params mdb action keys key-params var-params)
;;   (let ((target    (keys:key-vals-hash->target keys key-params))
;; 	(owner     (car (user-information (current-user-id))))
;; 	(runname   (hash-table-ref/default var-params "runname" #f))
;; 	(testpatts (hash-table-ref/default var-params "testpatts" "%"))
;; 	(params    (hash-table-ref/default var-params "params"    "")))
;;     (tasks:add mdb action owner target runname testpatts params)))

;; return one task from those who are 'new' OR 'waiting' AND more than 10sec old
;;
(define (tasks:snag-a-task dbstruct)
  (let ((res    #f)
	(keytxt (conc (current-process-id) "-" (get-host-name) "-" (car (user-information (current-user-id))))))
    (db:with-db
     dbstruct #f #t
     (lambda (db)
       ;; first randomly set a new to pid-hostname-hostname
       (sqlite3:execute
	db 
	"UPDATE tasks_queue SET keylock=? WHERE id IN
           (SELECT id FROM tasks_queue 
              WHERE state='new' OR 
                    (state='waiting' AND (strftime('%s','now')-execution_time) > 10) OR
                    state='reset'
              ORDER BY RANDOM() LIMIT 1);" keytxt)

       (sqlite3:for-each-row
	(lambda (id . rem)
	  (set! res (apply vector id rem)))
	db
	"SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time FROM tasks_queue WHERE keylock=? ORDER BY execution_time ASC LIMIT 1;" keytxt)
       (if res ;; yep, have work to be done
	   (begin
	     (sqlite3:execute db "UPDATE tasks_queue SET state='inprogress',execution_time=strftime('%s','now') WHERE id=?;"
			      (tasks:task-get-id res))
	     res)
	   #f)))))

(define (tasks:reset-stuck-tasks dbstruct)
  (let ((res '()))
    (db:with-db
     dbstruct #f #t
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id delta)
	  (set! res (cons id res)))
	db
	"SELECT id,strftime('%s','now')-execution_time AS delta FROM tasks_queue WHERE state='inprogress' AND delta>700 ORDER BY delta DESC LIMIT 2;")
       (sqlite3:execute 
	db 
	(conc "UPDATE tasks_queue SET state='reset' WHERE id IN ('" (string-intersperse (map conc res) "','") "');")
	)))))

;; return all tasks in the tasks_queue table
;;
(define (tasks:get-tasks dbstruct types states)
  (let ((res '()))
    (db:with-db
     dbstruct #f #f
     (lambda (db)
       (sqlite3:for-each-row
	(lambda (id . rem)
	  (set! res (cons (apply vector id rem) res)))
	db
	(conc "SELECT id,action,owner,state,target,name,test,item,params,creation_time,execution_time 
                  FROM tasks_queue "
	      ;; WHERE  
	      ;;   state IN " statesstr " AND 
	      ;;   action IN " actionsstr 
	      " ORDER BY creation_time DESC;"))
       res))))

;; remove tasks given by a string of numbers comma separated
(define (tasks:remove-queue-entries dbstruct task-ids)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db (conc "DELETE FROM tasks_queue WHERE id IN (" task-ids ");")))))

(define (tasks:process-queue dbstruct)
  (let* ((task   (tasks:snag-a-task dbstruct))
	 (action (if task (tasks:task-get-action task) #f)))
    (if action (print "tasks:process-queue task: " task))
    (if action
	(case (string->symbol action)
	  ((run)       (tasks:start-run     dbstruct task))
	  ((remove)    (tasks:remove-runs   dbstruct task))
	  ((lock)      (tasks:lock-runs     dbstruct task))
	  ;; ((monitor)   (tasks:start-monitor db task))
	  ((rollup)    (tasks:rollup-runs   dbstruct task))
	  ((updatemeta)(tasks:update-meta   dbstruct task))
	  ((kill)      (tasks:kill-monitors dbstruct task))))))

(define (tasks:tasks->text tasks)
  (let ((fmtstr "~10a~10a~10a~12a~20a~12a~12a~10a"))
    (conc (format #f fmtstr "id" "action" "owner" "state" "target" "runname" "testpatts" "params") "\n"
	  (string-intersperse 
	   (map (lambda (task)
		  (format #f fmtstr
			  (tasks:task-get-id     task)
			  (tasks:task-get-action task)
			  (tasks:task-get-owner  task)
			  (tasks:task-get-state  task)
			  (tasks:task-get-target task)
			  (tasks:task-get-name   task)
			  (tasks:task-get-test   task)
			  ;; (tasks:task-get-item   task)
			  (tasks:task-get-params task)))
		tasks) "\n"))))
   
(define (tasks:set-state mdb task-id state)
  (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE id=?;" 
		   state 
		   task-id))
(define (tasks:set-state dbstruct task-id state)
  (db:with-db 
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE id=?;" 
		      state 
		      task-id))))

;;======================================================================
;; Access using task key (stored in params; (hash-table->alist flags) hostname pid
;;======================================================================

(define (tasks:param-key->id mdb task-params)
  (handle-exceptions
   exn
   #f
   (sqlite3:first-result mdb "SELECT id FROM tasks_queue WHERE params LIKE ?;" task-params)))
(define (tasks:param-key->id dbstruct task-params)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (handle-exceptions
      exn
      #f
      (sqlite3:first-result db "SELECT id FROM tasks_queue WHERE params LIKE ?;"
			    task-params)))))

(define (tasks:set-state-given-param-key mdb param-key new-state)
  (sqlite3:execute mdb "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key))
(define (tasks:set-state-given-param-key dbstruct param-key new-state)
  (db:with-db
   dbstruct #f #t
   (lambda (db)
     (sqlite3:execute db "UPDATE tasks_queue SET state=? WHERE params LIKE ?;" new-state param-key))))

(define (tasks:get-records-given-param-key mdb param-key state-patt action-patt test-patt)
  (handle-exceptions
   exn
   '()
   (sqlite3:first-row mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
         params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
		      param-key state-patt action-patt test-patt)))
(define (tasks:get-records-given-param-key dbstruct param-key state-patt action-patt test-patt)
  (db:with-db
   dbstruct #f #f
   (lambda (db)
     (handle-exceptions
      exn
      '()
      (sqlite3:first-row db "SELECT id,action,owner,state,target,name,testpatt,keylock,params WHERE
                               params LIKE ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
			 param-key state-patt action-patt test-patt)))))


;;======================================================================
;; Rogue items, no place to put these yet
;;======================================================================

(define (tasks:find-task-queue-records mdb target run-name test-patt state-patt action-patt)
(define (tasks:find-task-queue-records dbstruct target run-name test-patt state-patt action-patt)
  ;; (handle-exceptions
  ;;  exn
  ;;  '()
  ;;  (sqlite3:first-row
  (let ((db (db:delay-if-busy (db:get-db dbstruct #f)))
  (let ((res '()))
	(res '()))
    (sqlite3:for-each-row 
     (lambda (a . b)
       (set! res (cons (cons a b) res)))
     mdb "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
       WHERE
         target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
     db "SELECT id,action,owner,state,target,name,testpatt,keylock,params FROM tasks_queue 
           WHERE
              target = ? AND name = ? AND state LIKE ? AND action LIKE ? AND testpatt LIKE ?;"
     target run-name state-patt action-patt test-patt)
    res)) ;; )


(define (tasks:kill-runner mdb target run-name)
  (let ((records    (tasks:find-task-queue-records mdb target run-name "%" "running" "run-tests"))
;; kill any runner processes (i.e. processes handling -runtests) that match target/runname
;; 
;; do a remote call to get the task queue info but do the killing as self here.
;;
(define (tasks:kill-runner target run-name)
  (let ((records    (rmt:tasks-find-task-queue-records target run-name "%" "running" "run-tests"))
	(hostpid-rx (regexp "\\s+(\\w+)\\s+(\\d+)$"))) ;; host pid is at end of param string
    (if (null? records)
	(debug:print 0 "No run launching processes found for " target " / " run-name)
	(debug:print 0 "Found " (length records) " run(s) to kill."))
    (for-each 
     (lambda (record)
       (let* ((param-key (list-ref record 8))
723
724
725
726
727
728
729
730

731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769






























772
773
774
775
776
777
778

779
780
781





































782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811







-
+


-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
		   (let ((old-targethost (getenv "TARGETHOST")))
		     (setenv "TARGETHOST" hostname)
		     (setenv "TARGETHOST_LOGF" "server-kills.log")
		     (system (conc "nbfake kill " pid))
		     (if old-targethost (setenv "TARGETHOST" old-targethost))
		     (unsetenv "TARGETHOST")
		     (unsetenv "TARGETHOST_LOGF"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in monitor.db"))))
	     (debug:print 0 "ERROR: no record or improper record for " target "/" run-name " in tasks_queue in main.db"))))
     records)))


;;======================================================================
;; The routines to process tasks
;;======================================================================

;; NOTE: It might be good to add one more layer of checking to ensure
;;       that no task gets run in parallel.

(define (tasks:start-run db mdb task)
  (let ((flags (make-hash-table)))
    (hash-table-set! flags "-rerun" "NOT_STARTED")
    (if (not (string=? (tasks:task-get-params task) ""))
	(hash-table-set! flags "-setvars" (tasks:task-get-params task)))
    (print "Starting run " task)
    ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
    (runs:run-tests db
		    (tasks:task-get-target task)
		    (tasks:task-get-name   task)
		    (tasks:task-get-test   task)
		    (tasks:task-get-item   task)
		    (tasks:task-get-owner  task)
		    flags)
    (tasks:set-state mdb (tasks:task-get-id task) "waiting")))

(define (tasks:rollup-runs db mdb task)
  (let* ((flags (make-hash-table)) 
	 (keys  (db:get-keys db))
	 (keyvals (keys:target-keyval keys (tasks:task-get-target task))))
    ;; (hash-table-set! flags "-rerun" "NOT_STARTED")
    (print "Starting rollup " task)
    ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
    (runs:rollup-run db
		     keys 
		     keyvals
		     (tasks:task-get-name  task)
		     (tasks:task-get-owner  task))
    (tasks:set-state mdb (tasks:task-get-id task) "waiting")))
;; (define (tasks:start-run dbstruct mdb task)
;;   (let ((flags (make-hash-table)))
;;     (hash-table-set! flags "-rerun" "NOT_STARTED")
;;     (if (not (string=? (tasks:task-get-params task) ""))
;; 	(hash-table-set! flags "-setvars" (tasks:task-get-params task)))
;;     (print "Starting run " task)
;;     ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;;     (runs:run-tests db
;; 		    (tasks:task-get-target task)
;; 		    (tasks:task-get-name   task)
;; 		    (tasks:task-get-test   task)
;; 		    (tasks:task-get-item   task)
;; 		    (tasks:task-get-owner  task)
;; 		    flags)
;;     (tasks:set-state mdb (tasks:task-get-id task) "waiting")))
;; 
;; (define (tasks:rollup-runs db mdb task)
;;   (let* ((flags (make-hash-table)) 
;; 	 (keys  (db:get-keys db))
;; 	 (keyvals (keys:target-keyval keys (tasks:task-get-target task))))
;;     ;; (hash-table-set! flags "-rerun" "NOT_STARTED")
;;     (print "Starting rollup " task)
;;     ;; sillyness, just call the damn routine with the task vector and be done with it. FIXME SOMEDAY
;;     (runs:rollup-run db
;; 		     keys 
;; 		     keyvals
;; 		     (tasks:task-get-name  task)
;; 		     (tasks:task-get-owner  task))
;;     (tasks:set-state mdb (tasks:task-get-id task) "waiting")))

Added testnanomsg/basic-req-rep.scm version [1436c827c9].




1
2
3
+
+
+
(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))

Added testnanomsg/mockupclient.scm version [63a8c6685a].











































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(use zmq posix numbers)

(define cname "Bob")
(define runtime 10)
(let ((args (argv)))
  (if (< (length args) 3)
      (begin
	(print "Usage: mockupclient clientname runtime")
	(exit))
      (begin
	(set! cname (cadr args))
	(set! runtime (string->number (caddr args))))))
      
;; (define start-delay (/ (random 100) 9))
;; (define runtime     (+ 1 (/ (random 200) 2)))

(print "Starting client " cname " with runtime " runtime)

(include "mockupclientlib.scm")

(set! endtime (+ (current-seconds) runtime))

;; first ping the server to ensure we have a connection
(if (server-ping cname 5)
    (print "SUCCESS: Client " cname " connected to server")
    (begin
      (print "ERROR: Client " cname " failed ping of server, exiting")
      (exit)))

(let loop ()
  (let ((x (random 15))
	(varname (list-ref (list "hello" "goodbye" "saluton" "kiaorana")(random 4))))
    (case x
      ;; ((1)(dbaccess cname 'sync "nodat"    #f))
      ((2 3 4 5)(dbaccess cname 'set varname (random 999)))
      ((6 7 8 9 10)(print cname ": Get \"" varname "\" " (dbaccess cname 'get varname #f)))
      (else
       (thread-sleep! 0.011)))
    (if (< (current-seconds) endtime)
	(loop))))

(print "Client " cname " all done!!")

Added testnanomsg/mockupclientlib.scm version [3b245ba7a9].



























































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
(define reqs (nn-socket 'req))

(connect-socket reqs "tcp://localhost:6563")

(thread-sleep! 0.2)

(define (server-ping cname timeout)
  (let ((msg     (conc cname ":ping:" timeout))
	(maxtime (+ (current-seconds) timeout)))
    (print "pinging server from " cname " with timeout " timeout)
    (let loop ((res   #f))
      (if (< maxtime (current-seconds))
	  #f ;; failed to ping
	  (if (equal? res "Got ping")
	      #t
	      (begin
		(print "Ping received from server " res)
		(send-message push msg)
		(thread-sleep! 0.1)
		(loop (receive-message sub non-blocking: #t))))))))
  
(define (dbaccess cname cmd var val #!key (numtries 20))
  (let* ((msg (conc cname ":" cmd ":" (if val (conc var " " val) var)))
	 (res #f)
	 (mtx1 (make-mutex))
	 (do-access (lambda ()
		      (let ((tmpres #f))
			(print "Sending msg: " msg)
			(send-message push msg)
			(print "Message " msg " sent")
			(print "Client " cname " waiting for response to " msg)
			(print "Client " cname " received address " (receive-message* sub))
			(set! tmpres (receive-message* sub))
			(mutex-lock! mtx1)
			(set! res tmpres)
			(mutex-unlock! mtx1))))
	 (th1 (make-thread do-access "do access"))
	 (th2 (make-thread (lambda ()
			     (let ((result #f))
			       (mutex-lock! mtx1)
			       (set! result res)
			       (mutex-unlock! mtx1)
			       (thread-sleep! 5)
			       (if (not result)
				   (if (> numtries 0)
				       (begin
					 (print "WARNING: access timed out for " cname ", trying again. Trys remaining=" numtries)
					 (dbaccess cname cmd var val numtries: (- numtries 1)))
				       (begin
					 (print "ERROR: dbaccess timed out. Exiting")
					 (exit)))))
			     "timeout thread"))))
    (thread-start! th1)
    (thread-start! th2)
    (thread-join! th1)
    (if res (print "SUCCESS: received " res " with " numtries " remaining possible attempts"))
    res))

Added testnanomsg/mockupserver.scm version [a4d3e5594c].



















































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; pub/sub with envelope address
;; Note that if you don't insert a sleep, the server will crash with SIGPIPE as soon
;; as a client disconnects.  Also a remaining client may receive tons of
;; messages afterward.

(use nanomsg srfi-18 sqlite3 numbers)

(define resp (nn-socket 'rep))
(define cname "server")
(define total-db-accesses 0)
(define start-time (current-seconds))

(nn-bind resp  "tcp://*:6563")

(thread-sleep! 0.2)

(define (open-db)
  (let* ((dbpath    "mockup.db")
	 (dbexists  (file-exists? dbpath))
	 (db        (open-database dbpath)) ;; (never-give-up-open-db dbpath))
	 (handler   (make-busy-timeout 10)))
    (set-busy-handler! db handler)
    (if (not dbexists)
	(for-each
	 (lambda (stmt)
	   (execute db stmt))
	 (list
	  "PRAGMA SYNCHRONOUS=0;"
	  "CREATE TABLE clients (id INTEGER PRIMARY KEY,name TEXT,num_accesses INTEGER DEFAULT 0);"
	  "CREATE TABLE vars    (var TEXT,val TEXT,CONSTRAINT vars_constraint UNIQUE (var));")))
    db))

(define cid-cache (make-hash-table))

(define (get-client-id db cname)
  (let ((cid (hash-table-ref/default cid-cache cname #f)))
    (if cid 
	cid
	(begin
	  (execute db "INSERT OR REPLACE INTO clients (name) VALUES(?);" cname)
	  (for-each-row 
	   (lambda (id)
	     (set! cid id))
	   db
	   "SELECT id FROM clients WHERE name=?;" cname)
	  (hash-table-set! cid-cache cname cid)
	  (set! total-db-accesses (+ total-db-accesses 2))
	  cid))))

(define (count-client db cname)
  (let ((cid (get-client-id db cname)))
    (execute db "UPDATE clients SET num_accesses=num_accesses+1 WHERE id=?;" cid)
    (set! total-db-accesses (+ total-db-accesses 1))
    ))

(define db (open-db))
;; (define queuelst '())
;; (define mx1 (make-mutex))

(define max-queue-len 0)

(define (process-queue queuelst)
  (let ((queuelen (length queuelst)))
    (if (> queuelen max-queue-len)
	(set! max-queue-len queuelen))
    (for-each
     (lambda (item)
       (let ((cname (vector-ref item 1))
	     (clcmd (vector-ref item 2))
	     (cdata (vector-ref item 3)))
	 (send-message pub cname send-more: #t)
	 (send-message pub (case clcmd
			     ((sync)
			      (conc queuelen))
			     ((set)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (apply execute db "INSERT OR REPLACE INTO vars (var,val) VALUES (?,?);" (string-split cdata))
			      "ok")
			     ((get)
			      (set! total-db-accesses (+ total-db-accesses 1))
			      (let ((res "noval"))
				(for-each-row
				 (lambda (val)
				   (set! res val))
				 db 
				 "SELECT val FROM vars WHERE var=?;" cdata)
				res))
			     (else (conc "unk cmd: " clcmd))))))
     queuelst)))

;; SERVER THREAD
(define th1 (make-thread 
	     (lambda ()
	       (let ((last-run 0)) ;; current-seconds when run last
		 (let loop ((queuelst '()))
		   (let* ((indat (receive-message* pull))
			  (parts (string-split indat ":"))
			  (cname (car parts))                   ;; client name
			  (clcmd (string->symbol (cadr parts))) ;; client cmd
			  (cdata (caddr parts))                 ;; client data
			  (svect (vector (current-seconds) cname clcmd cdata))) ;; record for the queue
		     ;; (print "Server received message: " indat)
		     (count-client db cname)
		     (case clcmd
		       ((ping)
			(print "Got ping from " cname)
			(send-message pub cname send-more: #t)
			(send-message pub "Got ping")
			(loop queuelst))
		       ((sync) ;; just process the queue
			(print "Got sync from " cname)
			(process-queue (cons svect queuelst))
			(loop '()))
		       ((get)
			(process-queue (cons svect queuelst))
			(loop '()))
		       (else
			(loop (cons svect queuelst))))))))
	     "server thread"))

(include "mockupclientlib.scm")

;; SYNC THREAD
;; send a sync to the pull port
(define th2 (make-thread
	     (lambda ()
	       (let ((last-action-time (current-seconds)))
		 (let loop ()
		   (thread-sleep! 5)
		   (let ((queuelen (string->number (dbaccess "server" 'sync "nada" #f)))
			 (last-action-delta #f))
		     (if (> queuelen 1)(set! last-action-time (current-seconds)))
		     (set! last-action-delta (- (current-seconds) last-action-time))
		     (print "Server: Got queuelen=" queuelen ", last-action-delta=" last-action-delta)
		     (if (< last-action-delta 60)
			 (loop)
			 (print "Server exiting, 25 seconds since last access"))))))
	     "sync thread"))

(thread-start! th1)
(thread-start! th2)
(thread-join! th2)

(let* ((run-time       (- (current-seconds) start-time))
       (queries/second (/  total-db-accesses run-time)))
  (print "Server exited! Total db accesses=" total-db-accesses " in " run-time " seconds for " queries/second " queries/second with max queue length of: " max-queue-len))

Added testnanomsg/pipeline.scm version [1d4d831eb6].


























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define push (nn-socket 'push))
(define pull1 (nn-socket 'pull))
(define pull2 (nn-socket 'pull))

(nn-bind    push  "inproc://test")
(nn-connect pull1 "inproc://test")
(nn-connect pull2 "inproc://test")

(nn-send push "a")
(nn-send push "b")
(nn-send push "c")
(nn-send push "d")

(define ((th sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) ": " (nn-recv sock))
  (print (current-thread) " is done"))

(thread-start! (th pull1))
(thread-start! (th pull2))

(thread-sleep! 1)

Added testnanomsg/req-rep-client.scm version [7998d54555].































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))

(nn-connect req  "tcp://localhost:22022")

;; (with-output-to-string (lambda ()(serialize obj)))
(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((talk-to-server soc))
  (let loop ((cnt 20))
    (let ((name (list-ref '("Matt" "Tom" "Bob" "Jill" "James" "Jane")(random 6))))
      (print "Sending " name)
      (print (client-send-receive req name))
      (if (> cnt 0)(loop (- cnt 1)))))
  (print (client-send-receive req "quit"))
  (nn-close req)
  (exit))

;; (thread-start! (lambda ()
;; 		 (thread-sleep! 20)
;; 		 (print "Give up on waiting for the server")
;; 		 (nn-close req)
;; 		 (exit)))

(thread-join! (thread-start! (talk-to-server req)))

Added testnanomsg/req-rep-server.scm version [d9de6da037].



























































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

;; (use trace)
;; (trace nn-bind nn-socket nn-assert nn-recv nn-send thread-terminate! nn-close )

(define port  22022)
(define host  "127.0.0.1")

(define rep   (nn-socket 'rep))

(print "connecting, got: " (nn-bind    rep  (conc "tcp://" "*" ":" port)))

(define (server soc)
  (print "server starting")
  (let loop ((msg-in (nn-recv soc)))
    (print "server received: " msg-in)
    (cond
     ((equal? msg-in "quit")
      (nn-send soc "Ok, quitting"))
     ((and (>= (string-length msg-in) 4)
	   (equal? (substring msg-in 0 4) "ping"))
      (nn-send soc (conc (current-process-id)))
      (loop (nn-recv soc)))
     ;;((and (>= (string-length msg-in)
     (else
      (let ((this-task (random 15)))
	(thread-sleep! this-task)
	(nn-send soc (conc "hello " msg-in " this task took " this-task " seconds to complete"))
	(loop (nn-recv soc)))))))

(define (ping-self host port #!key (return-socket #t))
  ;; send a random number along with pid and check that we get it back
  (let* ((req     (nn-socket 'req))
	 (key     "ping")
	 (success #f)
	 (keepwaiting #t)
	 (ping    (make-thread
		   (lambda ()
		     (print "ping: sending string \"" key "\", expecting " (current-process-id))
		     (nn-send req key)
		     (let ((result  (nn-recv req)))
		       (if (equal? (conc (current-process-id)) result)
			   (begin
			     (print "ping, success: received \"" result "\"")
			     (set! success #t))
			   (begin
			     (print "ping, failed: received key \"" result "\"")
			     (set! keepwaiting #f)
			     (set! success #f)))))
		   "ping"))
	 (timeout (make-thread (lambda ()
				 (let loop ((count 0))
				   (thread-sleep! 1)
				   (print "still waiting after count seconds...")
				   (if (and keepwaiting (< count 10))
				       (loop (+ count 1))))
				 (if keepwaiting
				     (begin
				       (print "timeout waiting for ping")
				       (thread-terminate! ping))))
			       "timeout")))
    (nn-connect req (conc "tcp://" host ":" port))
    (handle-exceptions
     exn
     (begin
       (print-call-chain)
       (print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
       (print "exn=" (condition->list exn))
       (print "ping failed to connect to " host ":" port))
     (thread-start! timeout)
     (thread-start! ping)
     (thread-join! ping)
     (if success (thread-terminate! timeout)))
    (if return-socket
	(if success req #f)
	(begin
	  (nn-close req)
	  success))))

(let ((server-thread (make-thread (lambda ()(server rep)) "server")))
  (thread-start! server-thread)
  ;; (thread-sleep! 1)
  (if (ping-self host port)
      (begin
	(thread-join! server-thread)
	(nn-close rep))
      (print "ping failed")))

(exit)

Added testnanomsg/req-rep.scm version [b77ebf1421].































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;; watch nanomsg's pipeline load-balancer in action.
(use nanomsg)

(define req   (nn-socket 'req))
(define rep   (nn-socket 'rep))

(nn-bind    rep  "inproc://test")
(nn-connect req  "inproc://test")

(define (client-send-receive soc msg)
  (nn-send soc msg)
  (nn-recv soc))

(define ((server soc))
  (let loop ((msg-in (nn-recv soc)))
    (if (not (equal? msg-in "quit"))
	(begin
	  (nn-send soc (conc "hello " msg-in))
	  (loop (nn-recv soc))))))

(thread-start! (server rep))

(print (client-send-receive req "Matt"))
(print (client-send-receive req "Tom"))

;; (client-send-receive req "quit")

(nn-close req)
(nn-close rep)
(exit)

Added testrpc/client.scm version [eacc9c3c29].









1
2
3
4
5
6
7
8
+
+
+
+
+
+
+
+
;;;; client.scm
(use rpc posix)

(define call (rpc:procedure 'foo "localhost"))

(do ((i 10 (sub1 i)))
    ((zero? i))
  (print "-> " (call (random 100))))

Added testrpc/server.scm version [d4d2e05e92].
















1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;;; server.scm
(use rpc)

(rpc:publish-procedure!
 'foo
 (lambda (x)
   (print "foo: " x)
   #f))

(rpc:publish-procedure!
 'fini
 (lambda () (print "fini") (thread-start! (lambda () (thread-sleep! 3) (print "terminate") (exit))) #f))

((rpc:make-server (tcp-listen (rpc:default-server-port))) #t)

Modified tests.scm from [2a580a2e0e] to [fe032b0eb9].

34
35
36
37
38
39
40
41

42
43
44
45
46
47
48
34
35
36
37
38
39
40

41
42
43
44
45
46
47
48







-
+







;; Call this one to do all the work and get a standardized list of tests
(define (tests:get-all)
  (let* ((test-search-path   (tests:get-tests-search-path *configdat*)))
    (tests:get-valid-tests (make-hash-table) test-search-path)))

(define (tests:get-tests-search-path cfgdat)
  (let ((paths (map cadr (configf:get-section cfgdat "tests-paths"))))
    (cons (conc *toppath* "/tests") paths)))
    (append paths (list (conc *toppath* "/tests")))))

(define (tests:get-valid-tests test-registry tests-paths)
  (if (null? tests-paths) 
      test-registry
      (let loop ((hed (car tests-paths))
		 (tal (cdr tests-paths)))
	(if (file-exists? hed)

Modified tests/Makefile from [502a984b43] to [4ce1c0dba5].

32
33
34
35
36
37
38
39

40
41
42
43
44
45
46
32
33
34
35
36
37
38

39
40
41
42
43
44
45
46







-
+








stopserver :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -stop-server 0

repl :
	cd ..;make -j && make install
	cd fullrun;$(MEGATEST) -repl
	cd fullrun;$(MEGATEST) -:b -repl

test0 : cleanprep
	cd simplerun ; $(MEGATEST) -server - -debug $(DEBUG)

test1 : cleanprep

test2 : fullprep
67
68
69
70
71
72
73

74
75
76
77
78
79
80
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81







+







	cd fullrun;time $(MEGATEST) -debug $(DEBUG) -run-wait -runtests % -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)

test4a : cleanprep
	cd fullrun;time $(MEGATEST) -debug $(DEBUG) -preclean -runtests all_toplevel -reqtarg ubuntu/nfs/none :runname $(RUNNAME)_b -m "This is a comment specific to a run" -v $(LOGGING)

# NOTE: Only one instance can be a server
test5 : cleanprep
	rm -f fullrun/a*.log fullrun/logs/*
	@echo "WARNING: No longer running fullprep, test converage may be lessened"
	cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_aa -debug $(DEBUG) $(LOGGING) > aa.log 2> aa.log &
	cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ab -debug $(DEBUG) $(LOGGING) > ab.log 2> ab.log &
	cd fullrun;sleep 5;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ac -debug $(DEBUG) $(LOGGING) > ac.log 2> ac.log &
	cd fullrun;sleep 8;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ad -debug $(DEBUG) $(LOGGING) > ad.log 2> ad.log &	
#	cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_ae -debug $(DEBUG) $(LOGGING) > ae.log 2> ae.log &	
#	cd fullrun;sleep 0;$(MEGATEST) -preclean -runtests % -target $(TARGET) :runname $(RUNNAME)_af -debug $(DEBUG) $(LOGGING) > af.log 2> af.log &
114
115
116
117
118
119
120
121

122
123
124
125

126
127
128
129

130
131
132
133

134
135
136
137

138
139
140
141
142
143
144
115
116
117
118
119
120
121

122
123
124
125

126
127
128
129

130
131
132
133

134
135
136
137

138
139
140
141
142
143
144
145







-
+



-
+



-
+



-
+



-
+







# Some simple checks for bootstrapping and run loop logic 

test9 : minsetup test9a test9b test9c test9d test9e

test9a :
	@echo Run super-simple mintest e, no waitons.
	cd mintest;$(DASHBOARD)&
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests e -target $(VER) -runname $(shell date +%H.%M.%S) -debug $(DEBUG)

test9b :
	@echo Run simple mintest d with one waiton c
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests d -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9c :
	@echo Run mintest a with full waiton chain a -> b -> c -> d -> e
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests a -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9d :
	@echo Run an itemized test with no items
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests g -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test9e :
	@echo Run mintest a1 with full waiton chain with d1fail: a1 -> b1 -> c1 -> d1fail -> e1
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) :runname `date +%H.%M.%S` -debug $(DEBUG)
	cd mintest;$(MEGATEST) -preclean -runtests a1 -target $(VER) -runname `date +%H.%M.%S` -debug $(DEBUG)

test10 :
	@echo Run a bunch of different targets simultaneously
	(cd fullrun;$(MEGATEST) -server - ;sleep 2)&
	for targ in mint/btrfs/mintdir sunos/sshfs/loc; do \
	   (cd fullrun;$(MEGATEST) -preclean -runtests priority_10_waiton_1 -target $$targ :runname $(RUNNAME) &); done
	for sys in ubuntu suse redhat debian;do \
155
156
157
158
159
160
161
162

163
164
165
166
167
168
169
156
157
158
159
160
161
162

163
164
165
166
167
168
169
170







-
+







	mkdir -p mintest/runs mintest/links
	cd mintest;$(MEGATEST) -stop-server 0
	cd mintest;$(MEGATEST) -server - -debug $(DEBUG) > server.log 2> server.log & 
	sleep 3
	cd mintest;$(DASHBOARD) -rows 18 &

cleanprep : ../*.scm Makefile */*.config
	mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links
	mkdir -p fullrun/tmp/mt_runs fullrun/tmp/mt_links /tmp/$(USER)/adisk1
	cd ..;make -j;make install
	rm -f */logging.db
	touch cleanprep

fullprep : cleanprep
	cd fullrun;$(MEGATEST) -remove-runs :runname $(RUNNAME)% -target %/%/% -testpatt %/%
	cd fullrun;$(BINPATH)/dashboard -rows 15 &

Modified tests/fullrun/megatest.config from [4c87b72a5a] to [584499bd33].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
29
30
31
32
33
34
35



36
37
38
39
40
41
42







-
-
-







waivercommentpatt ^WW\d+ [a-z].*
incomplete-timeout 1

# yes, anything else is no
run-wait yes


# Use http instead of direct filesystem access
# transport http
# transport fs

# If set to "default" the old code is used. Otherwise defaults to 200 or uses
# numeric value given.
#
runqueue 20

# Default runtimelim 1d 1h 1m 10s
122
123
124
125
126
127
128






129
130
131
132
133

134
135
136
137
138

139
140
141

142
143

144
145

146
147
148
149
150
151
152
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

136
137
138
139
140

141
142
143

144
145
146
147
148

149
150
151
152
153
154
155
156







+
+
+
+
+
+




-
+




-
+


-
+


+

-
+







blah

MAX_ALLOWED_LOAD 200
# XTERM   [system xterm]
# RUNDEAD [system exit 56]

[server]

# Use http instead of direct filesystem access
transport http
# transport fs
# transport nmsg

synchronous 0

# If the server can't be started on this port it will try the next port until
# it succeeds
port 8080
port 9080

# This server will keep running this number of hours after last access. 
# Three minutes is 0.05 hours
# timeout 0.025
timeout 0.1
timeout 0.061

# Server is required - slower but more resistant to Sqlite issues.
# required yes
required yes

# Start server when average query takes longer than this
# server-query-threshold 55500
server-query-threshold 100
# 55500
timeout 0.01

# daemonize yes
# hostname #{scheme (get-host-name)}

## disks are:
## name host:/path/to/area
## -or-
165
166
167
168
169
170
171






























169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#       to your jobgroups where N is the number of parallel runs you are likely to see
#      
sqlite3 6
blockz  10
#       to your jobgroups where N is the number of parallel runs you are likely to see
#       

#======================================================================
# Machine flavors
#
#   These specify lists of hosts or scripts to use or call for various
#   flavors of task.
#
#======================================================================

[flavors]

plain hosts: xena, phoebe
strong command: NBFAKE_HOST=zeus nbfake
arm hosts: cubian

[archive]

# use machines of these flavor
useflavors plain
targsize 2G

# minimum space required on an archive disk before allowing archiving to start (MB)
minspace 10

[archive-disks]

# Archives will be organised under these paths like this:
#  <testsuite>/<creationdate>
# Within the archive the data is structured like this:
#  <target>/<runname>/<test>/
disk0 /tmp/#{getenv USER}/adisk1

Modified tests/mintest/megatest.config from [158955d103] to [74b434d2c6].

1
2
3
4
5
6

7
8
9
10
11
12
13
1
2
3
4
5

6
7
8
9
10
11
12
13





-
+







[fields]
X TEXT

[setup]
max_concurrent_jobs 50
linktree #{getenv PWD}/linktree
linktree #{getenv MT_RUN_AREA_HOME}/linktree
transport http

[server]
port 8090

[jobtools]
useshell yes

Modified tests/unittests/basicserver.scm from [4e0a526d82] to [bbe04303a3].

12
13
14
15
16
17
18
19

20
21
22
23


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52































53
54
55
56
57
58
59

60
61
62
63
64
65
66
67

68
69



70
71
72
73
74
75
76
77
78
79
80
81
82



83
84
85
86
87
88
89
90








91
92
93

94
95
96
97
98
99
100
101
102
103
104

105
106
107
108
109




































110
111
112
113
114
115
116
12
13
14
15
16
17
18

19
20
21
22
23
24
25





























26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

63
64
65
66
67
68
69
70
71
72


73
74
75
76
77
78
79
80
81
82
83
84
85



86
87
88

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116


117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165







-
+




+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+






-
+








+
-
-
+
+
+










-
-
-
+
+
+
-







+
+
+
+
+
+
+
+



+









-
-
+





+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







(test "setup for run" #t (begin (launch:setup-for-run)
 				(string? (getenv "MT_RUN_AREA_HOME"))))

;; NON Server tests go here

(test #f #f (db:dbdat-get-path *db*))
(test #f #f (db:get-run-name-from-id *db* run-id))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
;; (test #f '("SYSTEM" "RELEASE") (rmt:get-keys))

;; (exit)

;; Server tests go here 
(for-each
 (lambda (run-id)
(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
(server:kind-run run-id)
(test "did server start within 20 seconds?"
      #t
      (let loop ((remtries 20)
		 (running (tasks:server-running-or-starting? (db:delay-if-busy
							      (tasks:open-db))
							     run-id)))
	(if running 
	    (> running 0)
	    (if (> remtries 0)
		(begin
		  (thread-sleep! 1.1)
		  (loop (- remtries 1)
			(tasks:server-running-or-starting? (db:delay-if-busy
							    (tasks:open-db))
							   run-id)))))))

(test "did server become available" #t
      (let loop ((remtries 10)
		 (res      (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
	(if res
	    (vector? res)
	    (begin
	      (if (> remtries 0)
		  (begin
		    (thread-sleep! 1.1)
		    (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
		  res)))))
   (test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
   (server:kind-run run-id)
   (test "did server start within 20 seconds?"
	 #t
	 (let loop ((remtries 20)
		    (running (tasks:server-running-or-starting? (db:delay-if-busy
								 (tasks:open-db))
								run-id)))
	   (if running 
	       (> running 0)
	       (if (> remtries 0)
		   (begin
		     (thread-sleep! 1)
		     (loop (- remtries 1)
			   (tasks:server-running-or-starting? (db:delay-if-busy
							       (tasks:open-db))
							      run-id)))))))
   
   (test "did server become available" #t
	 (let loop ((remtries 10)
		    (res      (tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
	   (if res
	       (vector? res)
	       (begin
		 (if (> remtries 0)
		     (begin
		       (thread-sleep! 1.1)
		       (loop (- remtries 1)(tasks:get-server (db:delay-if-busy (tasks:open-db)) run-id)))
		     res)))))
   )
 (list 0 1))

(define user    (current-user-name))
(define runname "mytestrun")
(define keys    (rmt:get-keys))
(define runinfo #f)
(define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
(define header  (vector "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))
(define header  (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))

;; Setup
;;
(test #f #f  (not (client:setup run-id)))
(test #f #f  (not (hash-table-ref/default *runremote* run-id #f)))

;; Login
;;
(test #f'(#t "successful login")
(test #f '(#t "successful login") (rmt:login-no-auto-client-setup (hash-table-ref/default *runremote* run-id #f) run-id))
(test #f '(#t "successful login") (rmt:login run-id))
      (rmt:login-no-auto-client-setup (hash-table-ref/default *runremote* run-id #f) run-id))
(test #f '(#t "successful login")
      (rmt:login run-id))

;; Keys
;;
(test #f '("SYSTEM" "RELEASE")  (rmt:get-keys))

;; No data in db
;;
(test #f '() (rmt:get-all-run-ids))
(test #f #f  (rmt:get-run-name-from-id run-id))
(test #f 
      (let ((runrec (vector #f #f)))
	(vector-set! runrec header 0)
	(vector-set! runrec (vector #f       #f        #f   #f) 1)
      (vector
       header
       (vector #f #f #f #f))
	runrec)
      (rmt:get-run-info run-id))

;; Insert data into db
;;
(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user))
;; (test #f #f (rmt:get-runs-by-patt keys runname))
(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" ""))
(define test-one-id #f)
(test #f 1  (let ((test-id (rmt:get-test-id run-id "test-one" "")))
	      (set! test-one-id test-id)
	      test-id))
(define test-one-rec #f)
(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id)))
		      (set! test-one-rec test-rec)
		      (vector-ref test-rec 2)))

;; With data in db
;;
(print "Using runame=" runname)
(test #f '(1)    (rmt:get-all-run-ids))
(test #f runname (rmt:get-run-name-from-id run-id))
(test #f 
      runname
      (let ((run-info (rmt:get-run-info run-id)))
	(db:get-value-by-header (db:get-rows run-info)
				(db:get-header run-info)
				"runname")))

      ;; (vector header (vector "abc" "def" 1 "mytestrun" "new" "n/a" "matt" 1416280640.0))

(for-each (lambda (run-id)
;; test killing server
;;
(tasks:kill-server-run-id run-id)

(test #f #f (tasks:server-running-or-starting? (db:delay-if-busy (tasks:open-db)) run-id))
)
(list 0 1))

;; Tests to assess reading/writing while servers are starting/stopping
(define start-time (current-seconds))
(let loop ((test-state 'start))
  (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id))
	 (first-dat   (if (not (null? server-dats))
			  (car server-dats)
			  #f)))
    (map (lambda (dat)
	   (apply print (intersperse (vector->list dat) ", ")))
	 server-dats)
    (test #f test-one-rec (rmt:get-test-info-by-id run-id test-one-id))
    (thread-sleep! 1)
    (case test-state
      ((start)
       (print "Trying to start server")
       (server:kind-run run-id)
       (loop 'server-started))
      ((server-started)
       (case (vector-ref first-dat)
	 ((running)
	  (print "Server appears to be running. Now ask it to shutdown")
	  (rmt:kill-server run-id)
	  (loop 'server-shutdown))
	 ((shutting-down)
	  (loop test-state))
	 (else (print "Don't know what to do if get here"))))
      ((server-shutdown)
       (loop test-state)))))

;;======================================================================
;; END OF TESTS
;;======================================================================


;; (test #f #f (client:setup run-id))

;; (set! *transport-type* 'http)
;; 
;; (test "setup for run" #t (begin (launch:setup-for-run)
;; 				(string? (getenv "MT_RUN_AREA_HOME"))))

Modified tests/unittests/server.scm from [fc736c5f6c] to [d45af24828].

1
2
3
4
5
6
7
8
9


10
11
12


13








14

15
16
17











18
19
20
21






22

23
24


25
26
27
28
29
30
31
32
33
34
35
36















37
38
39
40
41
42
43
44
45
46
47
48
49






















50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86























































































87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116























1
2
3
4
5
6
7
8

9
10
11


12
13
14
15
16
17
18
19
20
21
22

23



24
25
26
27
28
29
30
31
32
33
34
35



36
37
38
39
40
41
42
43


44
45












46
47
48
49
50
51
52
53
54
55
56
57
58
59
60













61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

83



































84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170






























171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193








-
+
+

-
-
+
+

+
+
+
+
+
+
+
+
-
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+

-
-
-
+
+
+
+
+
+

+
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;======================================================================
;; S E R V E R
;;======================================================================

;; Run like this:
;;
;;  (cd ..;make && make install) && ./rununittest.sh server 1;(cd simplerun;megatest -stop-server 0)

(set! *transport-type* 'http)
(delete-file* "logs/1.log")
(define run-id 1)

(test "setup for run" #t (begin (setup-for-run)
				(string? (getenv "MT_RUN_AREA_HOME"))))
(test "setup for run" #t (begin (launch:setup-for-run)
 				(string? (getenv "MT_RUN_AREA_HOME"))))

;; Insert data into db
;;
(define user    (current-user-name))
(define runname "mytestrun")
(define keys    (rmt:get-keys))
(define runinfo #f)
(define keyvals '(("SYSTEM" "abc")("RELEASE" "def")))
(define header  (list "SYSTEM" "RELEASE" "id" "runname" "state" "status" "owner" "event_time"))
(test "server-register, get-best-server" #t (let ((res #f))

					      (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
					      (set! res (open-run-close tasks:get-best-server tasks:open-db))
					      (number? (vector-ref res 3))))
(test #f 1 (rmt:register-run keyvals runname "new" "n/a" user))
;; (test #f #f (rmt:get-runs-by-patt keys runname))
(test #f #t (rmt:general-call 'register-test run-id run-id "test-one" ""))
(define test-one-id #f)
(test #f 30001  (let ((test-id (rmt:get-test-id run-id "test-one" "")))
	      (set! test-one-id test-id)
	      test-id))
(define test-one-rec #f)
(test #f "test-one" (let ((test-rec (rmt:get-test-info-by-id run-id test-one-id)))
		      (set! test-one-rec test-rec)
		      (vector-ref test-rec 2)))

(test "de-register server" #f (let ((res #f))
				(open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234)
				(vector? (open-run-close tasks:get-best-server tasks:open-db))))
(use trace)
(import trace)
;; (trace
;;  rmt:send-receive
;;  rmt:open-qry-close-locally
;; )

;; Tests to assess reading/writing while servers are starting/stopping
(define server-pid #f)

(define start-time (current-seconds))
(let loop ((test-state 'start))
;; Not sure how the following should work, replacing it with system of megatest -server
;; (test "launch server" #t (let ((pid (process-fork (lambda ()
;; 						    ;; (daemon:ize)
;; 						    (server:launch 'http)))))
;; 			   (set! server-pid pid)
;; 			   (number? pid)))
(system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &")

(let loop ((n 10))
  (thread-sleep! 1) ;; need to wait for server to start.
  (let ((res (open-run-close tasks:get-best-server tasks:open-db)))
    (print "tasks:get-best-server returned " res)
  (let* ((server-dats (tasks:get-server-records (db:delay-if-busy (tasks:open-db)) run-id))
	 (first-dat   (if (not (null? server-dats))
			  (car server-dats)
			  #f))
	 (server-state (or (and first-dat (string->symbol (vector-ref first-dat 8))) 'no-dat)))
    (if first-dat
	(map (lambda (dat)
	       (apply print (intersperse (vector->list dat) ", ")))
	     server-dats)
	(print "No server"))
    (test #f test-one-rec (rmt:get-test-info-by-id run-id test-one-id))
    (thread-sleep! 1)
    (case test-state
      ((start)
       (print "Trying to start server")
    (if (and (not res)
	     (> n 0))
	(loop (- n 1)))))

(test "get-best-server" #t (begin 
			     (client:launch)
			     (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
			       (vector? dat))))

(define *keys*               (keys:config-get-fields *configdat*))
(define *keyvals*            (keys:target->keyval *keys* "a/b/c"))

(test #f #t                       (string? (car *runremote*)))
       (server:kind-run run-id)
       (loop 'server-started))
      ((server-started)
       (case server-state 
	 ((running)
	  (print "Server appears to be running. Now ask it to shutdown")
	  (rmt:kill-server run-id)
	  ;; (trace rmt:open-qry-close-locally rmt:send-receive)
	  (loop 'shutdown-started))
	 ((available)
	  (loop test-state))
	 ((shutting-down)
	  (loop test-state))
	 ((no-dat)
	  (loop test-state))
	 (else (print "Don't know what to do if get here"))))
      ((shutdown-started)
       (case server-state 
	 ((no-dat)
	  (print "Server appears to have shutdown, ending this test"))
	 (else
	  (loop test-state)))))))
(test #f '(#t "successful login") (rmt:login)) ;;  *runremote* *toppath* *my-client-signature*)))

(test #f #f                       (rmt:get-test-info-by-id 1 99)) ;; get non-existant test

;; RUNS
(test #f 1                        (rmt:register-run  *keyvals* "firstrun" "new" "n/a" (current-user-name)))
(test "get run info"  "firstrun"  (let ((rinfo (rmt:get-run-info 1)))
				    (vector-ref (vector-ref rinfo 1) 3)))
(test "get runname from id" "firstrun" (rmt:get-run-name-from-id 1))

;; TESTS
(test "get tests (no data)" '()   (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f))
(test "register test"       #t    (rmt:general-call 'register-test 1 1 "test1" ""))
(test "get tests (some data)"  1  (length (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f)))
(test "get test id"            1  (rmt:get-test-id 1 "test1" ""))

(test "sync back"              #t (> (rmt:sync-inmem->db) 0))
(test "get test id from main"  1  (db:get-test-id *db* 1 "test1" ""))

(test "get keys"               #t (list? (rmt:get-keys)))
(test "set comment"            #t (begin (rmt:general-call 'set-test-comment 1 "this is a comment" 1) #t))
(test "get comment" "this is a comment" (let ((trec (rmt:get-test-info-by-id 1 1)))
					  (db:test-get-comment trec)))

;; MORE RUNS
(test "get runs"  #t (let* ((runs   (rmt:get-runs "%" #f #f '()))
			    (header (vector-ref runs 0))
			    (data   (vector-ref runs 1)))
		       (and (list?   header)
			    (list?   data)
			    (vector? (car data)))))

(test "get local testinfo" "test1" (vector-ref (db:get-testinfo-state-status *db* 1 1) 2))
(test "get testinfo"       "test1" (vector-ref (rmt:get-testinfo-state-status 1 1) 2))

;;======================================================================
;; D B
(exit)	 
       
;; (set! *transport-type* 'http)
;; 
;; (test "setup for run" #t (begin (setup-for-run)
;; 				(string? (getenv "MT_RUN_AREA_HOME"))))
;; 
;; (test "server-register, get-best-server" #t (let ((res #f))
;; 					      (open-run-close tasks:server-register tasks:open-db 1 "bob" 1234 100 'live 'http)
;; 					      (set! res (open-run-close tasks:get-best-server tasks:open-db))
;; 					      (number? (vector-ref res 3))))
;; 
;; (test "de-register server" #f (let ((res #f))
;; 				(open-run-close tasks:server-deregister tasks:open-db "bob" port: 1234)
;; 				(vector? (open-run-close tasks:get-best-server tasks:open-db))))
;; 
;; (define server-pid #f)
;; 
;; ;; Not sure how the following should work, replacing it with system of megatest -server
;; ;; (test "launch server" #t (let ((pid (process-fork (lambda ()
;; ;; 						    ;; (daemon:ize)
;; ;; 						    (server:launch 'http)))))
;; ;; 			   (set! server-pid pid)
;; ;; 			   (number? pid)))
;; (system "../../bin/megatest -server - -debug 22 > server.log 2> server.log &")
;; 
;; (let loop ((n 10))
;;   (thread-sleep! 1) ;; need to wait for server to start.
;;   (let ((res (open-run-close tasks:get-best-server tasks:open-db)))
;;     (print "tasks:get-best-server returned " res)
;;     (if (and (not res)
;; 	     (> n 0))
;; 	(loop (- n 1)))))
;; 
;; (test "get-best-server" #t (begin 
;; 			     (client:launch)
;; 			     (let ((dat (open-run-close tasks:get-best-server tasks:open-db)))
;; 			       (vector? dat))))
;; 
;; (define *keys*               (keys:config-get-fields *configdat*))
;; (define *keyvals*            (keys:target->keyval *keys* "a/b/c"))
;; 
;; (test #f #t                       (string? (car *runremote*)))
;; (test #f '(#t "successful login") (rmt:login)) ;;  *runremote* *toppath* *my-client-signature*)))
;; 
;; (test #f #f                       (rmt:get-test-info-by-id 1 99)) ;; get non-existant test
;; 
;; ;; RUNS
;; (test #f 1                        (rmt:register-run  *keyvals* "firstrun" "new" "n/a" (current-user-name)))
;; (test "get run info"  "firstrun"  (let ((rinfo (rmt:get-run-info 1)))
;; 				    (vector-ref (vector-ref rinfo 1) 3)))
;; (test "get runname from id" "firstrun" (rmt:get-run-name-from-id 1))
;; 
;; ;; TESTS
;; (test "get tests (no data)" '()   (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f))
;; (test "register test"       #t    (rmt:general-call 'register-test 1 1 "test1" ""))
;; (test "get tests (some data)"  1  (length (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f)))
;; (test "get test id"            1  (rmt:get-test-id 1 "test1" ""))
;; 
;; (test "sync back"              #t (> (rmt:sync-inmem->db) 0))
;; (test "get test id from main"  1  (db:get-test-id *db* 1 "test1" ""))
;; 
;; (test "get keys"               #t (list? (rmt:get-keys)))
;; (test "set comment"            #t (begin (rmt:general-call 'set-test-comment 1 "this is a comment" 1) #t))
;; (test "get comment" "this is a comment" (let ((trec (rmt:get-test-info-by-id 1 1)))
;; 					  (db:test-get-comment trec)))
;; 
;; ;; MORE RUNS
;; (test "get runs"  #t (let* ((runs   (rmt:get-runs "%" #f #f '()))
;; 			    (header (vector-ref runs 0))
;; 			    (data   (vector-ref runs 1)))
;; 		       (and (list?   header)
;; 			    (list?   data)
;; 			    (vector? (car data)))))
;; 
;; (test "get local testinfo" "test1" (vector-ref (db:get-testinfo-state-status *db* 1 1) 2))
;; (test "get testinfo"       "test1" (vector-ref (rmt:get-testinfo-state-status 1 1) 2))
;; 
;; ;;======================================================================
;; ;; D B
;; ;;======================================================================
;; 
;; (test "pass fail counts" #t (rmt:general-call 'pass-fail-counts 10 9 1))
;; (test "get pass fail counts" 19 (let ((dat (rmt:get-test-info-by-id 1)))
;; 				  (+ (db:test-get-pass_count dat)
;; 				     (db:test-get-fail_count dat))))
;; 
;;======================================================================

(test "pass fail counts" #t (rmt:general-call 'pass-fail-counts 10 9 1))
(test "get pass fail counts" 19 (let ((dat (rmt:get-test-info-by-id 1)))
				  (+ (db:test-get-pass_count dat)
				     (db:test-get-fail_count dat))))

(define testregistry (make-hash-table))
(for-each
 (lambda (tname)
   (for-each
    (lambda (itempath)
      (let ((tkey  (conc tname "/" itempath))
	    (rpass (random 10))
	    (rfail (random 10)))
	(hash-table-set! testregistry tkey (list tname itempath))
	(rmt:general-call 'register-test 1 tname itempath)
	(let* ((tid  (rmt:get-test-id 1 tname itempath))
	       (tdat (rmt:get-test-info-by-id tid)))
	  (rmt:general-call 'pass-fail-counts rpass rfail (db:test-get-id tdat))
	  (let* ((resdat (rmt:get-test-info-by-id tid)))
	    (test "set/get pass fail counts" (list rpass rfail)
		  (list (db:test-get-pass_count resdat)
			(db:test-get-fail_count resdat)))))))
    (list "" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j")))
 (list "test1" "test2" "test3" "test4" "test5"))


(test #f '(#t "exit process started") (rmt:kill-server)) ;; *toppath* *my-client-signature* #f)))

;; (define testregistry (make-hash-table))
;; (for-each
;;  (lambda (tname)
;;    (for-each
;;     (lambda (itempath)
;;       (let ((tkey  (conc tname "/" itempath))
;; 	    (rpass (random 10))
;; 	    (rfail (random 10)))
;; 	(hash-table-set! testregistry tkey (list tname itempath))
;; 	(rmt:general-call 'register-test 1 tname itempath)
;; 	(let* ((tid  (rmt:get-test-id 1 tname itempath))
;; 	       (tdat (rmt:get-test-info-by-id tid)))
;; 	  (rmt:general-call 'pass-fail-counts rpass rfail (db:test-get-id tdat))
;; 	  (let* ((resdat (rmt:get-test-info-by-id tid)))
;; 	    (test "set/get pass fail counts" (list rpass rfail)
;; 		  (list (db:test-get-pass_count resdat)
;; 			(db:test-get-fail_count resdat)))))))
;;     (list "" "a" "b" "c" "d" "e" "f" "g" "h" "i" "j")))
;;  (list "test1" "test2" "test3" "test4" "test5"))
;; 
;; 
;; (test #f '(#t "exit process started") (rmt:kill-server)) ;; *toppath* *my-client-signature* #f)))
;; 

Modified txtdb/nada3/RunsToLock.dat from [9e39b39de9] to [5d15c8e35d].

1

2
3
4
5
6
7
8
1
2
3
4
5
6
7
8
9

+







[def]
def def
ghi jkl
qrst 
uvwx 
yz12 

[mno]
abc 

Modified utils/Makefile.installall from [c3d10e5280] to [63718d9d5f].

41
42
43
44
45
46
47
48

49
50
51
52
53
54
55
41
42
43
44
45
46
47

48
49
50
51
52
53
54
55







-
+







IUPBRANCH=iup-3.10.1

# Eggs to install (straightforward ones)
EGGS=matchable readline apropos base64 regex-literals format regex-case test coops trace csv \
     dot-locking posix-utils posix-extras directory-utils hostinfo tcp-server rpc csv-xml fmt \
     json md5 awful http-client spiffy uri-common intarweb spiffy-request-vars \
     spiffy-directory-listing ssax sxml-serializer sxml-modifications sql-de-lite \
     srfi-19 refdb ini-file
     srfi-19 refdb ini-file sparse-vectors

#
# Derived variables
#

ifeq ($(PROXY),)
PROX:=
143
144
145
146
147
148
149
















150
151
152
153
154
155
156
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








$(PREFIX)/bin/sqlite3 : sqlite-autoconf-$(SQLITE3_VERSION)/config.log
	cd sqlite-autoconf-$(SQLITE3_VERSION);./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/sqlite3.so : $(PREFIX)/bin/sqlite3
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) sqlite3

#======================================================================
# N  A N O M S G
#======================================================================

nanomsg-0.5-beta.tar.gz :
	wget http://download.nanomsg.org/nanomsg-0.5-beta.tar.gz

nanomsg-0.5-beta/COPYING : nanomsg-0.5-beta.tar.gz
	tar xfvz nanomsg-0.5-beta.tar.gz

$(PREFIX)/bin/nanocat : nanomsg-0.5-beta/COPYING
	cd nanomsg-0.5-beta;./configure --prefix=$(PREFIX);make;make install

$(PREFIX)/lib/nanomsg.so : $(PREFIX)/bin/nanocat
	CSC_OPTIONS="-I$(PREFIX)/include -L$(PREFIX)/lib" $(CHICKEN_INSTALL) $(PROX) nanomsg

#======================================================================
# M A T T S   U T I L S
#======================================================================

opensrc.fossil :
	fossil clone http://www.kiatoa.com/fossils/opensrc opensrc.fossil

Modified utils/plot-code.scm from [de4d05b676] to [cd37a2db38].

1
2







3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


20
21
22
23
24
25
26
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


+
+
+
+
+
+
+

















+
+







#!/mfs/pkgs/chicken/4.8.0.5/bin/csi -nbq

;; Coming soon (right?) Usage: plot-code file1.scm,file2.scm "fun1,fun2,x*" *.scm > plot.dot
;; Usage: plot-code file1.scm,file2.scm *.scm > plot.dot
;;        dot -Tpdf plot.dot > plot.pdf
;; first param is comma separated list of files to include in the map, use - to do all
;; second param is list of regexs for functions to include in the map
;; third param is list of files to scan

(use regex srfi-69 srfi-13)

(define targs #f) 
(define files (cddddr (argv)))

(let ((targdat (cadddr (argv))))
  (if (equal? targdat "-")
      (set! targs files)
      (set! targs (string-split targdat ","))))

(define filedat-defns (make-hash-table))
(define filedat-usages (make-hash-table))

(define defn-rx (regexp "^\\s*\\(define\\s+\\(([^\\s\\)]+).*"))
(define all-regexs (make-hash-table))

(define all-fns '())

;; for the se

(define (print-err . data)
  (with-output-to-port (current-error-port)
    (lambda ()
      (apply print data))))

(print-err "Making graph for files: " (string-intersperse targs ", "))
79
80
81
82
83
84
85
86






















87
88
89
90
91
92
93
88
89
90
91
92
93
94

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123







-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		    (loop (car tal)
			  (cdr tal)
			  newres)))
	      (if (null? tal)
		  res
		  (loop (car tal)(cdr tal) res)))))
      '()))
  

;; (define mm-header #<<MMHEADER
;; <map version="freeplane 1.2.0">
;; <!--To view this file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net -->
;; 
;; MMHEADER
;; 
;; (define (add-node text)
;;   <node TEXT="homenode" ID="ID_1723255651" CREATED="1283093380553" MODIFIED="1417113442955"><hook NAME="MapStyle">
;; )
;; 
;;  minimal mindmap file
;;  <map version="freeplane 1.2.0">
;;   <!--To view this file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net -->
;;   <node TEXT="homenode" ID="ID_1723255651" CREATED="1283093380553" MODIFIED="1417113442955">
;;   <node TEXT="node1" POSITION="right" ID="ID_1810107939" CREATED="1417113473476" MODIFIED="1417113480425">
;;   <node TEXT="node2" ID="ID_68133256" CREATED="1417113482134" MODIFIED="1417113484466"/>
;;   <node TEXT="node3" ID="ID_1572284821" CREATED="1417113487785" MODIFIED="1417113491589"/>
;;   </node>
;;   </node>
;;   </map>

;; Gather the usages
(print "digraph G {")
(define curr-cluster-num 0)
(define function-calls '())

(for-each
 (lambda (fname)

Added utils/trace/trace.import.scm version [937dcb55c1].

































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;;; trace.import.scm - GENERATED BY CHICKEN 4.9.0.1 -*- Scheme -*-

(eval '(import
         scheme
         chicken
         csi
         advice
         extras
         ports
         data-structures
         (except srfi-1 break)
         miscmacros))
(##sys#register-compiled-module
  'trace
  (list)
  '((breakpoint . trace#breakpoint)
    (trace . trace#trace)
    (untrace . trace#untrace)
    (break . trace#break)
    (unbreak . trace#unbreak)
    (trace-output-port . trace#trace-output-port)
    (continue . trace#continue)
    (c . trace#c)
    (traced? . trace#traced?)
    (trace-module . trace#trace-module)
    (untrace-module . trace#untrace-module)
    (trace-verbose . trace#trace-verbose)
    (trace/untrace . trace#trace/untrace))
  (list)
  (list))

;; END OF FILE

Added utils/trace/trace.meta version [9714181a62].











1
2
3
4
5
6
7
8
9
10
+
+
+
+
+
+
+
+
+
+
;;;; trace.meta -*- Scheme -*-


((category tools)
 (synopsis "tracing and breakpoints")
 (author "felix winkelmann")
 (license "public domain")
 (needs advice				; don't we all?
	miscmacros)
 (files "tests/run.scm" "trace.meta" "trace.release-info" "trace.scm" "trace.setup") )

Added utils/trace/trace.scm version [dc3560e035].




































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
;;;; trace.scm


(module trace (breakpoint 
	       trace untrace
	       break unbreak
	       trace-output-port
	       continue c 
	       traced?
	       trace-module untrace-module
	       trace-verbose
	       trace/untrace)
	       
(import scheme chicken csi)

(use advice extras ports data-structures)
(require-library srfi-1)
(import (except srfi-1 break) miscmacros)


(define *last-breakpoint* #f)
(define *traced-procedures* '())
(define *broken-procedures* '())
(define *trace-indent-level* 0)

(define trace-output-port (make-parameter (current-output-port)))
(define trace-verbose (make-parameter #t))

(define (break-entry name args)
  ;; Does _not_ unwind!
  (##sys#call-with-current-continuation
   (lambda (c)
     (let ((exn (##sys#make-structure
		 'condition
		 '(exn breakpoint)
		 (list '(exn . message) "*** breakpoint ***"
		       '(exn . arguments) (list (cons name args))
		       '(exn . location) name
		       '(exn . continuation) c) ) ) )
       (set! *last-breakpoint* exn)
       (signal exn) ) ) ) )

(define (break-resume exn)
  (let ((a (member '(exn . continuation) (##sys#slot exn 2))))
    (if a
	((cadr a) (void))
	(error "condition has no continuation" exn) ) ) )

(define (breakpoint #!optional (name 'breakpoint))
  (break-entry name '()) )

(define (trace-indent)
  (let ((port (trace-output-port)))
    (do ((i (fxmin 3 *trace-indent-level*) (fx- i 1)))
	((fx<= i 0))
      (write-char #\space port) )
    (fprintf port "[~a] " *trace-indent-level*) ) )

(define (traced-procedure-entry name args)
  (let ((port (trace-output-port)))
    (trace-indent)
    (set! *trace-indent-level* (fx+ 1 *trace-indent-level*))
    (write (cons name args) port)
    (write ", Called from: " port)
    (write (conc (car (reverse (get-call-chain)))))
    (write-char #\newline port)
    (flush-output port) ) )

(define (traced-procedure-exit name results)
  (let ((port (trace-output-port)))
    (set! *trace-indent-level* (fx- *trace-indent-level* 1))
    (trace-indent)
    (fprintf port "~a -> " name)
    (if results
	(for-each
	 (lambda (x)
	   (write x port)
	   (write-char #\space port) )
	 results)
	(display "(escaping)" port))
    (write-char #\newline port)
    (flush-output port) ) )

(define (procedure-name proc)
  (cond ((procedure-information proc) =>
	 (lambda (info)
	   (if (pair? info) (car info) info) ) )
	(else '<unknown>)) )

(define (do-trace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (cond ((traced? s)
	    (warning "procedure already traced" s) )
	   (else
	    (let ((name (procedure-name s)))
	      (when (trace-verbose)
		(fprintf (current-error-port) "; tracing ~a~%" name))
	      (set! *traced-procedures* (cons (cons s name) *traced-procedures*))
	      (advise 
	       'around s
	       (lambda (next args)
		 (let ((results #f))
		   (dynamic-wind
		       (cut traced-procedure-entry name args)
		       (lambda () 
			 (call-with-values (cut apply next args)
			   (lambda rs
			     (set! results rs)
			     (apply values rs))))
		       (cut traced-procedure-exit name results))))
	       '*trace*)))))
   procs) )

(define (do-untrace-all)
  (define (unadvise* p)
    (ignore-errors (unadvise p '*trace*)))
  (for-each
   (lambda (proc)
     (let ((proc (car proc)))
       (when (trace-verbose)
	 (fprintf (current-error-port) "; untracing ~a~%" (procedure-name proc))
	 (unadvise* proc))))
   *traced-procedures*)
  (set! *traced-procedures* '()))

(define (do-untrace procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *traced-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure not traced" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; untracing ~a~%" name))
	      (ignore-errors (unadvise s '*trace*))
	      (set! *traced-procedures* 
		(delete 
		 p *traced-procedures* 
		 eq?))))))
   procs) )

(define (do-break procs)
  (for-each
   (lambda (s)
     (let ((name (procedure-name s)))
       (ensure procedure? s)
       (cond ((assq s *broken-procedures*)
	      (warning "procedure already has break-point" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; setting break-point in ~a~%" name))
	      (set! *broken-procedures* (cons (cons s name) *broken-procedures*))
	      (advise 
	       'before s
	       (lambda (args)
		 (break-entry name args) )
	       '*break*) ) )))
   procs) )

(define (do-unbreak procs)
  (for-each
   (lambda (s)
     (ensure procedure? s)
     (let ((p (assq s *broken-procedures*)) 
	   (name (procedure-name s)))
       (cond ((not p) (warning "procedure has no breakpoint" name))
	     (else
	      (when (trace-verbose)
		(fprintf (current-error-port) "; removing break-point in ~a~%" name))
	      (ignore-errors (unadvise s '*break*))
	      (set! *broken-procedures* (delete p *broken-procedures* eq?) ) ) ) ) )
   procs) )

(define (do-unbreak-all)
  (for-each
   (lambda (bp)
     (ignore-errors (unadvise (car bp) '*break*)))
   *broken-procedures*)
  (set! *broken-procedures* '())
  (void))

(define (trace . procs)
  (cond ((null? procs)
	 (when (pair? *traced-procedures*)
	   (printf "Traced:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *traced-procedures*)) )
	(else
	 (do-trace procs) ) ) )

(define (untrace . procs)
  (cond ((null? procs) (do-untrace-all))
	(else (do-untrace procs)))
  (void))

(define (break . procs)
  (cond ((null? procs)
	 (when (pair? *broken-procedures*)
	   (printf "Breakpoints:~%~%")
	   (for-each (lambda (p) (printf "  ~a~%" (cdr p))) *broken-procedures*)) )
	(else
	 (do-break procs) ) ) )

(define (unbreak . procs)
  (cond ((null? procs) (do-unbreak-all))
	(else (do-unbreak procs))))

(define (continue #!optional (bp *last-breakpoint*))
  (cond (*last-breakpoint*
	 (let ((exn *last-breakpoint*))
	   (set! *last-breakpoint* #f)
	   (break-resume exn) ) )
	(else (display "no breakpoint pending\n") ) ) )

(define c continue)

(define (traced? proc)
  (assq proc *traced-procedures*))

(define (trace/untrace . procs)
  (for-each
   (lambda (proc)
     ((if (traced? proc) do-untrace do-trace) (list proc)))
   procs))

(define (walk-module mname proc)
  (let* ((m (##sys#find-module mname))
	 (exps (nth-value 1 (##sys#module-exports m))))
    (for-each
     (lambda (exp)
       (let* ((realname (cdr exp))
	      (prim (get realname '##core#primitive)))
	 (if prim
	     (warning "export is a core-library primitive - not traced" (car exp))
	     (when (##sys#symbol-has-toplevel-binding? realname)
	       (let ((val (##sys#slot realname 0)))
		 (when (procedure? val)
		   (proc val)))))))
     exps)))

(define (trace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module mname trace))
   mnames))

(define (untrace-module . mnames)
  (for-each
   (lambda (mname)
     (walk-module 
      mname 
      (lambda (proc)
	(when (traced? proc)
	  (do-untrace (list proc))))))
   mnames))

)

Added utils/trace/trace.setup version [d222d610b4].










1
2
3
4
5
6
7
8
9
+
+
+
+
+
+
+
+
+
;;;; trace.setup -*- Scheme -*-


(compile -s trace.scm -O3 -d1 -j trace)
(compile -s trace.import.scm -O3 -d0)

(install-extension
 'trace
 '("trace.so" "trace.import.so"))

Added zmq-transport.scm version [e1f3152a02].














































































































































































































































































































































































































































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;; Copyright 2006-2012, Matthew Welland.
;; 
;;  This program is made available under the GNU GPL version 2.0 or
;;  greater. See the accompanying file COPYING for details.
;; 
;;  This program is distributed WITHOUT ANY WARRANTY; without even the
;;  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
;;  PURPOSE.

(require-extension (srfi 18) extras tcp s11n)

(use sqlite3 srfi-1 posix regex regex-case srfi-69 hostinfo md5 message-digest)
(import (prefix sqlite3 sqlite3:))

(use zmq)

(declare (unit zmq-transport))

(declare (uses common))
(declare (uses db))
(declare (uses tests))
(declare (uses tasks)) ;; tasks are where stuff is maintained about what is running.
(declare (uses server))

(include "common_records.scm")
(include "db_records.scm")

;; Transition to pub --> sub with pull <-- push
;;
;;   1. client sends request to server via push to the pull port
;;   2. server puts request in queue or processes immediately as appropriate
;;   3. server puts responses from completed requests into pub port 
;;
;; TODO
;;
;; Done Tested
;; [x]  [ ]    1. Add columns pullport pubport to servers table
;; [x]  [ ]    2. Add rm of monitor.db if older than 11/12/2012 
;; [x]  [ ]    3. Add create of pullport and pubport with finding of available ports
;; [x]  [ ]    4. Add client compose of request
;; [x]  [ ]        - name of client: testname/itempath-test_id-hostname 
;; [x]  [ ]        - name of request: callname, params
;; [x]  [ ]        - request key: f(clientname, callname, params)
;; [x]  [ ]    5. Add processing of subscription hits
;; [x]  [ ]        - done when get key 
;; [x]  [ ]        - return results
;; [x]  [ ]    6. Add timeout processing
;; [x]  [ ]        - after 60 seconds
;; [ ]  [ ]            i. check server alive, connect to new if necessary
;; [ ]  [ ]           ii. resend request
;; [ ]  [ ]    7. Turn self ping back on

(define (zmq-transport:make-server-url hostport)
  (if (not hostport)
      #f
      (conc "tcp://" (car hostport) ":" (cadr hostport))))

(define  *server-loop-heart-beat* (current-seconds))
(define *heartbeat-mutex* (make-mutex))

;;======================================================================
;; S E R V E R
;;======================================================================

(define-inline (zmqsock:get-pub  dat)(vector-ref dat 0))
(define-inline (zmqsock:get-pull dat)(vector-ref dat 1))
(define-inline (zmqsock:set-pub! dat s)(vector-set! dat s 0))
(define-inline (zmqsock:set-pull! dat s)(vector-set! dat s 0))

(define (zmq-transport:run hostn)
  (debug:print 2 "Attempting to start the server ...")
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, cannot start server, exiting")
	    (exit))))
  (let* ((db              (open-db)) ;; here we *do not* want to be opening and closing the db
	 (zmq-sdat1       #f)
	 (zmq-sdat2       #f)
	 (pull-socket     #f)
	 (pub-socket      #f)
	 (p1              #f)
	 (p2              #f)
	 (zmq-sockets-dat #f)
	 (iface           (if (string=? "-" hostn)
			      "*" ;; (get-host-name) 
			      hostn))
	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   #f)))
			    (if ipstr ipstr hostname)))
	 (last-run       0))
    (set! zmq-sockets-dat (zmq-transport:setup-ports ipaddrstr (if (args:get-arg "-port")
			    (string->number (args:get-arg "-port"))
							    (+ 5000 (random 1001)))))

    (set! zmq-sdat1    (car   zmq-sockets-dat))
    (set! pull-socket  (cadr  zmq-sdat1)) ;; (iface s  port)
    (set! p1           (caddr zmq-sdat1))
    
    (set! zmq-sdat2    (cadr  zmq-sockets-dat))
    (set! pub-socket   (cadr  zmq-sdat2))
    (set! p2           (caddr zmq-sdat2))

    (set! *cache-on* #t)

    (set! *runremote* (vector pull-socket pub-socket)) ;; overloading the use of *runremote* BUG!?

    ;; what to do when we quit
    ;;
;;     (on-exit (lambda ()
;; 	       (if (and *toppath* *server-info*)
;; 		   (open-run-close tasks:server-deregister-self tasks:open-db (car *server-info*))
;; 		   (let loop () 
;; 		     (let ((queue-len 0))
;; 		       (thread-sleep! (random 5))
;; 		       (mutex-lock! *incoming-mutex*)
;; 		       (set! queue-len (length *incoming-data*))
;; 		       (mutex-unlock! *incoming-mutex*)
;; 		       (if (> queue-len 0)
;; 			   (begin
;; 			     (debug:print-info 0 "Queue not flushed, waiting ...")
;; 			     (loop))))))))

    ;; The heavy lifting
    ;;
    ;; make-vector-record cdb packet client-sig qtype immediate query-sig params qtime
    ;;
    (debug:print-info 11 "Server setup complete, start listening for messages")
    (let loop ((queue-lst '()))
      (let* ((rawmsg (receive-message* pull-socket))
	     (packet (db:string->obj rawmsg))
	     (qtype  (cdb:packet-get-qtype packet)))
	(debug:print-info 12 "server=> received packet=" packet)
	(if (not (member qtype '(sync ping)))
	    (begin
	      (mutex-lock! *heartbeat-mutex*)
	      (set! *last-db-access* (current-seconds))
	      (mutex-unlock! *heartbeat-mutex*)))
	(if #t ;; (cdb:packet-get-immediate packet) ;; process immediately or put in queue
	    (begin
	      (db:process-queue-item db packet)
	      ;; (open-run-close db:process-queue #f pub-socket (cons packet queue-lst))
	      
	      (loop '()))
	    (loop (cons packet queue-lst)))))))

;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (zmq-transport:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
			(let ((sdat #f))
			  (mutex-lock! *heartbeat-mutex*)
			  (set! sdat *server-info*)
			  (mutex-unlock! *heartbeat-mutex*)
			  (if sdat sdat
			      (begin
				(debug:print 12 "WARNING: server not started yet, waiting few seconds before trying again")
				(sleep 4)
				(loop))))))
	 (iface       (cadr server-info))
	 (pullport    (caddr server-info))
	 (pubport     (cadddr server-info)) ;; id interface pullport pubport)
	 ;; (zmq-sockets (zmq-transport:client-connect iface pullport pubport))
	 (last-access 0))
    (debug:print-info 11 "heartbeat started for zmq server on " iface " " pullport " " pubport)
    (let loop ((count 0))
      (thread-sleep! 4) ;; no need to do this very often
      ;; NB// sync currently does NOT return queue-length
      ;; GET REAL QUEUE LENGTH FROM THE VARIABLE
      (let ((queue-len 0)) ;; FOR NOW DO NOT DO THIS (cdb:client-call zmq-sockets 'sync #t 1)))
      ;; (print "Server running, count is " count)
	(if (< count 1) ;; 3x3 = 9 secs aprox
	    (loop (+ count 1)))

	;; NOTE: Get rid of this mechanism! It really is not needed...
	(open-run-close tasks:server-update-heartbeat tasks:open-db (car server-info))

	;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
	(mutex-lock! *heartbeat-mutex*)
	(set! last-access *last-db-access*)
	(mutex-unlock! *heartbeat-mutex*)
	(if (> (+ last-access
		  ;; (* 50 60 60)    ;; 48 hrs
		  ;; 60              ;; one minute
		  ;; (* 60 60)       ;; one hour
		  (* 45 60)          ;; 45 minutes, until the db deletion bug is fixed.
		  )
	       (current-seconds))
	    (begin
	      (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (loop 0))
	    (begin
	      (debug:print-info 0 "Starting to shutdown the server.")
	      ;; need to delete only *my* server entry (future use)
	      (set! *time-to-exit* #t)
	      (open-run-close tasks:server-deregister-self tasks:open-db (get-host-name))
	      (thread-sleep! 1)
	      (debug:print-info 0 "Max cached queries was " *max-cache-size*)
	      (debug:print-info 0 "Server shutdown complete. Exiting")
	      (exit)))))))

(define (zmq-transport:find-free-port-and-open iface s port stype #!key (trynum 50))
  (let ((s (if s s (make-socket stype)))
        (p (if (number? port) port 5555))
        (old-handler (current-exception-handler)))
    (handle-exceptions
     exn
     (begin
       (debug:print 0 "Failed to bind to port " p ", trying next port")
       (debug:print 0 "   EXCEPTION: " ((condition-property-accessor 'exn 'message) exn))
       ;; (old-handler)
       ;; (print-call-chain)
       (if (> trynum 0)
           (zmq-transport:find-free-port-and-open iface s (+ p 1) trynum: (- trynum 1))
           (debug:print-info 0 "Tried ports up to " p 
                             " but all were in use. Please try a different port range by starting the server with parameter \" -port N\" where N is the starting port number to use"))
       (exit)) ;; To exit or not? That is the question.
     (let ((zmq-url (conc "tcp://" iface ":" p)))
       (debug:print 2 "Trying to start server on " zmq-url)
       (bind-socket s zmq-url)
       (list iface s port)))))

(define (zmq-transport:setup-ports ipaddrstr startport)
  (let* ((s1 (zmq-transport:find-free-port-and-open ipaddrstr #f startport 'pull))
         (p1 (caddr s1))
         (s2 (zmq-transport:find-free-port-and-open ipaddrstr #f (+ 1 (if p1 p1 (+ startport 1))) 'pub))
         (p2 (caddr s2)))
    (set! *runremote* #f)
    (debug:print 0 "Server started on " ipaddrstr " ports " p1 " and " p2)
    (mutex-lock! *heartbeat-mutex*)
    (set! *server-info* (open-run-close tasks:server-register 
					tasks:open-db 
					(current-process-id) 
					ipaddrstr p1 
					0 
					'live
					'zmq
					pubport: p2))
    (debug:print-info 11 "*server-info* set to " *server-info*)
    (mutex-unlock! *heartbeat-mutex*)
    (list s1 s2)))

(define (zmq-transport:mk-signature)
  (message-digest-string (md5-primitive) 
			 (with-output-to-string
			   (lambda ()
			     (write (list (current-directory)
					  (argv)))))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
;;======================================================================

;; 
(define (zmq-transport:client-socket-connect iface port #!key (context #f)(type 'req)(subscriptions '()))
  (debug:print-info 3 "client-connect " iface ":" port ", type=" type ", subscriptions=" subscriptions)
  (let ((connect-ok #f)
	(zmq-socket (if context 
			(make-socket type context)
			(make-socket type)))
	(conurl     (zmq-transport:make-server-url (list iface port))))
    (if (socket? zmq-socket)
     (begin
	  ;; first apply subscriptions
	  (for-each (lambda (subscription)
		      (debug:print 2 "Subscribing to " subscription)
		      (socket-option-set! zmq-socket 'subscribe subscription))
		    subscriptions)
	  (connect-socket zmq-socket conurl)
	  zmq-socket)
	(begin
	  (debug:print 0 "ERROR: Failed to open socket to " conurl)
	  #f))))

(define (zmq-transport:client-connect iface pullport pubport)
  (let* ((push-socket (zmq-transport:client-socket-connect iface pullport type: 'push))
	 (sub-socket  (zmq-transport:client-socket-connect iface pubport
						    type: 'sub
						    subscriptions: (list (server:get-client-signature) "all")))
	 (zmq-sockets (vector push-socket sub-socket))
	 (login-res   #f))
    (debug:print-info 11 "zmq-transport:client-connect started. Next is login")
    (set! login-res (server:client-login zmq-sockets))
    (if (and (not (null? login-res))
	     (car login-res))
	(begin
	  (debug:print-info 2 "Logged in and connected to " iface ":" pullport "/" pubport ".")
	  (set! *runremote* zmq-sockets)
	  zmq-sockets)
	(begin
	  (debug:print-info 2 "Failed to login or connect to " conurl)
	  (set! *runremote* #f)
	  #f))))

;; run zmq-transport:keep-running in a parallel thread to monitor that the db is being 
;; used and to shutdown after sometime if it is not.
;;
(define (zmq-transport:keep-running)
  ;; if none running or if > 20 seconds since 
  ;; server last used then start shutdown
  ;; This thread waits for the server to come alive
  (let* ((server-info (let loop ()
                        (let ((sdat #f))
                          (mutex-lock! *heartbeat-mutex*)
                          (set! sdat *runremote*)
                          (mutex-unlock! *heartbeat-mutex*)
                          (if sdat sdat
                              (begin
                                (sleep 4)
                                (loop))))))
         (iface       (car server-info))
         (port        (cadr server-info))
         (last-access 0)
	 (tdb         (tasks:open-db))
	 (spid        (tasks:server-get-server-id tdb #f iface port #f)))
    (print "Keep-running got server pid " spid ", using iface " iface " and port " port)
    (let loop ((count 0))
      (thread-sleep! 4) ;; no need to do this very often
      ;; NB// sync currently does NOT return queue-length
      (let () ;; (queue-len (cdb:client-call server-info 'sync #t 1)))
      ;; (print "Server running, count is " count)
        (if (< count 1) ;; 3x3 = 9 secs aprox
            (loop (+ count 1)))
        
        ;; NOTE: Get rid of this mechanism! It really is not needed...
        (tasks:server-update-heartbeat tdb spid)
      
        ;; (if ;; (or (> numrunning 0) ;; stay alive for two days after last access
        (mutex-lock! *heartbeat-mutex*)
        (set! last-access *last-db-access*)
        (mutex-unlock! *heartbeat-mutex*)
        (if (> (+ last-access
                  ;; (* 50 60 60)    ;; 48 hrs
                  ;; 60              ;; one minute
                  ;; (* 60 60)       ;; one hour
                  (* 45 60)          ;; 45 minutes, until the db deletion bug is fixed.
                  )
               (current-seconds))
            (begin
              (debug:print-info 2 "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
              (loop 0))
            (begin
              (debug:print-info 0 "Starting to shutdown the server.")
              ;; need to delete only *my* server entry (future use)
              (set! *time-to-exit* #t)
              (tasks:server-deregister-self tdb (get-host-name))
              (thread-sleep! 1)
              (debug:print-info 0 "Max cached queries was " *max-cache-size*)
              (debug:print-info 0 "Server shutdown complete. Exiting")
              (exit)))))))

;; all routes though here end in exit ...
(define (zmq-transport:launch)
  (if (not *toppath*)
      (if (not (setup-for-run))
	  (begin
	    (debug:print 0 "ERROR: cannot find megatest.config, exiting")
	    (exit))))
  (debug:print-info 2 "Starting zmq server")
  (if *toppath* 
      (let* (;; (th1 (make-thread (lambda ()
	     ;;      	       (let ((server-info #f))
	     ;;      		 ;; wait for the server to be online and available
	     ;;      		 (let loop ()
	     ;;			   (debug:print-info 2 "Waiting for the server to come online before starting heartbeat")
	     ;;      		   (thread-sleep! 2)
	     ;;      		   (mutex-lock! *heartbeat-mutex*)
	     ;;      		   (set! server-info *server-info* )
	     ;;      		   (mutex-unlock! *heartbeat-mutex*)
	     ;;      		   (if (not server-info)(loop)))
	     ;;			 (debug:print 2 "Server alive, starting self-ping")
	     ;;      		 (zmq-transport:self-ping server-info)
	     ;;      		 ))
	     ;;      	     "Self ping"))
	     (th2 (make-thread (lambda ()
				 (zmq-transport:run 
				  (if (args:get-arg "-server")
				      (args:get-arg "-server")
				      "-"))) "Server run"))
	     ;; (th3 (make-thread (lambda ()(zmq-transport:keep-running)) "Keep running"))
	     )
	(set! *client-non-blocking-mode* #t)
	;; (thread-start! th1)
	(thread-start! th2)
	;; (thread-start! th3)
	(set! *didsomething* #t)
	;; (thread-join! th3)
	(thread-join! th2)
	)
      (debug:print 0 "ERROR: Failed to setup for megatest")))

(define (zmq-transport:client-signal-handler signum)
  (handle-exceptions
   exn
   (debug:print " ... exiting ...")
   (let ((th1 (make-thread (lambda ()
			     (if (not *received-response*)
				 (receive-message* *runremote*))) ;; flush out last call if applicable
			   "eat response"))
	 (th2 (make-thread (lambda ()
			     (debug:print 0 "ERROR: Received ^C, attempting clean exit. Please be patient and wait a few seconds before hitting ^C again.")
			     (thread-sleep! 3) ;; give the flush three seconds to do it's stuff
			     (debug:print 0 "       Done.")
			     (exit 4))
			   "exit on ^C timer")))
     (thread-start! th2)
     (thread-start! th1)
     (thread-join! th2))))

(define (zmq-transport:client-launch)
  (set-signal-handler! signal/int zmq-transport:client-signal-handler)
   (if (zmq-transport:client-setup)
       (debug:print-info 2 "connected as client")
       (begin
	 (debug:print 0 "ERROR: Failed to connect as client")
	 (exit))))

;;======================================================================
;; Defunct functions
;;======================================================================

;; ping a server and return number of clients or #f (if no response)
;; NOT IN USE!
(define (zmq-transport:ping host port #!key (secs 10)(return-socket #f))
  (cdb:use-non-blocking-mode
   (lambda ()
     (let* ((res #f)
	    (th1 (make-thread
		  (lambda ()
		    (let* ((zmq-context (make-context 1))
			   (zmq-socket  (zmq-transport:client-connect host port context: zmq-context)))
		      (if zmq-socket
			  (if (zmq-transport:client-login zmq-socket)
			      (let ((numclients (cdb:num-clients zmq-socket)))
				(if (not return-socket)
				    (begin
				      (zmq-transport:client-logout zmq-socket)
				      (close-socket  zmq-socket)))
				(set! res (list #t numclients (if return-socket zmq-socket #f))))
			      (begin
				;; (close-socket zmq-socket)
				(set! res (list #f "CAN'T LOGIN" #f))))
			  (set! res (list #f "CAN'T CONNECT" #f)))))
		  "Ping: th1"))
	    (th2 (make-thread
		  (lambda ()
		    (let loop ((count 1))
		      (debug:print-info 1 "Ping " count " server on " host " at port " port)
		      (thread-sleep! 2)
		      (if (< count (/ secs 2))
			  (loop (+ count 1))))
		    ;; (thread-terminate! th1)
		    (set! res (list #f "TIMED OUT" #f)))
		  "Ping: th2")))
       (thread-start! th2)
       (thread-start! th1)
       (handle-exceptions
	exn
	(set! res (list #f "TIMED OUT" #f))
	(thread-join! th1 secs))
       res))))

;; (define (zmq-transport:self-ping server-info)
;;   ;; server-info: server-id interface pullport pubport
;;   (let ((iface    (list-ref server-info 1))
;; 	(pullport (list-ref server-info 2))
;; 	(pubport  (list-ref server-info 3)))
;;     (zmq-transport:client-connect iface pullport pubport)
;;     (let loop ()
;;       (thread-sleep! 2)
;;       (cdb:client-call *runremote* 'ping #t)
;;       (debug:print 4 "zmq-transport:self-ping - I'm alive on " iface ":" pullport "/" pubport "!")
;;       (mutex-lock! *heartbeat-mutex*)
;;       (set! *server-loop-heart-beat* (current-seconds))
;;       (mutex-unlock! *heartbeat-mutex*)
;;       (loop))))
    
(define (zmq-transport:reply pubsock target query-sig success/fail result)
  (debug:print-info 11 "zmq-transport:reply target=" target ", result=" result)
  (send-message pubsock target send-more: #t)
  (send-message pubsock (db:obj->string (vector success/fail query-sig result))))