Megatest

Check-in [20b4054f76]
Login
Overview
Comment:wip, misc cleanup and reduce some messages.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v2.0001
Files: files | file ages | folders
SHA1: 20b4054f7651769905da1e5b29a7b1b4610d45ce
User & Date: matt on 2022-01-12 16:40:10
Other Links: branch diff | manifest | tags
Context
2022-01-14
11:08
Fix to help ulex with connections when buffer isn't filled (and fix merge) check-in: 2e839ecd70 user: jmoon18 tags: v2.0001
2022-01-13
18:50
Merged fork check-in: 6bbd0fa9a2 user: matt tags: v2.0001, ulex-smoketest
2022-01-12
16:52
Sync up with v2.0001 check-in: d7b4fe7a7f user: matt tags: v2.0001-ulex-one-shot
16:40
wip, misc cleanup and reduce some messages. check-in: 20b4054f76 user: matt tags: v2.0001
2022-01-11
09:00
Go back to single log for a server. The splitting of the logs was not proving useful check-in: b4ff9e2f1d user: matt tags: v2.0001
Changes

Modified dashboard.scm from [cc72246a09] to [4505f63ba6].

3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
(define (dashboard:do-update-rundat tabdat)
  (dboard:update-rundat
   tabdat
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")
   (dboard:tabdat-numruns tabdat)
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
   ;; generate key patterns from the target stored in tabdat
   (let* ((dbkeys (dboard:tabdat-dbkeys tabdat)))
     (let ((fres   (if (dboard:tabdat-target tabdat)
                       (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%"))))
                         (map (lambda (k v)(list k v)) dbkeys ptparts))
                       (let ((res '()))
                         (for-each (lambda (key)
                                     (if (not (equal? key "runname"))
                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))







|







3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
(define (dashboard:do-update-rundat tabdat)
  (dboard:update-rundat
   tabdat
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "runname" "%")
   (dboard:tabdat-numruns tabdat)
   (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) "test-name" "%/%")
   ;; generate key patterns from the target stored in tabdat
   (let* ((dbkeys  (dboard:tabdat-dbkeys tabdat)))
     (let ((fres   (if (dboard:tabdat-target tabdat)
                       (let ((ptparts (append (dboard:tabdat-target tabdat)(make-list (length dbkeys) "%"))))
                         (map (lambda (k v)(list k v)) dbkeys ptparts))
                       (let ((res '()))
                         (for-each (lambda (key)
                                     (if (not (equal? key "runname"))
                                         (let ((val (hash-table-ref/default (dboard:tabdat-searchpatts tabdat) key #f)))

Modified dbmod.scm from [4d2069b432] to [ac637164a6].

691
692
693
694
695
696
697


698
699
700
701
702
703
704
705
;; ;;     (mutex-unlock! *db-multi-sync-mutex*)
;; ;;     (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

;; NOTE: touched logic is disabled/not done
;; sync run to disk if touched
;;
(define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f))


  #f) ;; disabled
;;   (let* ((dbdat       (db:get-dbdat dbstruct apath dbfile))
;; 	 (dbfullname  (conc apath "/" dbfile))
;; 	 (db          (db:open-run-db dbfullname db:initialize-db)) ;; (dbr:dbdat-db dbdat))
;; 	 (inmem       (dbr:dbdat-inmem dbdat))
;; 	 (start-t     (current-seconds))
;; 	 (last-update (dbr:dbdat-last-write dbdat))
;; 	 (last-sync   (dbr:dbdat-last-sync dbdat)))







>
>
|







691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
;; ;;     (mutex-unlock! *db-multi-sync-mutex*)
;; ;;     (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

;; NOTE: touched logic is disabled/not done
;; sync run to disk if touched
;;
(define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f))
  (if #f
      (debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbfile" at "(current-seconds))
      #f)) ;; disabled
;;   (let* ((dbdat       (db:get-dbdat dbstruct apath dbfile))
;; 	 (dbfullname  (conc apath "/" dbfile))
;; 	 (db          (db:open-run-db dbfullname db:initialize-db)) ;; (dbr:dbdat-db dbdat))
;; 	 (inmem       (dbr:dbdat-inmem dbdat))
;; 	 (start-t     (current-seconds))
;; 	 (last-update (dbr:dbdat-last-write dbdat))
;; 	 (last-sync   (dbr:dbdat-last-sync dbdat)))

Modified rmtmod.scm from [a8f42f4480] to [e895913787].

2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
			(thread-start! watchdog))
		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
	      #;(loop (+ count 1) bad-sync-count start-time)
	      ))
	
	(debug:print-info 0 *default-log-port* "syncing "*toppath*" "dbname" at "(current-seconds))
	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
	
	(mutex-unlock! *heartbeat-mutex*)
	
	;; when things go wrong we don't want to be doing the various
	;; queries too often so we strive to run this stuff only every
	;; four seconds or so.







<







2146
2147
2148
2149
2150
2151
2152

2153
2154
2155
2156
2157
2158
2159
			(debug:print-info 0 *default-log-port* "Starting watchdog thread (in state "(thread-state watchdog)")")
			(thread-start! watchdog))
		      (debug:print-info 0 *default-log-port* "Not starting watchdog thread (in state "(thread-state watchdog)")"))
		  (debug:print 0 *default-log-port* "ERROR: *watchdog* not setup, cannot start it."))
	      #;(loop (+ count 1) bad-sync-count start-time)
	      ))
	

	(db:sync-inmem->disk *dbstruct-db* *toppath* dbname force-sync: #t)
	
	(mutex-unlock! *heartbeat-mutex*)
	
	;; when things go wrong we don't want to be doing the various
	;; queries too often so we strive to run this stuff only every
	;; four seconds or so.

Modified ulex/ulex.scm from [70c15d4319] to [81b8992868].

217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244



















































245
246
247
248
249

250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282

283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
;;        - I believe (without substantial evidence) that re-using connections will
;;          be beneficial ...
;;
(define (send udata host-port qrykey cmd params)
  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
	 ;; dat is a self-contained work block that can be sent or handled locally
	 (dat          (list my-host-port qrykey cmd params)))
    (cond
     (isme (ulex-handler udata dat)) ;; no transmission needed
     (else
      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
	  exn
	  #f
	(begin
	  ;; (mutex-lock! *send-mutex*)
	  (let-values (((inp oup)(tcp-connect host-port)))
	    (let ((res (if (and inp oup)
			   (begin
			     (serialize dat oup)
			     (deserialize inp))
			   (begin
			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			     #f))))
	      (close-input-port inp)
	      (close-output-port oup)
	      ;; (mutex-unlock! *send-mutex*)
	      res)))))))) ;; res will always be 'ack unless return-method is direct



















































  
;; send a request to the given host-port and register a mailbox in udata
;; wait for the mailbox data and return it
;;
(define (send-receive uconn host-port cmd data)

  (cond
   ((member cmd '(ping goodbye)) ;; these are immediate
    (send uconn host-port 'ping cmd data))
   ((eq? (work-method) 'direct)
    ;; the result from send will be the actual result, not an 'ack
    (send uconn host-port 'direct cmd data))
   (else
    (case (return-method)
      ((polling)
       (let* ((qrykey (make-cookie uconn))
	      (sres   (send uconn host-port qrykey cmd data)))
	 (case sres
	   ((ack)
	    (let loop ((start-time (current-milliseconds)))
	      (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
		  (begin
		    (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
		    #f)
		  (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
		    (if result ;; result is '(status . result-data) or #f for nothing yet
			(begin
			  (hash-table-delete! (udat-mboxes uconn) qrykey)
			  (cdr result))
			(begin
			  (thread-sleep! 0.01)
			  (loop start-time)))))))
	   (else
	    (print "ULEX ERROR: Communication failed? sres="sres)
	    #f))))
      ((mailbox) 
       (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
	      (qrykey    (car cmbox))
	      (mbox      (cdr cmbox))

	      (mbox-time (current-milliseconds))
	      (sres      (send uconn host-port qrykey cmd data))) ;; short res
	 (if (eq? sres 'ack)
	     (let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
					  #f
					  120)) ;; timeout)
		    (mbox-timeout-result 'MBOX_TIMEOUT)
		    (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
		    (mbox-receive-time    (current-milliseconds)))
	       ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
	       (hash-table-delete! (udat-mboxes uconn) qrykey)
	       (if (eq? res 'MBOX_TIMEOUT)
		   (begin
		     (print "WARNING: mbox timed out for query "cmd", with data "data", waiting for response from "host-port".")

		     ;; here it might make sense to clean up connection records and force clean start?
		     ;; NO. The progam using ulex needs to do the reset. Right thing here is exception
		     
		     #f)  ;; convert to raising exception?
		   res))
	     (begin
	       (print "ERROR: Communication failed? Got "sres)
	       #f))))
      (else
       (print "ULEX ERROR: unrecognised return-method "(return-method)".")
       #f)))))

;;======================================================================
;; responder side
;;======================================================================

;; take a request, rdat, and if not immediate put it in the work queue
;;
;; Reserved cmds; ack ping goodbye response
;;
(define (ulex-handler uconn rdat)
  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
  (match rdat ;;  (string-split controldat)
    ((rem-host-port qrykey cmd params)
     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
     (case cmd
       ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
       ((ping)
	;; (print "Got Ping!")
	;; (add-to-work-queue uconn rdat)
	'ack)







|




















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
|
|
|
|
|
|
|
|
|
<
|
<
|
<
<
<
|
<
<
<
<
<
<
<
<
<
|
|
|
<
<
<
<
>
|
<
<
<
<
<
<
<
|
<
<
<
<
|
|
<
<
|
<
<
<
<
<
<
<
<












|







217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310

311

312



313









314
315
316




317
318







319




320
321


322








323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
;;        - I believe (without substantial evidence) that re-using connections will
;;          be beneficial ...
;;
(define (send udata host-port qrykey cmd params)
  (let* ((my-host-port (udat-host-port udata))          ;; remote will return to this
	 (isme         #f #;(equal? host-port my-host-port)) ;; calling myself?
	 ;; dat is a self-contained work block that can be sent or handled locally
	 (dat          (list my-host-port qrykey cmd params #;(cons (current-seconds)(current-milliseconds)))))
    (cond
     (isme (ulex-handler udata dat)) ;; no transmission needed
     (else
      (handle-exceptions ;; TODO - MAKE THIS EXCEPTION CMD SPECIFIC?
	  exn
	  #f
	(begin
	  ;; (mutex-lock! *send-mutex*)
	  (let-values (((inp oup)(tcp-connect host-port)))
	    (let ((res (if (and inp oup)
			   (begin
			     (serialize dat oup)
			     (deserialize inp))
			   (begin
			     (print "ERROR: send called but no receiver has been setup. Please call setup first!")
			     #f))))
	      (close-input-port inp)
	      (close-output-port oup)
	      ;; (mutex-unlock! *send-mutex*)
	      res)))))))) ;; res will always be 'ack unless return-method is direct

(define (send-via-polling uconn host-port cmd data)
  (let* ((qrykey (make-cookie uconn))
	 (sres   (send uconn host-port qrykey cmd data)))
    (case sres
      ((ack)
       (let loop ((start-time (current-milliseconds)))
	 (if (> (current-milliseconds)(+ start-time 10000)) ;; ten seconds timeout
	     (begin
	       (print "ULEX ERROR: timed out waiting for response from "host-port", "cmd" "data)
	       #f)
	     (let* ((result (hash-table-ref/default (udat-mboxes uconn) qrykey #f))) ;; NOTE: we are re-using mboxes hash
	       (if result ;; result is '(status . result-data) or #f for nothing yet
		   (begin
		     (hash-table-delete! (udat-mboxes uconn) qrykey)
		     (cdr result))
		   (begin
		     (thread-sleep! 0.01)
		     (loop start-time)))))))
      (else
       (print "ULEX ERROR: Communication failed? sres="sres)
       #f))))

(define (send-via-mailbox uconn host-port cmd data)
  (let* ((cmbox     (get-cmbox uconn)) ;; would it be better to keep a stack of mboxes to reuse?
	 (qrykey    (car cmbox))
	 (mbox      (cdr cmbox))
	 (mbox-time (current-milliseconds))
	 (sres      (send uconn host-port qrykey cmd data))) ;; short res
    (if (eq? sres 'ack)
	(let* ((mbox-timeout-secs    120 #;(if (eq? 'primordial (thread-name (current-thread)))
				     #f
				     120)) ;; timeout)
	       (mbox-timeout-result 'MBOX_TIMEOUT)
	       (res                  (mailbox-receive! mbox mbox-timeout-secs mbox-timeout-result))
	       (mbox-receive-time    (current-milliseconds)))
	  ;; (put-cmbox uconn cmbox) ;; reuse mbox and cookie. is it worth it?
	  (hash-table-delete! (udat-mboxes uconn) qrykey)
	  (if (eq? res 'MBOX_TIMEOUT)
	      (begin
		(print "WARNING: mbox timed out for query "cmd", with data "data
		       ", waiting for response from "host-port".")

		;; here it might make sense to clean up connection records and force clean start?
		;; NO. The progam using ulex needs to do the reset. Right thing here is exception
		
		#f)  ;; convert to raising exception?
	      res))
	(begin
	  (print "ERROR: Communication failed? Got "sres)
	  #f))))
  
;; send a request to the given host-port and register a mailbox in udata
;; wait for the mailbox data and return it
;;
(define (send-receive uconn host-port cmd data)
  (let* ((start-time (current-milliseconds))
	 (result     (cond
		      ((member cmd '(ping goodbye)) ;; these are immediate
		       (send uconn host-port 'ping cmd data))
		      ((eq? (work-method) 'direct)
		       ;; the result from send will be the actual result, not an 'ack
		       (send uconn host-port 'direct cmd data))
		      (else
		       (case (return-method)
			 ((polling)

			  (send-via-polling uconn host-port cmd data))

			 ((mailbox) 



			  (send-via-mailbox uconn host-port cmd data))









			 (else
			  (print "ULEX ERROR: unrecognised return-method "(return-method)".")
			  #f))))))




    ;; this is ONLY for development and debugging. It will be removed once Ulex is stable.
    (if (< 5000 (- (current-milliseconds) start-time))







	(print "ULEX WARNING: round-trip took over 5 seconds; "




	       cmd", host-port="host-port", data="data))
    result))


    









;;======================================================================
;; responder side
;;======================================================================

;; take a request, rdat, and if not immediate put it in the work queue
;;
;; Reserved cmds; ack ping goodbye response
;;
(define (ulex-handler uconn rdat)
  (assert (list? rdat) "FATAL: ulex-handler give rdat as not list")
  (match rdat ;;  (string-split controldat)
    ((rem-host-port qrykey cmd params);; timedata)
     ;; (print "ulex-handler got: "rem-host-port" qrykey: "qrykey" cmd: "cmd" params: "params)
     (case cmd
       ;; ((ack )(print "Got ack! But why? Should NOT get here.") 'ack)
       ((ping)
	;; (print "Got Ping!")
	;; (add-to-work-queue uconn rdat)
	'ack)