Megatest

Check-in [fc6b05f924]
Login
Overview
Comment:Added journal based statical droop based throttling of queries.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.81-journal-based-throttling
Files: files | file ages | folders
SHA1: fc6b05f92450cc3eeea48602396e7d4700f3f590
User & Date: mrwellan on 2024-07-10 17:44:52
Other Links: branch diff | manifest | tags
Context
2024-07-10
18:10
Gate test launch based on journal load. Values from load calc seem wrong. Should be 0-1.0 but seeing integers 0, 1, 2 ... check-in: 2635b582e7 user: mrwellan tags: v1.81-journal-based-throttling
17:44
Added journal based statical droop based throttling of queries. check-in: fc6b05f924 user: mrwellan tags: v1.81-journal-based-throttling
09:17
Added docs/csirc which works with both chicken 4 and 5 check-in: e75a04de3e user: mrwellan tags: v1.81-journal-based-throttling
Changes

Modified rmt.scm from [519878889b] to [a86edf18db].

90
91
92
93
94
95
96
97















98
99
100
101
102
103
104
105
106
107
108
109
110
;; NB// area-dat replaced by ttdat
;; 
(define (rmt:send-receive cmd run-id params #!key (attemptnum 1)(ttdat #f))
  (assert (or (not run-id) (number? run-id)) "FATAL: run-id is required to be a number or #f")
  (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")
  (let* ((areapath      *toppath*) ;; TODO - resolve from dbstruct to be compatible with multiple areas
	 (readonly-mode (rmtmod:calc-ro-mode ttdat *toppath*))
	 (testsuite     (common:get-testsuite-name)))















    (case (rmt:transport-mode)
      ((tcp)
       (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
	      (attemptnum    (+ 1 attemptnum))
	      (mtexe         (common:find-local-megatest))
	      (dbfname       (conc (dbfile:run-id->dbnum run-id)".db"))
	      (ttdat         (rmt:set-ttdat areapath ttdat))
	      (conn          (tt:get-conn ttdat dbfname))
	      (is-main       (equal? dbfname "main.db")) ;; why not (not run-id) ?
	      (server-start-proc (if is-main
				     #f
				     (lambda ()
				       ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname)







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>





<







90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119
120
121
122
123
124
;; NB// area-dat replaced by ttdat
;; 
(define (rmt:send-receive cmd run-id params #!key (attemptnum 1)(ttdat #f))
  (assert (or (not run-id) (number? run-id)) "FATAL: run-id is required to be a number or #f")
  (assert *toppath* "FATAL: rmt:send-receive called with *toppath* not set.")
  (let* ((areapath      *toppath*) ;; TODO - resolve from dbstruct to be compatible with multiple areas
	 (readonly-mode (rmtmod:calc-ro-mode ttdat *toppath*))
	 (testsuite     (common:get-testsuite-name))
	 (dbfname       (conc (dbfile:run-id->dbnum run-id)".db"))
	 (dbdir         (conc areapath "/.mtdb")))
    (if (and (not *journal-stats*)
	     (file-exists? dbdir))
	(tt:start-stats dbdir)) ;; fixme - find the right call to get the db directory
    
    ;; check the load on dbfname and add some delay using a droop curve of sorts
    (if *journal-stats*
	(let* ((stats (tt:get-journal-stats))
	       (load  (or (alist-ref dbfname stats equal?) 0)))
	  (if (> load 0)
	      (let ((dely (* 10 load)))
		(debug:print 0 *default-log-port* "Journal load "load" delaying queries "dely"s.")
		(thread-sleep! dely)))))
	
    (case (rmt:transport-mode)
      ((tcp)
       (let* ((start-time    (current-seconds)) ;; snapshot time so all use cases get same value
	      (attemptnum    (+ 1 attemptnum))
	      (mtexe         (common:find-local-megatest))

	      (ttdat         (rmt:set-ttdat areapath ttdat))
	      (conn          (tt:get-conn ttdat dbfname))
	      (is-main       (equal? dbfname "main.db")) ;; why not (not run-id) ?
	      (server-start-proc (if is-main
				     #f
				     (lambda ()
				       ;; (debug:print-info 0 *default-log-port* "starting server for dbfname: "dbfname)

Modified tcp-transportmod.scm from [2f9da6d8df] to [98a778bd3e].

1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181

1182
1183
1184
1185
















































1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198

(defstruct jstats
  (count 0)
  (jcount (make-hash-table)) ;; 1.db => journal_count
  )

;; timeblk => jstats
(define *journal-stats* (make-hash-table))

;; monte-carlo-esque random sampling of journal files
;; for all the files:
;;   if .journal
;;      update stats +1 +1
;;      update stats +1  0
;;
(define (tt:write-load-tracking dbdir)
  (let* ((cs    (current-seconds))
	 (key   (inexact->exact (quotient cs 10)))
	 (old   (- key 4)) ;; 4 x 10 seconds ago
	 (jstat (if (hash-table-exists? *journal-stats* key)
		    (hash-table-ref *journal-stats* key )
		    (let ((new (make-jstats)))
		      (hash-table-set! *journal-stats* key new)
		      new))))
    ;; clear out records over 30s old
    (for-each
     (lambda (key)
       (if (< key old)
	   (hash-table-delete! *journal-stats* key)))
     (hash-table-keys *journal-stats*))

    ;; increment our count of observations
    (jstats-count-set! jstat (+ (jstats-count jstat) 1))
    
    ;; now find and increment journal file counts
    (directory-fold
     (lambda (fname res)
       ;; is it a journal file?
       (let ((parts (string-match "^(.*\\.db)-journal.*" fname)))
	 (match parts
	   ((_ dbfname)
	    (hash-table-set! (jstats-jcount jstat) dbfname
			     (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1)
			     ))

	   )))
     '()
     dbdir 
     )))

















































;; megatest> (import tcp-transportmod)
;; megatest> (tt:write-load-tracking ".mtdb")
;; megatest> (hash-table-keys *journal-stats*)
;; (172060297)
;; megatest> (jstats->alist (hash-table-ref *journal-stats* 172060297))
;; ((count . 1) (jcount . #<hash-table (1)>))
;; megatest> (jstats-jcount (hash-table-ref *journal-stats* 172060297))
;; #<hash-table (1)>
;; megatest> (hash-table->alist (jstats-jcount (hash-table-ref *journal-stats* 172060297)))
;; (("1.db" . 4))

)







|










|





|



















>




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>













1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247

;; jstats: counters for one time block of journal sampling.
;;   count  - number of sampling passes recorded (bumped once per
;;            tt:write-load-tracking call)
;;   jcount - hash table: db file name (e.g. "1.db") => number of
;;            journal files seen for that db across those passes
(defstruct jstats
  (count 0)
  (jcount (make-hash-table)) ;; 1.db => journal_count
  )

;; timeblk => jstats
;; Starts out as #f; holds a hash table once stats collection has been
;; set up. Code that uses it tests for #f to tell whether sampling is active.
(define *journal-stats* #f) ;; (make-hash-table))

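;; Monte-Carlo-style random sampling of journal files.
;; Each call is one observation, recorded in the current time block:
;;   - the block's observation count is always incremented by 1
;;   - for every file in dbdir whose name matches "<name>.db-journal*",
;;     the hit count for "<name>.db" is incremented by 1
;;     (a db with no journal file present gets no increment)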
;;
;; Record one sampling pass over dbdir into *journal-stats*.
;;
;; dbdir - directory to scan for "<name>.db-journal*" files
;;
;; Side effects: creates the jstats entry for the current 10-second time
;; block if it is missing, deletes entries older than the cutoff, bumps the
;; block's observation count and its per-db journal hit counts.
;; *journal-stats* must already be a hash table when this is called (it
;; starts out as #f); no locking is done here.
(define (tt:write-load-tracking dbdir)
  (let* ((cs    (current-seconds))
	 (key   (inexact->exact (quotient cs 10))) ;; time block id: seconds / 10
	 (old   (- key 5)) ;; cutoff: 5 x 10-second blocks ago
	 (jstat (if (hash-table-exists? *journal-stats* key)
		    (hash-table-ref *journal-stats* key )
		    (let ((new (make-jstats)))
		      (hash-table-set! *journal-stats* key new)
		      new))))
    ;; clear out old records (time blocks with an id below the cutoff)
    (for-each
     (lambda (key)
       (if (< key old)
	   (hash-table-delete! *journal-stats* key)))
     (hash-table-keys *journal-stats*))

    ;; increment our count of observations
    (jstats-count-set! jstat (+ (jstats-count jstat) 1))
    
    ;; now find and increment journal file counts
    (directory-fold
     (lambda (fname res)
       ;; is it a journal file?
       (let ((parts (string-match "^(.*\\.db)-journal.*" fname)))
	 (match parts
	   ;; two-element result: whole match plus the captured "<name>.db"
	   ((_ dbfname)
	    (hash-table-set! (jstats-jcount jstat) dbfname
			     (+ (hash-table-ref/default (jstats-jcount jstat) dbfname 0) 1)
			     ))
	   ;; anything else (e.g. not a journal file) is ignored
	   (else #f)
	   )))
     '()
     dbdir 
     )))

;; Serialises access to *journal-stats*: held by tt:journal-stats-run around
;; each sampling pass and by tt:get-journal-stats while it reads the table.
(define *journal-stats-mutex* (make-mutex))

;; Endless sampling loop for journal statistics; intended to be the body of
;; a dedicated thread (see tt:start-stats). Never returns.
;;
;; dbdir - directory handed to tt:write-load-tracking on every pass
;;
;; Installs the *journal-stats* hash table if none exists yet, then
;; repeatedly takes one sample under *journal-stats-mutex* and sleeps for a
;; random interval of (random 1000)/100.0 seconds.
;; NOTE(review): if tt:write-load-tracking raises, the mutex is not
;; explicitly released here - confirm this is acceptable.
(define (tt:journal-stats-run dbdir)
  (unless *journal-stats*
    (set! *journal-stats* (make-hash-table)))
  ;; a `do` loop whose termination test is always false: runs forever
  (do () (#f)
    (mutex-lock! *journal-stats-mutex*)
    (tt:write-load-tracking dbdir)
    (mutex-unlock! *journal-stats-mutex*)
    ;; randomised pause between samples
    (thread-sleep! (/ (random 1000) 100.0))))

;; call this to start a thread that is keeping the journal-stats up to date.
;; Start the background thread that keeps *journal-stats* up to date.
;;
;; dbdir - directory the collector scans for journal files
;;
;; Returns the started thread (the value of thread-start!).
(define (tt:start-stats dbdir)
  ;; Install the stats table here, before the thread is scheduled, instead of
  ;; leaving it to the thread itself. Callers decide whether to start the
  ;; collector by testing (not *journal-stats*); if the table only appears
  ;; once the new thread first runs, calls arriving in between would each
  ;; start another collector thread.
  (if (not *journal-stats*)(set! *journal-stats* (make-hash-table)))
  (thread-start!
   (make-thread
    (lambda ()(tt:journal-stats-run dbdir)) "Journal stats collection thread")))

;; Summarise the journal load recorded in *journal-stats*.
;;
;; Sums the observation counts and the per-db journal hit counts over all
;; retained time blocks, then normalises each db's hits by the total number
;; of observations.
;;
;; Returns an alist of (dbfname . load) where load = hits / observations.
;; Returns the empty list when nothing has been recorded, or when
;; *journal-stats* is not set (in which case an INFO line is also logged).
(define (tt:get-journal-stats)
  (let* ((result    (make-jstats))
	 (hitcounts (jstats-jcount result)))
    (if *journal-stats*
	(begin
	  ;; hold the lock so the collector thread cannot modify the table mid-read
	  (mutex-lock! *journal-stats-mutex*)
	  (hash-table-for-each
	   *journal-stats*
	   (lambda (k v) ;; time block => jstats
	     ;; accumulate total number of observations
	     (jstats-count-set! result
				(+ (jstats-count result)
				   (jstats-count v)))
	     ;; accumulate per-db journal hits
	     (hash-table-for-each
	      (jstats-jcount v) ;; dbfname => hit count
	      (lambda (dbfname hit-count)
		(hash-table-set! hitcounts dbfname
				 (+ hit-count
				    (hash-table-ref/default hitcounts dbfname 0)))))))
	  (mutex-unlock! *journal-stats-mutex*))
	(debug:print 0 *default-log-port* "INFO: *journal-stats* not set."))
    ;; convert to normalized alist
    ;; max (not min) keeps the divisor at 1 or more: min capped it at 1, so
    ;; hits were never divided by the real observation count and a zero
    ;; count could be used as the divisor.
    (let ((tot  (max (jstats-count result) 1)) ;; avoid divide by zero
	  (hits (jstats-jcount result))) ;; 1.db => count
      (hash-table-map
       hits
       (lambda (fname hitcount)
	 (cons fname (/ hitcount tot)))))
    ))

;; megatest> (import tcp-transportmod)
;; megatest> (tt:write-load-tracking ".mtdb")
;; megatest> (hash-table-keys *journal-stats*)
;; (172060297)
;; megatest> (jstats->alist (hash-table-ref *journal-stats* 172060297))
;; ((count . 1) (jcount . #<hash-table (1)>))
;; megatest> (jstats-jcount (hash-table-ref *journal-stats* 172060297))
;; #<hash-table (1)>
;; megatest> (hash-table->alist (jstats-jcount (hash-table-ref *journal-stats* 172060297)))
;; (("1.db" . 4))

)