Overview
Comment: | enhanced common:get-least-loaded-host to better distribute work |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | v1.63 |
Files: | files | file ages | folders |
SHA1: | aa5b5f0d4b70b37d99b11e88239ed9ee |
User & Date: | bjbarcla on 2016-12-15 19:33:48 |
Other Links: | branch diff | manifest | tags |
Context
2016-12-16
| ||
13:08 | Fixed two bugs in getting and using cpuload check-in: 473da90c19 user: mrwellan tags: v1.63 | |
2016-12-15
| ||
19:33 | enhanced common:get-least-loaded-host to better distribute work check-in: aa5b5f0d4b user: bjbarcla tags: v1.63 | |
12:01 | fixed bug where server start storm appears in envqa check-in: c16914fba9 user: bjbarcla tags: v1.63 | |
Changes
Modified api.scm from [fe7a2f21be] to [97e3840c02].
︙ | ︙ | |||
237 238 239 240 241 242 243 244 245 246 247 248 249 250 | ((get-steps-data) (apply db:get-steps-data dbstruct params)) ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) ;; TEST DATA ((read-test-data) (apply db:read-test-data dbstruct params)) ;; MISC ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) ((login) (apply db:login dbstruct params)) ((general-call) (let ((stmtname (car params)) (run-id (cadr params)) (realparams (cddr params))) (db:with-db dbstruct run-id #t ;; these are all for modifying the db (lambda (db) | > | 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | ((get-steps-data) (apply db:get-steps-data dbstruct params)) ((get-steps-for-test) (apply db:get-steps-for-test dbstruct params)) ;; TEST DATA ((read-test-data) (apply db:read-test-data dbstruct params)) ;; MISC ((get-latest-host-load) (apply db:get-latest-host-load dbstruct params)) ((have-incompletes?) (apply db:have-incompletes? dbstruct params)) ((login) (apply db:login dbstruct params)) ((general-call) (let ((stmtname (car params)) (run-id (cadr params)) (realparams (cddr params))) (db:with-db dbstruct run-id #t ;; these are all for modifying the db (lambda (db) |
︙ | ︙ |
Modified common.scm from [7404179285] to [24157bbd1e].
︙ | ︙ | |||
1141 1142 1143 1144 1145 1146 1147 | (define (common:unix-ping hostname) (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) (eq? res 0))) ;; ideally put all this info into the db, no need to preserve it across moving homehost ;; | > > | > > > > > > > > > > > > > > > > > > > > > > > > > > > > > < < < < < < < < < < | | | | | | | | < | | | < > | | | > > > > > > > > > > > > > > > > > > > | > | | > > | < < < < | | | > > > | 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 | (define (common:unix-ping hostname) (let ((res (system (conc "ping -c 1 " hostname " > /dev/null")))) (eq? res 0))) ;; ideally put all this info into the db, no need to preserve it across moving homehost ;; ;; return list of ;; ( reachable? 
cpuload update-time ) (define (common:get-host-info hostname) (let* ((loadinfo (rmt:get-latest-host-load hostname)) (load (car loadinfo)) (load-sample-time (cdr loadinfo)) (load-sample-age (- (current-seconds) load-sample-time)) (loadinfo-timeout-seconds 20) (host-last-update-timeout-seconds 10) (host-rec (hash-table-ref/default *host-loads* hostname #f)) ) (cond ((< load-sample-age loadinfo-timeout-seconds) ;;(print "BB> chr - 1") (list #t load-sample-time load)) ((and host-rec (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds))) ;;(print "BB> chr - 2") (list #t (host-last-update host-rec) (host-last-cpuload host-rec ))) ((common:unix-ping hostname) ;;(print "BB> chr - 3 host-rec="host-rec" lu="(if host-rec (- (current-seconds) (host-last-update host-rec)) "None")) (list #t (current-seconds) (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname)))) (else (list #f 0 -1))))) (define (common:update-host-loads-table hosts-raw) (let* ((hosts (filter (lambda (x) (string-match (regexp "^\\S+$") x)) hosts-raw))) (for-each (lambda (hostname) (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) (if h h (let ((h (make-host))) (hash-table-set! *host-loads* hostname h) h)))) (host-info (common:get-host-info hostname)) (is-reachable (car host-info)) (last-reached-time (cadr host-info)) (load (caddr host-info))) (host-reachable-set! rec is-reachable) (host-last-update-set! rec last-reached-time) (host-last-cpuload-set! rec load))) hosts))) (define (common:get-least-loaded-host hosts-raw) (let* ((hosts (filter (lambda (x) (string-match (regexp "^\\S+$") x)) hosts-raw)) (best-host #f) (best-load 99999) (curr-time (current-seconds))) (common:update-host-loads-table hosts) (for-each (lambda (hostname) (let* ((rec (let ((h (hash-table-ref/default *host-loads* hostname #f))) (if h h (let ((h (make-host))) (hash-table-set! 
*host-loads* hostname h) h)))) (reachable (host-reachable rec)) (load (host-last-cpuload rec))) (cond ((not reachable) #f) ((< (+ load (/ (random 250) 1000)) ;; add a random factor to keep from getting in a rut (+ best-load (/ (random 250) 1000)) ) (set! best-load load) (set! best-host hostname))))) hosts) best-host)) (define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f)) (let* ((loadavg (common:get-cpu-load remote-host)) (first (car loadavg)) (next (cadr loadavg)) (adjload (* maxload numcpus)) (loadjmp (- first next))) |
︙ | ︙ |
Modified db.scm from [f61ce7e6de] to [eba0b31003].
︙ | ︙ | |||
3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 | (sqlite3:for-each-row (lambda (state status count) (set! res (cons (vector state status count) res))) db "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" run-id testname) res)) (define (db:set-top-level-from-items dbstruct run-id testname) (let* ((dbdat (db:get-db dbstruct run-id)) (db (db:dbdat-get-db dbdat)) (summ (db:get-state-status-summary db run-id testname)) (find (lambda (state status) (if (null? summ) | > > > > > > > > > > > > > > > > > > > | 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 | (sqlite3:for-each-row (lambda (state status count) (set! res (cons (vector state status count) res))) db "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;" run-id testname) res)) (define (db:get-latest-host-load dbstruct raw-hostname) (let* ((hostname (string-substitute "\\..*$" "" raw-hostname)) (res (cons -1 0)) (mydb (db:dbdat-get-db (db:get-db dbstruct 0))) ) (print "BB> hostname="hostname" raw-hostname="raw-hostname" dbstruct="dbstruct" mydb="mydb) (db:with-db dbstruct 0 #f (lambda (db) (sqlite3:for-each-row (lambda (cpuload update-time) (set! res (cons cpuload update-time))) db "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1 AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;" hostname))) res )) (define (db:set-top-level-from-items dbstruct run-id testname) (let* ((dbdat (db:get-db dbstruct run-id)) (db (db:dbdat-get-db dbdat)) (summ (db:get-state-status-summary db run-id testname)) (find (lambda (state status) (if (null? summ) |
︙ | ︙ |
Modified launch.scm from [bc19a897b2] to [4553eebf08].
︙ | ︙ | |||
316 317 318 319 320 321 322 | (kill-tries 0)) ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (get-cpu-load)) (disk-free (get-df (current-directory)))) | | | 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 | (kill-tries 0)) ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area) ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area) (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10) (let loop ((minutes (calc-minutes)) (cpu-load (get-cpu-load)) (disk-free (get-df (current-directory)))) (let ((new-cpu-load (let* ((load (common:get-normalized-cpu-load #f)) (delta (abs (- load cpu-load)))) (if (> delta 0.6) ;; don't bother updating with small changes load #f))) (new-disk-free (let* ((df (get-df (current-directory))) (delta (abs (- df disk-free)))) (if (> delta 200) ;; ignore changes under 200 Meg |
︙ | ︙ |
Modified rmt.scm from [2632e87e3e] to [209649af60].
︙ | ︙ | |||
318 319 320 321 322 323 324 325 326 327 328 329 330 331 | ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) ;; (define (rmt:sync-inmem->db run-id) ;; (rmt:send-receive 'sync-inmem->db run-id '())) (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr (rmt:send-receive 'sdb-qry run-id (list qry val))) | > > > > > | 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 | ;; hand off a call to one of the db:queries statements ;; added run-id to make looking up the correct db possible ;; (define (rmt:general-call stmtname run-id . params) (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params))) ;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host (define (rmt:get-latest-host-load hostname) (rmt:send-receive 'get-latest-host-load 0 (list hostname))) ;; (define (rmt:sync-inmem->db run-id) ;; (rmt:send-receive 'sync-inmem->db run-id '())) (define (rmt:sdb-qry qry val run-id) ;; add caching if qry is 'getid or 'getstr (rmt:send-receive 'sdb-qry run-id (list qry val))) |
︙ | ︙ |