Megatest

Diff
Login

Differences From Artifact [0f56c7d848]:

To Artifact [5eabc5f129]:


1082
1083
1084
1085
1086
1087
1088
1089


1090
1091
1092
1093
1094
1095
1096
1082
1083
1084
1085
1086
1087
1088

1089
1090
1091
1092
1093
1094
1095
1096
1097







-
+
+







	   (with-input-from-pipe 
	    (conc "ssh " remote-host " cat /proc/loadavg")
	    (lambda ()(list (read)(read)(read)))))
      (with-input-from-file "/proc/loadavg" 
	(lambda ()(list (read)(read)(read))))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns list (normalized-proc-load normalized-core-load 1m 5m 15m ncores nthreads)
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((data (if remote-host
                  (with-input-from-pipe 
                   (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end")
                   read-lines)
                  (append 
1141
1142
1143
1144
1145
1146
1147


1148
1149
1150












1151
1152
1153
1154






1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166




















1167
1168
1169
1170



1171
1172
1173
1174
1175
1176
1177
1178






























1179
1180
1181
1182
1183
1184
1185






1186
1187
1188
1189
1190
1191
1192
1142
1143
1144
1145
1146
1147
1148
1149
1150



1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162




1163
1164
1165
1166
1167
1168












1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188




1189
1190
1191








1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221







1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234







+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+








(define (common:unix-ping hostname)
  (let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
    (eq? res 0)))

;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;;  ( reachable? cpuload update-time )
(define (common:get-least-loaded-host hosts)
  (if (null? hosts)
      #f
(define (common:get-host-info hostname)
  (let* ((loadinfo (rmt:get-latest-host-load hostname))
         (load (car loadinfo))
         (load-sample-time (cdr loadinfo))
         (load-sample-age (- (current-seconds) load-sample-time))
         (loadinfo-timeout-seconds 20)
         (host-last-update-timeout-seconds 10)
         (host-rec (hash-table-ref/default *host-loads* hostname #f))
         )
    (cond
     ((< load-sample-age loadinfo-timeout-seconds)
      (list #t
      ;;
      ;; stategy:
      ;;    sort by last-used and normalized-load
      ;;    if last-updated > 15 seconds then re-update
            load-sample-time
            load))
     ((and host-rec
           (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
      (list #t
            (host-last-update host-rec)
      ;;    take the host with the lowest load with the lowest last-used (i.e. not used for longest time)
      ;;
      (let ((best-host #f)
	    (curr-time (current-seconds)))
	(for-each
	 (lambda (hostname)
	   (let* ((rec       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
			       (if h
				   h
				   (let ((h (make-host)))
				     (hash-table-set! *host-loads* hostname h)
				     h))))
            (host-last-cpuload host-rec )))
     ((common:unix-ping hostname)
      (list #t
            (current-seconds)
            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname))))
     (else
      (list #f 0 -1)))))
    
(define (common:update-host-loads-table hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw)))
    (for-each
     (lambda (hostname)
       (let* ((rec       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                          (if h
                              h
                              (let ((h (make-host)))
                                (hash-table-set! *host-loads* hostname h)
                                h))))
		  ;; if host hasn't been pinged in 15 sec update it's data
		  (ping-good (if (< (- curr-time (host-last-update rec)) 15)
				 (host-reachable rec)
				 (or (host-reachable rec)
              (host-info         (common:get-host-info hostname))
              (is-reachable      (car host-info))
              (last-reached-time (cadr host-info))
				     (begin
				       (host-reachable-set! rec (common:unix-ping hostname))
				       (host-last-update-set! rec curr-time)
				       (host-last-cpuload-set! rec (common:get-normalized-cpu-load hostname))
				       (host-reachable rec))))))
	     (cond
	      ((not best-host)
	       (set! best-host hostname))
              (load              (caddr host-info)))
         (host-reachable-set!    rec is-reachable)
         (host-last-update-set!  rec last-reached-time)
         (host-last-cpuload-set! rec load)))
     hosts)))

(define (common:get-least-loaded-host hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw))
         (best-host #f)
         (best-load 99999)
         (curr-time (current-seconds)))
    (common:update-host-loads-table hosts)
    (for-each
     (lambda (hostname)
       (let* ((rec
               (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                 (if h
                     h
                     (let ((h (make-host)))
                       (hash-table-set! *host-loads* hostname h)
                       h))))
              (reachable (host-reachable rec))
              (load      (host-last-cpuload   rec)))
         (cond
          ((not reachable) #f)
          ((< (+ load (/ (random 250) 1000))         ;; add a random factor to keep from getting in a rut
              (+ best-load (/ (random 250) 1000))  )
           (set! best-load load)
	      ((and ping-good
		    (< (alist-ref 'adj-core-load (host-last-cpuload rec))
		       (alist-ref 'adj-core-load
				  (host-last-cpuload (hash-table-ref *host-loads* best-host)))))
	       (set! best-host rec)))))
	 hosts)
	best-host)))
           (set! best-host hostname)))))
     hosts)
    best-host))




(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload numcpus))
	 (loadjmp (- first next)))