Comment: | Automated merge of server-log-handshaking/a05b1e5025/integ into integ-home |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | integ-home |
Files: | files | file ages | folders |
SHA1: |
eb064be6a3184379d41aae343a457c79 |
User & Date: | matt on 2017-02-01 11:07:31 |
Other Links: | branch diff | manifest | tags |
2017-02-02
| ||
15:20 | Automated merge of html-gen/ae20a8a286/integ into integ-home check-in: 729025ee8c user: matt tags: integ-home | |
2017-02-01
| ||
11:07 | Automated merge of server-log-handshaking/a05b1e5025/integ into integ-home check-in: eb064be6a3 user: matt tags: integ-home | |
10:50 | Fixed an issue with log-rotate that was causing some run-away scenarios. check-in: a05b1e5025 user: mrwellan tags: server-log-handshaking | |
2017-01-29
| ||
16:48 | Automated merge of server-log-handshaking/a642f429b1/integ into integ-home check-in: ad7df3e1eb user: matt tags: integ-home | |
Modified common.scm from [3380145d50] to [1694e7ccde].
240 241 242 243 244 245 246 | ;; WARNING: This proc operates assuming that it is in the directory above the ;; logs directory you wish to log-rotate. ;; (define (common:rotate-logs) (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) | > > > > > | | > > > | | | | | | | > > > > > | 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 | ;; WARNING: This proc operates assuming that it is in the directory above the ;; logs directory you wish to log-rotate. ;; (define (common:rotate-logs) (if (not (directory-exists? "logs"))(create-directory "logs")) (directory-fold (lambda (file rem) (handle-exceptions exn (debug:print-info 0 *default-log-port* "failed to rotate log " file ", probably handled by another process.") (let* ((fullname (conc "logs/" file)) (file-age (- (current-seconds)(file-modification-time fullname)))) (if (or (and (string-match "^.*.log" file) (> (file-size fullname) 200000)) (and (string-match "^server-.*.log" file) (> (- (current-seconds) (file-modification-time fullname)) (* 8 60 60)))) (let ((gzfile (conc fullname ".gz"))) (if (file-exists? gzfile) (begin (debug:print-info 0 *default-log-port* "removing " gzfile) (delete-file gzfile))) (debug:print-info 0 *default-log-port* "compressing " file) (system (conc "gzip " fullname))) (if (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600)) (handle-exceptions exn #f (delete-file fullname))))))) '() "logs")) ;; Force a megatest cleanup-db if version is changed and skip-version-check not specified ;; (define (common:exit-on-version-changed) (if (common:version-changed?) |
Modified dashboard.scm from [bb7acd661f] to [9bbb1ee284].
99 100 101 102 103 104 105 | (if (args:get-arg "-h") (begin (print help) (exit))) (if (not (common:on-homehost?)) (begin | | | 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 | (if (args:get-arg "-h") (begin (print help) (exit))) (if (not (common:on-homehost?)) (begin (debug:print 0 *default-log-port* "ERROR: Current policy requires running dashboard on homehost: " (common:get-homehost)))) ;; TODO: Move this inside (main) ;; (if (not (launch:setup)) (begin (print "Failed to find megatest.config, exiting") (exit 1))) |
Modified server.scm from [b68dac663e] to [5c1183db18].
217 218 219 220 221 222 223 | (let ((now (current-seconds))) (sort (filter (lambda (rec) (let ((start-time (list-ref rec 3)) (mod-time (list-ref rec 0))) ;; (print "start-time: " start-time " mod-time: " mod-time) (and start-time mod-time | | | | 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 | (let ((now (current-seconds))) (sort (filter (lambda (rec) (let ((start-time (list-ref rec 3)) (mod-time (list-ref rec 0))) ;; (print "start-time: " start-time " mod-time: " mod-time) (and start-time mod-time (> (- now start-time) 0) ;; been running at least 0 seconds (< (- now mod-time) 16) ;; still alive - file touched in last 16 seconds (< (- now start-time) (string->number (or (configf:lookup *configdat* "server" "runtime") "3600"))) ;; under one hour running time ))) srvlst) (lambda (a b) (< (list-ref a 3) (list-ref b 3)))))) (define (server:get-first-best areapath) |
249 250 251 252 253 254 255 | (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run areapath) | | > | > > > > > > | < | | | | | > > | | > > > > > | | > > > | | | | | | < < < | 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 | (let ((sig (server:mk-signature))) (set! *my-client-signature* sig) *my-client-signature*))) ;; kind start up of servers, wait 40 seconds before allowing another server for a given ;; run-id to be launched (define (server:kind-run areapath) (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun (call-num (car last-run-dat)) (when-run (cadr last-run-dat)) (run-delay (+ (case call-num ((0) 0) ((1) 20) ((2) 300) (else 600)) (random 5)))) ;; add a small random number just in case a lot of jobs hit the work hosts simultaneously (if (> (- (current-seconds) when-run) run-delay) (server:run areapath)) (hash-table-set! *server-kind-run* areapath (list (+ call-num 1)(current-seconds))))) (define (server:start-and-wait areapath #!key (timeout 60)) (let ((give-up-time (+ (current-seconds) timeout))) (let loop ((server-url (server:check-if-running areapath))) (if (or server-url (> (current-seconds) give-up-time)) server-url (let ((num-ok (length (server:get-best (server:get-list areapath))))) (if (< num-ok 2) ;; if there are no decent candidates for servers then try starting a new one (server:kind-run areapath)) (thread-sleep! 5) (loop (server:check-if-running areapath))))))) (define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG. 
;; Age, in seconds, of the "<areapath>/.server" file, computed as the
;; current time minus the file's modification time.
;;
;; Returns #f if any exception is raised while reading the modification
;; time (presumably when the file does not exist - confirm against callers).
;;
(define (server:dotserver-age-seconds areapath)
  (let ((server-file (conc areapath "/.server")))
    (handle-exceptions
     exn
     #f
     (- (current-seconds)
        (file-modification-time server-file)))))

;; no longer care if multiple servers are started by accident. older servers will drop off in time.
;;
;; Walk the records returned by (server:get-best (server:get-list areapath))
;; in order and return the first true result of server:check-server.
;; Returns #f when the list is empty or no record passes the check.
;;
(define (server:check-if-running areapath)
  (let loop ((candidates (server:get-best (server:get-list areapath))))
    (if (null? candidates)
        #f
        (or (server:check-server (car candidates))
            (loop (cdr candidates))))))

;; ping the given server
;;
;; Returns the url derived from server-record when the ping succeeds,
;; otherwise #f.
;;
(define (server:check-server server-record)
  (let* ((server-url (server:record->url server-record))
         (res        (case *transport-type*
                       ((http) (server:ping server-url))
                       ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
                       ;; Without an else clause `case` yields the unspecified
                       ;; value, which is true in a conditional; that made every
                       ;; non-http transport look alive without being pinged.
                       (else #f))))
    (if res
        server-url
        #f)))

;; Kill the server described by servr, which must match the five element
;; list (mod-time hostname port start-time pid). Only hostname and pid are
;; used; they are handed to tasks:kill-server.
;;
(define (server:kill servr)
  (match-let (((mod-time hostname port start-time pid) servr))
    (tasks:kill-server hostname pid)))
Modified utils/lock-stats.sh from [3f061e6171] to [84d255afaf].
1 2 3 4 5 | #!/bin/bash while IFS=': ' read x x x x p x x i x; do if ! [[ ${i}x == "x" ]];then if ! $(echo $i|grep EOF >/dev/null);then | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 | #!/bin/bash while IFS=': ' read x x x x p x x i x; do if ! [[ ${i}x == "x" ]];then if ! $(echo $i|grep EOF >/dev/null);then fname=$(find -L "/proc/$p/fd" -maxdepth 1 -inum "$i" -exec readlink {} \; -quit) if $(echo $fname | grep megatest.db > /dev/null) || \ $(echo $fname | egrep '.db/\d+.db' > /dev/null);then echo $fname fi fi fi done < /proc/locks |