Overview
Context
Changes
Modified http-transport.scm
from [9bfccef351]
to [c23198333f].
︙ | | |
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
|
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
|
+
-
+
+
|
;; used and to shutdown after sometime if it is not.
;;
(define (http-transport:keep-running)
;; if none running or if > 20 seconds since
;; server last used then start shutdown
;; This thread waits for the server to come alive
(debug:print-info 0 *default-log-port* "Starting the sync-back, keep alive thread in server")
(let* ((servinfofile #f)
(let* ((sdat #f)
(sdat #f)
(no-sync-db (db:open-no-sync-db))
(tmp-area (common:get-db-tmp-area))
(started-file (conc tmp-area "/.server-started"))
(server-start-time (current-seconds))
(server-info (let loop ((start-time (current-seconds))
(changed #t)
(last-sdat "not this"))
(begin ;; let ((sdat #f))
(thread-sleep! 0.01)
(debug:print-info 0 *default-log-port* "Waiting for server alive signature")
(mutex-lock! *heartbeat-mutex*)
(set! sdat *server-info*)
(mutex-unlock! *heartbeat-mutex*)
(if (and sdat
(not changed)
(> (- (current-seconds) start-time) 2))
(let* ((servinfodir (conc *toppath*"/.servinfo"))
(ipaddr (car sdat))
(port (cadr sdat))
(servinf (conc servinfodir"/"ipaddr":"port)))
(set! servinfofile servinf)
(if (not (file-exists? servinfodir))
(create-directory servinfodir #t))
(with-output-to-file servinf
(lambda ()
(let* ((serv-id (server:mk-signature)))
(set! *server-id* serv-id)
(print "SERVER STARTED: "ipaddr":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))
|
︙ | | |
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
|
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
|
-
+
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
|
;; Use this opportunity to sync the tmp db to megatest.db
(if (not server-going) ;; *dbstruct-dbs*
(begin
(debug:print 0 *default-log-port* "SERVER: dbprep")
(set! *dbstruct-dbs* (db:setup #t)) ;; run-id)) FIXME!!!
(set! server-going #t)
(debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
(debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version))) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.
;; (thread-start! *watchdog*)
)
(if (and no-sync-db
(common:low-noise-print 5 "sync-all")) ;; cheesy way to reduce frequency of running sync :)
(begin
(debug:print 0 *default-log-port* "keep-running calling db:all-db-sync at " (time->string (seconds->local-time) "%H:%M:%S"))
;; This is tougher than it seems - have to deal with multiple dbs
;; (db:process-transaction-queue *dbstruct-dbs*)
(db:all-db-sync *dbstruct-dbs*)
(db:all-db-sync *dbstruct-dbs*))))
;; (db:do-sync no-sync-db)
;; (db:run-lock-and-sync *no-sync-db*)
)
)
)
;; when things go wrong we don't want to be doing the various queries too often
;; so we strive to run this stuff only every four seconds or so.
(let* ((sync-time (- (current-milliseconds) start-time))
(rem-time (quotient (- 4000 sync-time) 1000)))
(if (and (<= rem-time 4)
(> rem-time 0))
|
︙ | | |
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
|
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
-
|
(flush-output *default-log-port*)))
(if (common:low-noise-print 60 "dbstats")
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
(cond
#;((and *server-run*
(> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
(let* ((loaddat (common:get-normalized-cpu-load #f))
(adj-proc-load (alist-ref 'adj-proc-load loaddat))
(adj-core-load (alist-ref 'adj-core-load loaddat))
(adj-load (max adj-proc-load adj-core-load)))
(if (< adj-load 2) ;; reduce chance of runaway
(server:run *toppath*))
(db:all-db-sync *dbstruct-dbs*)
(thread-sleep! 30)
(http-transport:server-shutdown port)))
((and *server-run*
(> (+ last-access server-timeout)
(current-seconds)))
(if (common:low-noise-print 120 "server continuing")
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
(let ((curr-time (current-seconds)))
(handle-exceptions
exn
(debug:print 0 *default-log-port* "ERROR: Failed to change timestamp on log file " server-log-file ". Are you out of space on that disk? exn=" exn)
(if (and (< (- (current-seconds) server-start-time) 600) ;; run for ten minutes for experiment, 3600 thereafter
(not *server-overloaded*))
(change-file-times server-log-file curr-time curr-time)
(if (common:low-noise-print 120 "start new server")
(server:kind-run *toppath*) ;; server:kind-run uses [servers] numservers
(change-file-times servinfofile curr-time curr-time)))
(if (or (common:low-noise-print 120 "start new server")
(> *api-process-request-count* 50)) ;; if this server is kind of busy start up another
(begin
(debug:print-info 0 *default-log-port* "Server is busy, start another if possible...")
(server:kind-run *toppath*)))))
)))))
(loop 0 server-state bad-sync-count (current-milliseconds)))
(else
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(http-transport:server-shutdown port)))))))
(define (http-transport:server-shutdown port)
(begin
|
︙ | | |
Modified server.scm
from [787d7f0596]
to [69e932d5ac].
︙ | | |
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
|
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
|
-
+
|
((homehost) (cons host (am-home?))) ;; shut up old code
((home?) (am-home?))
((best-five)(names->dats (best-five)))
((all-valid)(names->dats all-valid))
((best) (let* ((best-five (best-five))
(len (length best-five)))
(hash-table-ref serversdat (list-ref best-five (random len)))))
((count)(length all-valid))
(else
(debug:print 0 *default-log-port* "ERROR: invalid command "mode)
#f)))
(begin
(server:run areapath)
(thread-sleep! 3)
(case mode
|
︙ | | |
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
|
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
|
+
-
+
|
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
;; look for $MT_RUN_AREA_HOME/logs/server-start-last
;; and wait for it to be at least <server idletime> seconds old
;; (server:wait-for-server-start-last-flag areapath)
(if (< (server:choose-server areapath 'count) 10)
(server:run areapath)
(server:run areapath))
#;(if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
(let* ((lock-file (conc areapath "/logs/server-start.lock")))
(let* ((start-flag (conc areapath "/logs/server-start-last")))
(common:simple-file-lock-and-wait lock-file expire-time: 25)
(debug:print-info 2 *default-log-port* "server:kind-run: touching " start-flag)
(system (conc "touch " start-flag)) ;; lazy but safe
(server:run areapath)
|
︙ | | |