Megatest

Check-in [8a6ca9fd18]
Login
Overview
Comment:Merged v1.63 changes to multi-server-hack
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | multi-server-hack
Files: files | file ages | folders
SHA1: 8a6ca9fd183018708709029ce7adfcdbebf68006
User & Date: matt on 2017-03-24 11:27:46
Other Links: branch diff | manifest | tags
Context
2017-03-24
13:51
Show connection stats every 60 seconds. Remove stat of megatest.db from rmt:send-receive, it was happening on every call. check-in: 6baac6187e user: matt tags: multi-server-hack
11:27
Merged v1.63 changes to multi-server-hack check-in: 8a6ca9fd18 user: matt tags: multi-server-hack
11:19
Added some error handling on the locking calls. Silenced a dashboard message. check-in: a5dbcdd2ac user: matt tags: v1.63
2017-03-23
17:50
Couple fixes for variable server hack check-in: 842f12e5fe user: matt tags: multi-server-hack
Changes

Modified common.scm from [22d5399385] to [4158ce55d8].

412
413
414
415
416
417
418



419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
















435
436
437
438
439
440
441


442

443
444
445



446

447
448
449
450
451
452
453
412
413
414
415
416
417
418
419
420
421
















422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446

447
448
449
450
451
452
453

454
455
456
457
458
459
460
461







+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







+
+
-
+



+
+
+
-
+







   (read (open-input-string (z3:decode-buffer (base64:base64-decode instr))))))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
  (handle-exceptions
      exn
      #f ;; don't really care what went wrong right now. NOTE: I have not seen this one actually fail.
  (if (file-exists? fname)
      (if (> (- (current-seconds)(file-modification-time fname)) expire-time)
	  (begin
	    (delete-file* fname)
	    (common:simple-file-lock fname expire-time: expire-time))
	  #f)
      (let ((key-string (conc (get-host-name) "-" (current-process-id))))
	(with-output-to-file fname
	  (lambda ()
	    (print key-string)))
	(thread-sleep! 0.25)
	(if (file-exists? fname)
	    (with-input-from-file fname
	      (lambda ()
		(equal? key-string (read-line))))
	    #f))))
    (if (file-exists? fname)
	(if (> (- (current-seconds)(file-modification-time fname)) expire-time)
	    (begin
	      (delete-file* fname)
	      (common:simple-file-lock fname expire-time: expire-time))
	    #f)
	(let ((key-string (conc (get-host-name) "-" (current-process-id))))
	  (with-output-to-file fname
	    (lambda ()
	      (print key-string)))
	  (thread-sleep! 0.25)
	  (if (file-exists? fname)
	      (with-input-from-file fname
		(lambda ()
		  (equal? key-string (read-line))))
	      #f)))))

(define (common:simple-file-lock-and-wait fname #!key (expire-time 300))
  (let ((end-time (+ expire-time (current-seconds))))
    (let loop ((got-lock (common:simple-file-lock fname expire-time: expire-time)))
      (if got-lock
	  #t
	  (if (> end-time (current-seconds))
	      (begin
		(thread-sleep! 3)
	      (loop (common:simple-file-lock fname expire-time: expire-time))
		(loop (common:simple-file-lock fname expire-time: expire-time)))
	      #f)))))

(define (common:simple-file-release-lock fname)
  (handle-exceptions
      exn
      #f ;; I don't really care why this failed (at least for now)
  (delete-file* fname))
    (delete-file* fname)))

;;======================================================================
;; S T A T E S   A N D   S T A T U S E S
;;======================================================================

(define *common:std-states*   
  '((0 "ARCHIVED")

Modified dashboard.scm from [20957064b0] to [267452dd0e].

2716
2717
2718
2719
2720
2721
2722
2723

2724
2725
2726
2727
2728
2729
2730
2716
2717
2718
2719
2720
2721
2722

2723
2724
2725
2726
2727
2728
2729
2730







-
+







;; Force creation of the db in case it isn't already there.
(tasks:open-db)

(define (dashboard:get-youngest-run-db-mod-time dbdir)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir)
     (debug:print 2 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir)
     (current-seconds)) ;; something went wrong - just print an error and return current-seconds
   (common:max (map (lambda (filen)
		      (file-modification-time filen))
		    (glob (conc dbdir "/*.db*"))))))

(define (dashboard:monitor-changed? commondat tabdat)
  (let* ((run-update-time (current-seconds))

Modified launch.scm from [c20e1b3984] to [442d86a53d].

469
470
471
472
473
474
475

476





477
478
479
480
481
482
483
469
470
471
472
473
474
475
476

477
478
479
480
481
482
483
484
485
486
487
488







+
-
+
+
+
+
+







                                 (start-res (http-transport:client-connect host port))
                                 (ping-res  (rmt:login-no-auto-client-setup start-res)))
			    (if (and start-res
				     ping-res)
				(let ((url  (http-transport:server-dat-make-url start-res)))
				  (remote-conndat-set! *runremote* start-res)
				  (remote-server-url-set! *runremote* url)
				  (if (server:ping url)
				  (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data."))
				      (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data.")
				      (begin
					(debug:print-info 0 *default-log-port* "have CMDINFO data but failed to connect to " url)
					(remote-conndat-set! *runremote* #f)
					(remote-server-url-set! *runremote* #f))))
				(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
				)))))))
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (file-exists? top-path)
		    (> count 10))
		(change-directory top-path)