Megatest

Check-in [cfb9ac119d]
Login
Overview
Comment:added support for -kill-servers and -transport switches on megatest
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.62
Files: files | file ages | folders
SHA1: cfb9ac119dc5698a4f6c9963353ab63bea2ea251
User & Date: bjbarcla on 2016-10-26 14:21:24
Other Links: branch diff | manifest | tags
Context
2016-10-27
15:21
Run tab resize fixed check-in: 5ff16368ff user: ritikaag tags: v1.62
2016-10-26
14:48
branch for rpc support check-in: d306d8dea0 user: bjbarcla tags: rpc-transport
14:21
added support for -kill-servers and -transport switches on megatest check-in: cfb9ac119d user: bjbarcla tags: v1.62
2016-10-25
21:02
fixed -list-servers and -stop-server switches on megatest check-in: 73502a1626 user: bjbarcla tags: v1.62
Changes

Modified megatest.scm from [8187d3ec4f] to [7f46162c88].

736
737
738
739
740
741
742
743


744
745
746

747
748
749
750
751
752
753
754
755
756
757

758
759
760
761
762
763
764
736
737
738
739
740
741
742

743
744
745
746

747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766







-
+
+


-
+











+








(if (args:get-arg "-server")

    ;; Server? Start up here.
    ;;
    (let ((tl        (launch:setup))
	  (run-id    (and (args:get-arg "-run-id")
			  (string->number (args:get-arg "-run-id")))))
			  (string->number (args:get-arg "-run-id"))))
          (transport-type (string->symbol (or (args:get-arg "-transport") "http"))))
      (if run-id
	  (begin
	    (server:launch run-id)
	    (server:launch run-id transport-type)
	    (set! *didsomething* #t))
	  (debug:print-error 0 *default-log-port* "server requires run-id be specified with -run-id")))

    ;; Not a server? This section will decide how to communicate
    ;;
    ;;  Setup client for all except those listed here
    (if (null? (lset-intersection 
		equal?
		(hash-table-keys args:arg-hash)
		'("-list-servers"
		  "-stop-server"
                  "-kill-server"
		  "-show-cmdinfo"
		  "-list-runs"
		  "-ping")))
	(if (launch:setup)
	    (let ((run-id    (and (args:get-arg "-run-id")
				  (string->number (args:get-arg "-run-id")))))
	      ;; (set! *fdb*   (filedb:open-db (conc *toppath* "/db/paths.db")))
773
774
775
776
777
778
779
780


781
782
783
784
785
786

787

788
789
790
791
792
793
794
775
776
777
778
779
780
781

782
783
784
785
786
787
788
789
790

791
792
793
794
795
796
797
798







-
+
+






+
-
+







		    #t
		    ))))))

;; MAY STILL NEED THIS
;;		       (set! *megatest-db* (make-dbr:dbstruct path: *toppath* local: #t))))))))))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server"))
	(args:get-arg "-stop-server")
        (args:get-arg "-kill-server"))
    (let ((tl (launch:setup)))
      (if tl 
	  (let* ((tdbdat  (tasks:open-db))
		 (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))
		 (fmtstr  "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n")
		 (servers-to-kill '())
                 (kill-switch  (if (args:get-arg "-kill-server") "-9" ""))
		 (killinfo   (args:get-arg "-stop-server"))
                 (killinfo   (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") ))
		 (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f))
		 (sid        (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)))
	    (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport")
	    (format #t fmtstr "==" "=====" "===" "====" "=================" "======" "========" "=====" "=========")
	    (for-each 
	     (lambda (server)
	       (let* ((id         (vector-ref server 0))
814
815
816
817
818
819
820
821
822


823
824
825
826
827
828
829
818
819
820
821
822
823
824


825
826
827
828
829
830
831
832
833







-
-
+
+







		     (if (> last-update 20)        ;; Mark as dead if not updated in last 20 seconds
			 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid)))
		 (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update
			 (if status "alive" "dead") transport)
		 (if (or (equal? id sid)
			 (equal? sid 0)) ;; kill all/any
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to stop server with pid " pid)
		       (tasks:kill-server hostname pid)))))
		       (debug:print-info 0 *default-log-port* "Attempting to kill "kill-switch" server with pid " pid)
		       (tasks:kill-server hostname pid kill-switch: kill-switch)))))
	     servers)
	    (debug:print-info 1 *default-log-port* "Done with listservers")
	    (set! *didsomething* #t)
	    (exit)) ;; must do, would have to add checks to many/all calls below
	  (exit))))

;;======================================================================

Modified rpc-transport.scm from [7e38f4f2de] to [62a65daa58].

36
37
38
39
40
41
42


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64






















65
66
67
68
69
70
71
36
37
38
39
40
41
42
43
44






















45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73







+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







   (apply (eval (string->symbol procstr)) params)))

;; all routes through here end in exit ...
;;
;; start_server? 
;;
(define (rpc-transport:launch run-id)
  (let* ((tdbdat (tasks:open-db)))
    (BB> "rpc-transport:launch fired for run-id="run-id)
  (set! *run-id*   run-id)
  (if (args:get-arg "-daemonize")
      (daemon:ize))
  (if (server:check-if-running run-id)
      (begin
	(debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running")
	(exit 0)))
  (let loop ((server-id (open-run-close tasks:server-lock-slot tasks:open-db run-id))
	     (remtries  4))
    (if (not server-id)
	(if (> remtries 0)
	    (begin
	      (thread-sleep! 2)
	      (loop (open-run-close tasks:server-lock-slot tasks:open-db run-id)
		    (- remtries 1)))
	    (begin
	      ;; since we didn't get the server lock we are going to clean up and bail out
	      (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
	      (open-run-close tasks:server-delete-records-for-this-pid tasks:open-db " rpc-transport:launch")))
	(begin
	  (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id)
	  (exit)))))
    (set! *run-id*   run-id)
    (if (args:get-arg "-daemonize")
        (daemon:ize))
    (if (server:check-if-running run-id)
        (begin
          (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running")
          (exit 0)))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
               (remtries  4))
      (if (not server-id)
          (if (> remtries 0)
              (begin
                (thread-sleep! 2)
                (loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
                      (- remtries 1)))
              (begin
                ;; since we didn't get the server lock we are going to clean up and bail out
                (debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
                (tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " rpc-transport:launch")))
          (begin
            (rpc-transport:run (if (args:get-arg "-server")(args:get-arg "-server") "-") run-id server-id)
            (exit))))))

(define (rpc-transport:run hostn run-id server-id)
  (debug:print 2 *default-log-port* "Attempting to start the rpc server ...")
   ;; (trace rpc:publish-procedure!)

  (rpc:publish-procedure! 'server:login server:login)
  (rpc:publish-procedure! 'testing (lambda () "Just testing"))

Modified server.scm from [8a9483a036] to [19061b35b0].

45
46
47
48
49
50
51
52
53



54
55
56
57

58
59
60
61
62
63
64
45
46
47
48
49
50
51


52
53
54
55
56
57

58
59
60
61
62
63
64
65







-
-
+
+
+



-
+







;; Call this to start the actual server
;;

;; all routes through here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id)
  (case *transport-type*
(define (server:launch run-id transport-type)
  (BB> "server:launch fired for run-id="run-id" transport-type="transport-type)
  (case transport-type
    ((http)(http-transport:launch run-id))
    ;;((nmsg)(nmsg-transport:launch run-id))
    ((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " *transport-type*))))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
;;       (else   (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc")
;; 	      (rpc-transport:launch run-id)))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

Modified tasks.scm from [7c3f9c7793] to [a06114a2ac].

429
430
431
432
433
434
435
436

437
438
439
440

441
442
443
444
445
446
447
429
430
431
432
433
434
435

436
437
438
439

440
441
442
443
444
445
446
447







-
+



-
+







     "SELECT id,pid,hostname,interface,port,pubport,start_time,priority,state,mt_version,strftime('%s','now')-heartbeat AS last_update,transport,run_id 
        FROM servers WHERE run_id=? AND state NOT LIKE 'defunct%' ORDER BY start_time DESC;"
     run-id)
    (reverse res)))

;; no elegance here ...
;;
(define (tasks:kill-server hostname pid)
(define (tasks:kill-server hostname pid #!key (kill-switch ""))
  (debug:print-info 0 *default-log-port* "Attempting to kill server process " pid " on host " hostname)
  (setenv "TARGETHOST" hostname)
  (setenv "TARGETHOST_LOGF" "server-kills.log")
  (system (conc "nbfake kill " pid))
  (system (conc "nbfake kill "kill-switch" "pid))
  (unsetenv "TARGETHOST_LOGF")
  (unsetenv "TARGETHOST"))
 
;; look up a server by run-id and send it a kill, also delete the record for that server
;;
(define (tasks:kill-server-run-id run-id #!key (tag "default"))
  (let* ((tdbdat  (tasks:open-db))