︙ | | | ︙ | |
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
|
;;
;; Make sure remdat holds a connection record (conndat) for the main db
;; server of the area at apath. Always returns #t.
;;
;;   remdat - remotedat struct; (remotedat-conns remdat) is the hash table
;;            of connection records, keyed by db path
;;   apath  - area path
;;
;; The cache key is derived from the same dbname that is used when the
;; record is stored, so an unexpired record is found again instead of
;; being re-created on every call.
;;
(define (rmt:open-main-connection remdat apath)
  (let* ((dbname   (db:run-id->dbname #f))
         (fullpath (db:dbname->path apath dbname)) ;; one key for both lookup and store
         (conns    (remotedat-conns remdat))
         (conn     (hash-table-ref/default conns fullpath #f))) ;; TODO - create call for this
    (if (and conn ;; conn is NOT a socket, just saying ...
             (< (current-seconds) (conndat-expires conn)))
        #t ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died
        ;; Below we will find or create and connect to main
        (let* ((the-srv        (rmt:find-main-server apath dbname))
               (start-main-srv (lambda () ;; call IF there is no the-srv found
                                 (mutex-lock! *connstart-mutex*)
                                 ;; only launch if more than 5 seconds have passed since the last attempt
                                 (if (> (- (current-seconds) *last-main-start*) 5)
                                     (begin
                                       (api:run-server-process apath dbname)
                                       (set! *last-main-start* (current-seconds))
                                       (thread-sleep! 1)))
                                 (mutex-unlock! *connstart-mutex*)
                                 ;; TODO: Add limit to number of tries
                                 (rmt:open-main-connection remdat apath))))
          (if (not the-srv)
              ;; no server found - try to start one and look again
              (start-main-srv)
              ;; have server - build the connection record and cache it
              (let* ((srv-addr (server-address the-srv)) ;; need serv
                     (ipaddr   (alist-ref 'ipaddr the-srv))
                     (port     (alist-ref 'port the-srv))
                     (srvkey   (alist-ref 'servkey the-srv))
                     (new-the-srv (make-conndat
                                   apath:    apath
                                   dbname:   dbname
                                   fullname: fullpath
                                   hostport: srv-addr
                                   socket:   (open-nn-connection srv-addr)
                                   ipaddr:   ipaddr
                                   port:     port
                                   srvpkt:   the-srv
                                   srvkey:   srvkey ;; generated by rmt:get-signature on the server side
                                   lastmsg:  (current-seconds)
                                   ;; this needs to be gathered during the ping
                                   expires:  (+ (current-seconds) 60)
                                   )))
                (hash-table-set! conns fullpath new-the-srv)))
          #t))))
;; NB// remdat is a remotedat struct
;;
(define (rmt:general-open-connection remdat apath dbname #!key (num-tries 5))
(assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
(let* ((mdbname (db:run-id->dbname #f))
(fullname (db:dbname->path apath dbname))
(conns (remotedat-conns remdat))
(mconn (rmt:get-conn remdat apath mdbname)))
(if (and mconn
(not (debug:print-logger)))
(begin
(debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
(debug:print-logger rmt:log-to-main)))
(cond
((or (not mconn) ;; no channel open to main?
(< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease
(rmt:open-main-connection remdat apath)
(rmt:general-open-connection remdat apath mdbname))
((not (rmt:get-conn remdat apath dbname)) ;; no channel open to dbname?
(let* ((res (rmt:send-receive-real remdat apath mdbname 'get-server `(,apath ,dbname))))
(case res
((server-started)
(if (> num-tries 0)
|
>
|
|
|
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
>
>
>
>
>
|
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
|
;;
;; Make sure remdat holds a connection record (conndat) for the main db
;; server of the area at apath. Always returns #t.
;;
;;   remdat - remotedat struct; (remotedat-conns remdat) is the hash table
;;            of connection records, keyed by db path
;;   apath  - area path
;;
;; The cache key is derived from the same dbname that is used when the
;; record is stored, so a stored record is found again: an unexpired one
;; is reused and an expired one is closed and replaced.
;;
(define (rmt:open-main-connection remdat apath)
  (let* ((dbname   (db:run-id->dbname #f))
         (fullpath (db:dbname->path apath dbname)) ;; one key for lookup, cleanup and store
         (conns    (remotedat-conns remdat))
         (conn     (hash-table-ref/default conns fullpath #f))) ;; TODO - create call for this
    (cond
     ((and conn ;; conn is NOT a socket, just saying ...
           (< (current-seconds) (conndat-expires conn)))
      #t) ;; we are current and good to go - we'll deal elsewhere with a server that was killed or died
     ((and conn
           (>= (current-seconds)(conndat-expires conn)))
      (debug:print-info 0 *default-log-port* "connection to "fullpath" server expired. Reconnecting.")
      (if (conndat-socket conn)
          (nng-close! (conndat-socket conn)))
      (hash-table-set! conns fullpath #f) ;; clean up
      ;; the record is now #f, so this call takes the else branch below
      (rmt:open-main-connection remdat apath))
     (else
      ;; Below we will find or create and connect to main
      (let* ((the-srv        (rmt:find-main-server apath dbname))
             (start-main-srv (lambda () ;; call IF there is no the-srv found
                               (mutex-lock! *connstart-mutex*)
                               ;; only launch if more than 5 seconds have passed since the last attempt
                               (if (> (- (current-seconds) *last-main-start*) 5)
                                   (begin
                                     (api:run-server-process apath dbname)
                                     (set! *last-main-start* (current-seconds))
                                     (thread-sleep! 1)))
                               (mutex-unlock! *connstart-mutex*)
                               ;; TODO: Add limit to number of tries
                               (rmt:open-main-connection remdat apath))))
        (if (not the-srv)
            ;; no server found - try to start one and look again
            (start-main-srv)
            ;; have server - build the connection record and cache it
            (let* ((srv-addr (server-address the-srv)) ;; need serv
                   (ipaddr   (alist-ref 'ipaddr the-srv))
                   (port     (alist-ref 'port the-srv))
                   (srvkey   (alist-ref 'servkey the-srv))
                   (new-the-srv (make-conndat
                                 apath:    apath
                                 dbname:   dbname
                                 fullname: fullpath
                                 hostport: srv-addr
                                 socket:   (open-nn-connection srv-addr)
                                 ipaddr:   ipaddr
                                 port:     port
                                 srvpkt:   the-srv
                                 srvkey:   srvkey ;; generated by rmt:get-signature on the server side
                                 lastmsg:  (current-seconds)
                                 ;; this needs to be gathered during the ping
                                 expires:  (+ (current-seconds) 60)
                                 )))
              (hash-table-set! conns fullpath new-the-srv)))
        #t)))))
;; NB// remdat is a remotedat struct
;;
(define (rmt:general-open-connection remdat apath dbname #!key (num-tries 5))
(assert (not (equal? dbname ".db/main.db")) "ERROR: general-open-connection should never be called with main as the db")
(let* ((mdbname (db:run-id->dbname #f))
(fullname (db:dbname->path apath dbname))
(conns (remotedat-conns remdat))
(mconn (rmt:get-conn remdat apath mdbname)))
(if (and mconn
(not (debug:print-logger)))
(begin
(debug:print-info 0 *default-log-port* "Turning on logging to main, look in logs dir for main log.")
(debug:print-logger rmt:log-to-main)))
(cond
((or (not mconn) ;; no channel open to main?
(< (conndat-expires mconn)(+ (current-seconds) 2))) ;; restablish connection if less than 2 seconds on the lease
(if mconn ;; previously opened - clean up NB// consolidate this with the similar code in open main above
(begin
(debug:print-info 0 *default-log-port* "Clearing out connection to main that has expired.")
(nng-close! (conndat-socket mconn))
(hash-table-set! conns fullname #f)))
(rmt:open-main-connection remdat apath)
(rmt:general-open-connection remdat apath mdbname))
((not (rmt:get-conn remdat apath dbname)) ;; no channel open to dbname?
(let* ((res (rmt:send-receive-real remdat apath mdbname 'get-server `(,apath ,dbname))))
(case res
((server-started)
(if (> num-tries 0)
|
︙ | | | ︙ | |
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
|
(not (equal? (common:get-last-run-version)
(common:version-signature))))
;; Returns #t when the leading (up to) four characters of megatest-version
;; differ from the leading characters of the last-run version, #f otherwise.
;; The prefix length is clamped to the string length so a short value
;; (for example a missing last-run version rendered as "#f") does not make
;; substring raise an out-of-range error.
(define (common:api-changed?)
  (let ((prefix (lambda (val)
                  (let ((str (->string val)))
                    (substring str 0 (min 4 (string-length str)))))))
    (not (equal? (prefix megatest-version)
                 (prefix (common:get-last-run-version))))))
(define (rmt:server-shutdown)
(let ((dbfile (servdat-dbfile *server-info*)))
(debug:print-info 0 *default-log-port* "dbfile is "dbfile)
(if dbfile
(let* ((am-server (args:get-arg "-server"))
(dbfile (args:get-arg "-db"))
(apath *toppath*)
(remdat *remotedat*)) ;; foundation for future fix
|
>
|
|
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
|
(not (equal? (common:get-last-run-version)
(common:version-signature))))
;; Returns #t when the leading (up to) four characters of megatest-version
;; differ from the leading characters of the last-run version, #f otherwise.
;; The prefix length is clamped to the string length so a short value
;; (for example a missing last-run version rendered as "#f") does not make
;; substring raise an out-of-range error.
(define (common:api-changed?)
  (let ((prefix (lambda (val)
                  (let ((str (->string val)))
                    (substring str 0 (min 4 (string-length str)))))))
    (not (equal? (prefix megatest-version)
                 (prefix (common:get-last-run-version))))))
;; host and port are used to ensure we remove the proper records
(define (rmt:server-shutdown host port)
(let ((dbfile (servdat-dbfile *server-info*)))
(debug:print-info 0 *default-log-port* "dbfile is "dbfile)
(if dbfile
(let* ((am-server (args:get-arg "-server"))
(dbfile (args:get-arg "-db"))
(apath *toppath*)
(remdat *remotedat*)) ;; foundation for future fix
|
︙ | | | ︙ | |
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
|
(debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
(if (string-match ".*/main.db$" dbfile)
(let ((pkt-file (conc (get-pkts-dir *toppath*)
"/" (servdat-uuid *server-info*)
".pkt")))
(debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
(delete-file* pkt-file)
(debug:print-info 0 *default-log-port* "Releasing lock for "dbfile)
(db:with-lock-db (servdat-dbfile *server-info*)
(lambda (dbh dbfile)
(db:release-lock dbh dbfile))))
(let* ((sdat *server-info*) ;; we have a run-id server
(host (servdat-host sdat))
(port (servdat-port sdat))
(uuid (servdat-uuid sdat))
(res (rmt:deregister-server remdat *toppath* host port uuid dbfile)))
(debug:print-info 0 *default-log-port* "deregistered-server, res="res)
(debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
|
|
>
|
|
|
|
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
|
(debug:print-info 0 *default-log-port* "I am not a server, should NOT get here!")
(if (string-match ".*/main.db$" dbfile)
(let ((pkt-file (conc (get-pkts-dir *toppath*)
"/" (servdat-uuid *server-info*)
".pkt")))
(debug:print-info 0 *default-log-port* "removing pkt "pkt-file)
(delete-file* pkt-file)
(debug:print-info 0 *default-log-port* "Releasing lock (if any) for "dbfile ", host "host", port "port)
(db:with-lock-db
(servdat-dbfile *server-info*)
(lambda (dbh dbfile)
(db:release-lock dbh dbfile host port)))) ;; I'm not the server - should not have a lock to remove
(let* ((sdat *server-info*) ;; we have a run-id server
(host (servdat-host sdat))
(port (servdat-port sdat))
(uuid (servdat-uuid sdat))
(res (rmt:deregister-server remdat *toppath* host port uuid dbfile)))
(debug:print-info 0 *default-log-port* "deregistered-server, res="res)
(debug:print-info 0 *default-log-port* "deregistering server "host":"port" with uuid "uuid)
|
︙ | | | ︙ | |
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
|
(let ((th1 (make-thread
(lambda () ;; thread for cleaning up, give it five seconds
(let* ((start-time (current-seconds)))
(if (and *server-info*
*unclean-shutdown*)
(begin
(debug:print-info 0 *default-log-port* "Unclean server exit, calling server-shtudown")
(rmt:server-shutdown)))
(debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds"))
;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
#;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db
(let ((db (cdr (bdat-task-db *bdat*))))
(if (sqlite3:database? db)
(begin
(debug:print-info 0 *default-log-port* "Closing down task db "db)
|
|
>
|
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
|
(let ((th1 (make-thread
(lambda () ;; thread for cleaning up, give it five seconds
(let* ((start-time (current-seconds)))
(if (and *server-info*
*unclean-shutdown*)
(begin
(debug:print-info 0 *default-log-port* "Unclean server exit, calling server-shtudown")
(rmt:server-shutdown (servdat-host *server-info*)
(servdat-port *server-info*))))
(debug:print-info 0 *default-log-port* "Shutdown activities completed in "(- (current-seconds) start-time)" seconds"))
;; (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
#;(if (bdat-task-db *bdat*) ;; TODO: Check that this is correct for task db
(let ((db (cdr (bdat-task-db *bdat*))))
(if (sqlite3:database? db)
(begin
(debug:print-info 0 *default-log-port* "Closing down task db "db)
|
︙ | | | ︙ | |
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
|
(if (not (eq? res 'quit))
(begin
(set! *db-last-access* (current-seconds))
(nng-send rep resdat)
(loop (nng-recv rep)))))))
(debug:print-info 0 *default-log-port* "After server, should never see this")
;; server exit stuff here
(let* ((portnum (servdat-port *server-info*)))
(portlogger:open-run-close portlogger:set-port portnum "released")
(rmt:server-shutdown)
;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up
(portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run
;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
;; (debug:print-info 0 *default-log-port* "Average cached write time "
;; (if (eq? *number-of-writes* 0)
;; "n/a (no writes)"
|
|
>
>
>
|
|
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
|
(if (not (eq? res 'quit))
(begin
(set! *db-last-access* (current-seconds))
(nng-send rep resdat)
(loop (nng-recv rep)))))))
(debug:print-info 0 *default-log-port* "After server, should never see this")
;; server exit stuff here
(let* ((portnum (servdat-port *server-info*))
(host (servdat-host *server-info*)))
(portlogger:open-run-close portlogger:set-port portnum "released")
(if (not (equal? (get-host-name) host))
(debug:print-info 0 *default-log-port* "Server shutdown called for host "host", but we are on "(get-host-name))
(rmt:server-shutdown host portnum))
;; (bdat-time-to-exit-set! *bdat* #t) ;; tell on-exit to be fast as we've already cleaned up
(portlogger:open-run-close portlogger:set-port port "released") ;; done in rmt:run
;; (debug:print-info 0 *default-log-port* "Max cached queries was " *max-cache-size*)
;; (debug:print-info 0 *default-log-port* "Number of cached writes " *number-of-writes*)
;; (debug:print-info 0 *default-log-port* "Average cached write time "
;; (if (eq? *number-of-writes* 0)
;; "n/a (no writes)"
|
︙ | | | ︙ | |
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
|
(set! *http-connections-next-cleanup* (+ (current-seconds) 10))
(mutex-unlock! *http-mutex*))
;; Takes *http-mutex* and bumps the in-progress request counter by one.
;; The mutex is NOT released here; it is still held when this returns.
(define (rmt:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (let ((in-flight (+ *http-requests-in-progress* 1)))
    (set! *http-requests-in-progress* in-flight)))
;; careful closing of connections stored in *runremote*
;;
;; Currently a stub: accepts the optional area-dat keyword for callers but
;; only logs that nothing is done.
(define (rmt:close-connections #!key (area-dat #f))
  (let ((notice "rmt:close-connections doesn't do anything now!"))
    (debug:print-info 0 *default-log-port* notice)))
;; (let* ((runremote (or area-dat *runremote*))
;; (server-dat (if runremote
;; (remote-conndat runremote)
;; #f))) ;; (hash-table-ref/default *runremote* run-id #f)))
;; (if (vector? server-dat)
;; (let ((api-dat (http-transport:server-dat-get-api-uri server-dat)))
;; (handle-exceptions
;; exn
;; (begin
;; (print-call-chain *default-log-port*)
;; (debug:print-error 0 *default-log-port* " closing connection failed with error: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
;; (close-connection! api-dat)
;; ;;(close-idle-connections!)
;; #t))
;; #f)))
;; initialize servdat for client side, setup needed parameters
;; pass in #f as sdat-in to create sdat
;;
#;(define (servdat-init sdat-in iface port uuid)
(let* ((sdat (or sdat-in (make-servdat))))
(assert #f "This is a bad idea.")
(if uuid (servdat-uuid-set! sdat uuid))
(servdat-host-set! sdat iface)
(servdat-port-set! sdat port)
(servdat-api-url-set! sdat (conc "http://" iface ":" port "/api"))
(servdat-api-uri-set! sdat (uri-reference (servdat-api-url sdat)))
(servdat-api-req-set! sdat (make-request method: 'POST
uri: (servdat-api-uri sdat)))
;; set up the http-client parameters
(max-retry-attempts 1)
;; consider all requests indempotent
(retry-request? (lambda (request)
#f))
(determine-proxy (constantly #f))
sdat))
;;======================================================================
;; NEW SERVER METHOD
;;======================================================================
;; only use for main.db - need to re-write some of this :(
;;
;; Try to take the "I am the server" lock recorded in dbfile.
;;
;;   sdat   - not used in this body
;;   dbfile - db file that is opened to hold the lock record
;;   port   - port of the server asking for the lock
;;
;; Returns a true value when this process holds the lock, #f otherwise.
;; The value of the match is what gets returned, so a lock held by another
;; live server yields #f instead of the (truthy) owner record.
(define (get-lock-db sdat dbfile port)
  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
         (res (db:get-iam-server-lock dbh dbfile port))
         ;; res => list then already locked, check server is responsive
         ;;     => #t then successfully got the lock
         ;;     => #f reserved for future use as to indicate something went wrong
         (got-lock
          (match res
            ((owner_pid owner_host owner_port event_time)
             (if (server-ready? owner_host owner_port "abc")
                 #f ;; owner answers the ping - the lock is legitimately held
                 (begin
                   (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
                   ;; NOTE(review): the result of db:steal-lock-db is passed
                   ;; through - confirm it is #f when the steal fails
                   (db:steal-lock-db dbh dbfile port))))
            (#t #t)
            (else #f))))
    (sqlite3:finalize! dbh)
    got-lock))
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
>
>
|
|
|
|
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
|
(set! *http-connections-next-cleanup* (+ (current-seconds) 10))
(mutex-unlock! *http-mutex*))
;; Takes *http-mutex* and bumps the in-progress request counter by one.
;; The mutex is NOT released here; it is still held when this returns.
(define (rmt:inc-requests-and-prep-to-close-all-connections)
  (mutex-lock! *http-mutex*)
  (let ((in-flight (+ *http-requests-in-progress* 1)))
    (set! *http-requests-in-progress* in-flight)))
;;======================================================================
;; NEW SERVER METHOD
;;======================================================================
;; only use for main.db - need to re-write some of this :(
;;
;; Try to take the "I am the server" lock recorded in dbfile.
;;
;;   sdat   - not used in this body
;;   dbfile - db file that is opened to hold the lock record
;;   host   - host of the server asking for the lock (must be set)
;;   port   - port of the server asking for the lock (must be set)
;;
;; Returns a true value when this process holds the lock, #f otherwise.
;; The value of the match is what gets returned, so a lock held by another
;; live server yields #f instead of the (truthy) owner record.
(define (get-lock-db sdat dbfile host port)
  (assert host "FATAL: get-lock-db called with host not set.")
  (assert port "FATAL: get-lock-db called with port not set.")
  (let* ((dbh (db:open-run-db dbfile db:initialize-db)) ;; open-run-db creates a standard db with schema used by all situations
         (res (db:get-iam-server-lock dbh dbfile host port))
         ;; res => list then already locked, check server is responsive
         ;;     => #t then successfully got the lock
         ;;     => #f reserved for future use as to indicate something went wrong
         (got-lock
          (match res
            ((owner_pid owner_host owner_port event_time)
             (if (server-ready? owner_host owner_port "abc")
                 #f       ;; locked by someone else
                 (begin   ;; locked by someone dead and gone
                   (debug:print 0 *default-log-port* "WARNING: stale lock - have to steal it. This may fail.")
                   ;; NOTE(review): the result of db:steal-lock-db is passed
                   ;; through - confirm it is #f when the steal fails
                   (db:steal-lock-db dbh dbfile port))))
            (#t #t)
            (else #f))))
    (sqlite3:finalize! dbh)
    got-lock))
|
︙ | | | ︙ | |
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
|
(alist-ref 'port srv-pkt)))
;; Ping the server at host:port, sending key with the request.
;; Returns the decoded reply. When open-send-receive-nn yields no reply
;; (a false value) that false value is returned as-is instead of being
;; handed to string->sexpr.
(define (server-ready? host port key) ;; server-address is host:port
  (let* ((data (sexpr->string `((cmd . ping)
                                (key . ,key)
                                (params . ()))))
         (res  (open-send-receive-nn (conc host ":" port) data)))
    (if res
        (string->sexpr res)
        res)))
;; (let ((res (with-input-from-port i
;; read)))
;; (close-output-port o)
;; (close-input-port i)
;; res))
;; (if (string? res)
;; (string->sexpr res)
;; res)))
;; (begin ;; connection failed
;; (debug:print-info 0 *default-log-port* "Server at "host":"port" is not responding.")
;; #f))))
;; (define (loop-test host port data) ;; server-address is host:port
;; ;; ping the server and ask it
;; ;; if it ready
;; ;; (let* ((sdat (servdat-init #f host port #f)))
;; ;; (http-transport:send-receive sdat "abc" 'ping '())))
;; (let* ((payload (sexpr->string data))
;; (res (with-input-from-request
;; (conc "http://"host":"port"/loop-test")
;; `((data . ,payload))
;; read-string)))
;; (string->sexpr res))
;; #f
;; )
; from the pkts return servers associated with dbpath
;; NOTE: Only one can be alive - have to check on each
;; in the list of pkts returned
;;
(define (get-viable-servers serv-pkts dbpath)
(let loop ((tail serv-pkts)
(res '()))
|
<
<
<
<
<
<
|
<
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
|
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
|
(alist-ref 'port srv-pkt)))
;; Ping the server at host:port, sending key with the request.
;; Returns the decoded reply, or the false value from open-send-receive-nn
;; when there is no reply to decode.
(define (server-ready? host port key) ;; server-address is host:port
  (let* ((ping-msg `((cmd . ping)
                     (key . ,key)
                     (params . ())))
         (address  (conc host ":" port))
         (reply    (open-send-receive-nn address (sexpr->string ping-msg))))
    (and reply
         (string->sexpr reply))))
; from the pkts return servers associated with dbpath
;; NOTE: Only one can be alive - have to check on each
;; in the list of pkts returned
;;
(define (get-viable-servers serv-pkts dbpath)
(let loop ((tail serv-pkts)
(res '()))
|
︙ | | | ︙ | |
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
|
(define (remove-pkts-if-not-alive serv-pkts)
(filter (lambda (pkt)
(let* ((host (alist-ref 'host pkt))
(port (alist-ref 'port pkt))
(key (alist-ref 'servkey pkt))
(pktz (alist-ref 'Z pkt))
(res (handle-exceptions
exn
#f
(server-ready? host port key))))
(if res
res
(let* ((pktsdir (get-pkts-dir *toppath*))
(pktpath (conc pktsdir"/"pktz".pkt")))
(debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
(delete-file* pktpath)
#f))))
|
<
<
<
|
|
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
|
(define (remove-pkts-if-not-alive serv-pkts)
(filter (lambda (pkt)
(let* ((host (alist-ref 'host pkt))
(port (alist-ref 'port pkt))
(key (alist-ref 'servkey pkt))
(pktz (alist-ref 'Z pkt))
(res (server-ready? host port key)))
(if res
res
(let* ((pktsdir (get-pkts-dir *toppath*))
(pktpath (conc pktsdir"/"pktz".pkt")))
(debug:print 0 *default-log-port* "WARNING: pkt with no server "pktpath)
(delete-file* pktpath)
#f))))
|
︙ | | | ︙ | |
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
|
"/" (servdat-uuid *server-info*)
".pkt")))
(debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
(delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
;; am I the best-srv, compare server-keys to know
(if i-am-srv
(if (get-lock-db sdat db-file (servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
(begin
(debug:print-info 0 *default-log-port* "I'm the server!")
(servdat-dbfile-set! sdat db-file)
(servdat-status-set! sdat 'db-locked))
(begin
(debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
(bdat-time-to-exit-set! *bdat* #t)
|
|
|
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
|
"/" (servdat-uuid *server-info*)
".pkt")))
(debug:print-info 0 *default-log-port* "Attempting to remove bogus pkt file "pktfile)
(delete-file* pktfile))))) ;; remove immediately instead of waiting for on-exit
(debug:print 0 *default-log-port* "best-srv-key: "best-srv-key", server-key: "server-key", i-am-srv: "i-am-srv)
;; am I the best-srv, compare server-keys to know
(if i-am-srv
(if (get-lock-db sdat db-file (servdat-host sdat)(servdat-port sdat)) ;; (db:get-iam-server-lock *dbstruct-db* *toppath* run-id)
(begin
(debug:print-info 0 *default-log-port* "I'm the server!")
(servdat-dbfile-set! sdat db-file)
(servdat-status-set! sdat 'db-locked))
(begin
(debug:print-info 0 *default-log-port* "I'm not the server, exiting.")
(bdat-time-to-exit-set! *bdat* #t)
|
︙ | | | ︙ | |
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
|
(let* ((remdat *remotedat*)
(server-start-time (current-seconds))
(pkts-dir (get-pkts-dir))
(server-key (rmt:get-signature)) ;; This servers key
(is-main (equal? (args:get-arg "-db") ".db/main.db"))
(last-access 0)
(server-timeout (server:expiration-timeout))
(shutdown-server-sequence (lambda (port)
(set! *unclean-shutdown* #f)
(debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
(rmt:server-shutdown)
(portlogger:open-run-close portlogger:set-port port "released")
(exit)))
(timed-out? (lambda ()
(<= (+ last-access server-timeout)
(current-seconds)))))
(servdat-dbfile-set! *server-info* (args:get-arg "-db"))
;; main and run db servers have both got wait logic (could/should merge it)
|
|
|
|
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
|
(let* ((remdat *remotedat*)
(server-start-time (current-seconds))
(pkts-dir (get-pkts-dir))
(server-key (rmt:get-signature)) ;; This servers key
(is-main (equal? (args:get-arg "-db") ".db/main.db"))
(last-access 0)
(server-timeout (server:expiration-timeout))
(shutdown-server-sequence (lambda (host port)
(set! *unclean-shutdown* #f)
(debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
(rmt:server-shutdown host port)
(portlogger:open-run-close portlogger:set-port port "released")
(exit)))
(timed-out? (lambda ()
(<= (+ last-access server-timeout)
(current-seconds)))))
(servdat-dbfile-set! *server-info* (args:get-arg "-db"))
;; main and run db servers have both got wait logic (could/should merge it)
|
︙ | | | ︙ | |
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
|
;; IFF I'm not main, call into main and register self
(if (not is-main)
(let ((res (rmt:register-server remdat
*toppath* iface port
server-key dbname)))
(if res ;; we are the server
(servdat-status-set! *server-info* 'have-interface-and-db)
(let* ((serv-info (rmt:get-server-info *toppath* dbname)))
(match serv-info
((host port servkey pid ipaddr apath dbpath)
(if (not (server-ready? host port servkey))
(begin
(debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
(rmt:deregister-server remdat apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
(loop (+ count 1) bad-sync-count start-time))))
(else
(debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
(exit)))))))
(debug:print 0 *default-log-port*
"SERVER: running, db "dbname" opened, megatest version: "
(common:get-full-version))
|
>
>
|
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
|
;; IFF I'm not main, call into main and register self
(if (not is-main)
(let ((res (rmt:register-server remdat
*toppath* iface port
server-key dbname)))
(if res ;; we are the server
(servdat-status-set! *server-info* 'have-interface-and-db)
;; now check that the db locker is alive, clear it out if not
(let* ((serv-info (rmt:get-server-info *toppath* dbname)))
(match serv-info
((host port servkey pid ipaddr apath dbpath)
(if (not (server-ready? host port servkey))
(begin
(debug:print-info 0 *default-log-port* "Server registered but not alive. Removing and trying again.")
(rmt:deregister-server remdat apath host port servkey dbpath) ;; servkey pid ipaddr apath dbpath)
(loop (+ count 1) bad-sync-count start-time))))
(else
(debug:print 0 *default-log-port* "We are not the server for "dbname", exiting. Server info is: "serv-info)
(exit)))))))
(debug:print 0 *default-log-port*
"SERVER: running, db "dbname" opened, megatest version: "
(common:get-full-version))
|
︙ | | | ︙ | |
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
|
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
(cond
((not *server-run*)
(debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
(shutdown-server-sequence port))
((timed-out?)
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(shutdown-server-sequence port))
((and *server-run*
(or (not (timed-out?))
(if is-main ;; do not exit if there are other servers (keep main open until all others gone)
(> (rmt:get-count-servers remdat *toppath*) 1)
#f)))
(if (common:low-noise-print 120 "server continuing")
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
(loop 0 bad-sync-count (current-milliseconds)))
(else
(set! *unclean-shutdown* #f)
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(shutdown-server-sequence port)
#;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
(open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown
(sexpr->string 'quit)))
)))))))
;; Call this to start the actual server
;;
|
|
|
|
|
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
|
(begin
(debug:print 0 *default-log-port* "Server stats:")
(db:print-current-query-stats)))
(let* ((hrs-since-start (/ (- (current-seconds) server-start-time) 3600)))
(cond
((not *server-run*)
(debug:print-info 0 *default-log-port* "*server-run* set to #f. Shutting down.")
(shutdown-server-sequence (get-host-name) port))
((timed-out?)
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(shutdown-server-sequence (get-host-name) port))
((and *server-run*
(or (not (timed-out?))
(if is-main ;; do not exit if there are other servers (keep main open until all others gone)
(> (rmt:get-count-servers remdat *toppath*) 1)
#f)))
(if (common:low-noise-print 120 "server continuing")
(debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access)))
(loop 0 bad-sync-count (current-milliseconds)))
(else
(set! *unclean-shutdown* #f)
(debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
(shutdown-server-sequence (get-host-name) port)
#;(debug:print-info 0 *default-log-port* "Sending 'quit to server, received: "
(open-send-receive-nn (conc iface":"port) ;; do this here and not in server-shutdown
(sexpr->string 'quit)))
)))))))
;; Call this to start the actual server
;;
|
︙ | | | ︙ | |