Megatest

Diff
Login

Differences From Artifact [4943a8edf6]:

To Artifact [dd0c23fb98]:


19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42




43
44
45


46
47
48

49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

64



65
66
67
68
69
70
71
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

43
44
45
46
47


48
49



50






51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71







+
















-
+
+
+
+

-
-
+
+
-
-
-
+
-
-
-
-
-
-









+

+
+
+







;;======================================================================

(declare (unit common))
(declare (uses commonmod))
(declare (uses rmtmod))
(declare (uses debugprint))
(declare (uses mtargs))
        

(use srfi-1 data-structures posix regex-case (prefix base64 base64:)
     format dot-locking csv-xml z3 udp ;; sql-de-lite
     hostinfo md5 message-digest typed-records directory-utils stack
     matchable regex posix (srfi 18) extras ;; tcp 
     (prefix nanomsg nmsg:)
     (prefix sqlite3 sqlite3:)
     pkts (prefix dbi dbi:)
     )
(use posix-extras pathname-expand files)


(import commonmod
	debugprint
	rmtmod
	(prefix mtargs args:))

	
(define (remove-server-files directory-path)
  (let ((files (glob (string-append directory-path "/server*"))))
    (for-each delete-file* files)))
(include "common_records.scm")


(define (remove-files filespec)
  (let ((files (glob filespec)))
;; (require-library margs)
;; (include "margs.scm")

    (for-each delete-file* files)))
;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (let* ((msg (conc "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")))
					;; yes, print to current-output-port AND *default-log-port*, annoying but necessary I think
					(print msg)
                                        (remove-server-files (conc *toppath* "/logs"))
					(debug:print 0 *default-log-port* msg)
                                        (remove-files (conc *toppath* "/logs/server*"))
                                        (remove-files (conc *toppath* "/.servinfo/*"))
                                        (remove-files (conc *toppath* "/.mtdb/*lock"))
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
155
156
157
158
159
160
161




162
163
164
165
166
167
168







-
-
-
-







;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *default-log-port*  (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *on-exit-procs* '()) ;; add procs to this list to be executed on exit
(define *default-area-tag* "local")

;; DATABASE
;; (define *dbstruct-dbs*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
;; (define *db-write-access*     #t)
;; db sync
;; (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
179
180
181
182
183
184
185

186
187
188
189
190
191
192







-







(define *transport-type*    'http)             ;; override with [server] transport http|rpc|nmsg
(define *runremote*         #f)                ;; if set up for server communication this will hold <host port>
;; (define *max-cache-size*    0)
(define *logged-in-clients* (make-hash-table))
(define *server-id*         #f)
(define *server-info*       #f)  ;; good candidate for easily convert to non-global
(define *time-to-exit*      #f)
(define *server-run*        #t)
(define *run-id*            #f)
(define *server-kind-run*   (make-hash-table))
(define *home-host*         #f)
;; (define *total-non-write-delay* 0)
(define *heartbeat-mutex*   (make-mutex))
;; (define *api-process-request-count* 0)
;; (define *max-api-process-requests* 0)
249
250
251
252
253
254
255
256

257
258
259
260
261
262
263
244
245
246
247
248
249
250

251
252
253
254
255
256
257
258







-
+







;;======================================================================

(define *common:this-exe-fullpath* (common:get-this-exe-fullpath))
(define *common:this-exe-dir*      (pathname-directory *common:this-exe-fullpath*))
(define *common:this-exe-name*     (pathname-strip-directory *common:this-exe-fullpath*))

(define (common:get-sync-lock-filepath)
  (let* ((tmp-area     (common:get-db-tmp-area))
  (let* ((tmp-area     (common:make-tmpdir-name *toppath* ""))
         (lockfile     (conc tmp-area "/megatest.db.lock")))
    lockfile))

(define *common:logpro-exit-code->status-sym-alist*
  '( ( 0 . pass )
     ( 1 . fail )
     ( 2 . warn )
403
404
405
406
407
408
409
410
411




412
413
414
415


416
417
418


419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439

440
441
442
443
444
445
446
398
399
400
401
402
403
404


405
406
407
408
409



410
411



412
413


414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430


431
432
433
434
435
436
437
438







-
-
+
+
+
+

-
-
-
+
+
-
-
-
+
+
-
-

















-
-
+







  (- megatest-version (common:get-last-run-version-number)))

(define (common:version-changed?)
  (not (equal? (common:get-last-run-version)
               (common:version-signature))))


;; From 1.70 to 1.80, db's are compatible.

;; From 1.70 to 1.81, db's are compatible.
;;
;; BUG: This logic is almost certainly not quite correct.
;;
(define (common:api-changed?)
  (let* (
    (megatest-major-version (substring (->string megatest-version) 0 4))
    (run-major-version (substring (conc (common:get-last-run-version)) 0 4))
  (let* ((megatest-major-version (substring (->string megatest-version) 0 4))
	 (run-major-version (substring (conc (common:get-last-run-version)) 0 4)))
   )
   (and (not (equal? megatest-major-version "1.80"))
     (not (equal? megatest-major-version megatest-run-version)))
    (and (not (member megatest-major-version '("1.81" "1.80")))
	 (not (equal? megatest-major-version run-major-version)))))
  )
)

;;======================================================================
;; Move me elsewhere ...
;; RADT => Why do we meed the version check here, this is called only if version misma
;;
(define (common:cleanup-db dbstruct #!key (full #f))
  (case (rmt:transport-mode)
    ((http)
     (apply db:multi-db-sync 
	    dbstruct
	    'schema
	    'killservers
	    'adj-target
	    'new2old
	    '(dejunk)
	    ))
    ((tcp nfs)
     (debug:print 0 *default-log-port* "WARNING: cleanup-db NOT implemented yet for tcp and nfs.")
     #;(apply db:multi-db-sync 
     (apply db:multi-db-sync 
	    dbstruct
	    'schema
	    'killservers
	    'adj-target
	    'new2old
	    '(dejunk)
	    )))
620
621
622
623
624
625
626
627

628
629
630
631
632
633
634
612
613
614
615
616
617
618

619
620
621
622
623
624
625
626







-
+







(define (common:exit-on-version-changed)
  (if (and *toppath*              ;; do nothing if *toppath* not yet provided
	   (common:on-homehost?))
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") ".mtdb/main.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
                 (dbstruct (db:setup))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
927
928
929
930
931
932
933











934
935
936
937
938
939
940







-
-
-
-
-
-
-
-
-
-
-







	    (if (equal? thepath "/")
		(begin
		  (debug:print-error 0 *default-log-port* "Unable to find megatest home directory.")
		  #f)
		(loop (pathname-directory thepath)))))
      ))


(define (common:db-tmp-area-path)
  (conc "/tmp/" 
         (current-user-name)
	 "/megatest_localdb/"
	 (common:get-testsuite-name)
         "/"
	 (string-translate *toppath* "/" ".")
  )
)


;;======================================================================
;; redefine for future cleanup (converge on area-name, the more generic
;;
(define common:get-area-name common:get-testsuite-name)

(define (common:get-db-tmp-area . junk)
969
970
971
972
973
974
975
976

977
978
979
980
981
982
983
950
951
952
953
954
955
956

957
958
959
960
961
962
963
964







-
+







		     (tsname (common:get-testsuite-name))
		     (dbpath (common:get-create-writeable-dir
			      (list (conc "/tmp/" (current-user-name)
					  "/megatest_localdb/"
					  tsname "/"
					  (string-translate toppath "/" "."))
				    (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
					  "/megatest_localdb/"
					  "/"(current-user-name) "/megatest_localdb/"
					  tsname
					  (string-translate toppath "/" "."))
				    ))))
		(set! *db-cache-path* dbpath)
		;; ensure megatest area has .mtdb
		(let ((dbarea (conc *toppath* "/.mtdb")))
		  (if (not (file-exists? dbarea))
1546
1547
1548
1549
1550
1551
1552
1553

1554
1555
1556
1557
1558
1559
1560
1527
1528
1529
1530
1531
1532
1533

1534
1535
1536
1537
1538
1539
1540
1541







-
+







;;======================================================================
;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "Failed to get modifcation time for " fpath ", treating it as zero. exn=" exn)
      (debug:print 2 *default-log-port* "Failed to get modification time for " fpath ", treating it as zero. exn=" exn)
      0)
    (if (file-exists? fpath)
	(file-modification-time fpath)
	0)))

;;======================================================================
;; find timestamp of newest file associated with a sqlite db file
1662
1663
1664
1665
1666
1667
1668
1669
1670


1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686

1687
1688
1689
1690
1691
1692
1693

1694
1695
1696
1697
1698
1699
1700
1643
1644
1645
1646
1647
1648
1649


1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666

1667
1668
1669
1670
1671
1672
1673

1674
1675
1676
1677
1678
1679
1680
1681







-
-
+
+















-
+






-
+







;;
;; (define (common:print-delay-table)
;;   (let loop ((x 0))
;;     (print x "," (common:get-delay x 1))
;;     (if (< x 2)
;; 	(loop (+ x 0.1)))))

(define (get-cpu-load #!key (remote-host #f))
  (car (common:get-cpu-load remote-host)))
;; (define (get-cpu-load #!key (remote-host #f))
;;    (car (common:get-cpu-load remote-host)))

;;======================================================================
;;   (let* ((load-res (process:cmd-run->list "uptime"))
;; 	 (load-rx  (regexp "load average:\\s+(\\d+)"))
;; 	 (cpu-load #f))
;;     (for-each (lambda (l)
;; 		(let ((match (string-search load-rx l)))
;; 		  (if match
;; 		      (let ((newval (string->number (cadr match))))
;; 			(if (number? newval)
;; 			    (set! cpu-load newval))))))
;; 	      (car load-res))
;;     cpu-load))

;;======================================================================
;; get values from cached info from dropping file in logs dir
;; get values from cached info from dropping file in .sysdata dir
;;  e.g. key is host and dtype is normalized-load
;;
(define (common:get-cached-info key dtype #!key (age 10))
  (if *toppath*
      (let* ((fullpath (conc *toppath* "/.sysdata/" key "-" dtype ".log"))
	     (delfile  (lambda (exn)
			 (debug:print-info 1 *default-log-port* " removing bad file " fullpath ", exn=" exn)
			 (debug:print-info 2 *default-log-port* " removing bad file " fullpath ", exn=" exn)
			 (delete-file* fullpath)
			 #f)))
	(if (and (file-exists? fullpath)
		 (file-read-access? fullpath))
	    (handle-exceptions
		exn
	      (begin
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1718
1719
1720
1721
1722
1723
1724










1725
1726
1727
1728
1729
1730
1731







-
-
-
-
-
-
-
-
-
-







	    exn
	  (begin
	    (debug:print 0 *default-log-port* "failed to write file " fullpath ", exn=" exn)
	    #f)
	  (with-output-to-file fullpath (lambda ()(pp dat)))))
      #f))

(define (common:raw-get-remote-host-load-orig remote-host)
  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
      #f) ;; more specific handling of errors needed
    (with-input-from-pipe 
     (conc "ssh " remote-host " cat /proc/loadavg")
     (lambda ()(list (read)(read)(read))))))

(define (common:raw-get-remote-host-load remote-host)
  (let* ((inp #f))
    (handle-exceptions
	exn
      (begin
	(close-input-pipe inp)
	(debug:print 0 *default-log-port* "failed to ssh to " remote-host " and get loadavg. exn=" exn)
1771
1772
1773
1774
1775
1776
1777
1778


1779
1780
1781
1782
1783
1784
1785
1742
1743
1744
1745
1746
1747
1748

1749
1750
1751
1752
1753
1754
1755
1756
1757







-
+
+







  (handle-exceptions
      exn
    (begin
      (debug:print 0 *default-log-port* "failed to ssh or read loadavg from host " remote-host ", exn=" exn)
      '(-99 -99 -99))
    (let* ((actual-hostname (or remote-host (get-host-name) "localhost")))
      (or (common:get-cached-info actual-hostname "cpu-load")
	  (let ((result (if remote-host
	  (let ((result (if (and remote-host
				 (not (equal? remote-host (get-host-name))))
			    (map (lambda (res)
				   (if (eof-object? res) 9e99 res))
			         (common:raw-get-remote-host-load remote-host))
			    (with-input-from-file "/proc/loadavg" 
			      (lambda ()(list (read)(read)(read)))))))
	    (match
		result
2034
2035
2036
2037
2038
2039
2040
2041


2042
2043
2044
2045
2046
2047
2048
2006
2007
2008
2009
2010
2011
2012

2013
2014
2015
2016
2017
2018
2019
2020
2021







-
+
+







						   (if (> numcpu 0)
						       numcpu
						       #f) ;; if zero return #f so caller knows that things are not working
						   (loop (if (string-match "^processor\\s+:\\s+\\d+$" inl)
							     (+ numcpu 1)
							     numcpu)
							 (read-line))))))
				   (result (if remote-host
				   (result (if (and remote-host
						    (not (equal? remote-host (get-host-name))))
					       (common:generic-ssh
						(conc "ssh " remote-host " cat /proc/cpuinfo")
						proc -1)
					       (with-input-from-file "/proc/cpuinfo" proc))))
			      (if (and (number? result)
				       (> result 0))
				  (common:write-cached-info actual-host "num-cpus" result))
2301
2302
2303
2304
2305
2306
2307
2308

2309
2310
2311
2312
2313
2314
2315
2274
2275
2276
2277
2278
2279
2280

2281
2282
2283
2284
2285
2286
2287
2288







-
+







;; returns: ok/not dbspace required-space
;;
(define (common:check-db-dir-space)
  (let* ((required (string->number 
                    ;; default is 1GB (or actually a billion bytes) This is the number of 1 kB blocks.
		    (or (configf:lookup *configdat* "setup" "dbdir-space-required")
			"1000000")))
	 (dbdir    (common:get-db-tmp-area)) ;; (db:get-dbdir))
	 (dbdir    (common:make-tmpdir-name *toppath* "")) ;; (db:get-dbdir))
	 (tdbspace (common:check-space-in-dir dbdir required))
	 (mdbspace (common:check-space-in-dir *toppath* required)))
    (sort (list tdbspace mdbspace) (lambda (a b)
				     (< (cadr a)(cadr b))))))

;;======================================================================
;; check available space in dbdir, exit if insufficient