Megatest

Check-in [77cb77fb6a]
Login
Overview
Comment:merged v1.64-new-pkts
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.64
Files: files | file ages | folders
SHA1: 77cb77fb6af73ab821a7a28091f645c55d57bd9a
User & Date: bjbarcla on 2017-03-20 15:15:26
Other Links: branch diff | manifest | tags
Context
2017-03-20
17:56
Support for /QUICK check-in: b93c0e396c user: matt tags: v1.64
15:15
merged v1.64-new-pkts check-in: 77cb77fb6a user: bjbarcla tags: v1.64
15:14
fixed issues related to PWD not matching -start-dir argument check-in: 75dc576fb1 user: bjbarcla tags: v1.64
13:35
Merged server-fix Closed-Leaf check-in: 1de85e740b user: matt tags: v1.64-new-pkts
Changes

Modified api.scm from [e21b71bae7] to [f1d46841da].

116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130







-
+







;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain))
         )
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer")
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer, dat=" dat)
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (cond
    ((not (vector? dat))                    ;; it is an error to not receive a vector
     (vector #f #f "remote must be called with a vector")       )
    (else  
148
149
150
151
152
153
154
155

156
157
158
159
160
161
162
148
149
150
151
152
153
154

155
156
157
158
159
160
161
162







-
+







                   ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server; for debug in -repl
                   
                   ;; SERVERS
                   ((start-server)                    (apply server:kind-run params))
                   ((kill-server)                     (set! *server-run* #f))

                   ;; TESTS
                   ((test-set-state-status-by-id)     (apply db:test-set-state-status-by-id dbstruct params))
                   ((test-set-state-status-by-id)     (apply mt:test-set-state-status-by-id dbstruct params))
                   ((delete-test-records)             (apply db:delete-test-records dbstruct params))
                   ((delete-old-deleted-test-records) (apply db:delete-old-deleted-test-records dbstruct params))
                   ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                   ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                   ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                   ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                   ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

Modified archive.scm from [31c5249136] to [7dd47285c1].

109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
109
110
111
112
113
114
115

116
117
118
119
120
121
122
123







-
+







	 (archive-info (archive:allocate-new-archive-block *toppath* (common:get-testsuite-name) min-space))
	 (archive-dir  (if archive-info (cdr archive-info) #f))
	 (archive-id   (if archive-info (car archive-info) -1))
	 (disk-groups  (make-hash-table))
	 (test-groups  (make-hash-table)) ;; these two (disk and test groups) could be combined nicely
	 (bup-exe      (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (compress     (or (configf:lookup *configdat* "archive" "compress") "9"))
	 (linktree     (configf:lookup *configdat* "setup" "linktree")))
	 (linktree     (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))

    (if (not archive-dir) ;; no archive disk found, this is fatal
	(begin
	  (debug:print 0 *default-log-port* "FATAL: No archive disks found. Please add disks with at least " min-space " MB space to the [archive-disks] section of megatest.config")
	  (debug:print 0 *default-log-port* "       use [archive] minspace to specify minimum available space")
	  (debug:print 0 *default-log-port* "   disks: " (string-intersperse (map cadr (archive:get-archive-disks)) "\n         "))
	  (exit 1))
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
209
210
211
212
213
214
215

216
217
218
219
220
221
222
223







-
+







     (hash-table-keys disk-groups))
    #t))

(define (archive:bup-restore archive-command run-id run-name tests rp-mutex bup-mutex)  ;; move the getting of archive space down into the below block so that a single run can 
  ;; allocate as needed should a disk fill up
  ;;
  (let* ((bup-exe      (or (configf:lookup *configdat* "archive" "bup") "bup"))
	 (linktree     (configf:lookup *configdat* "setup" "linktree")))
	 (linktree     (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))

    ;; from the test info bin the path to the test by stem
    ;;
    (for-each
     (lambda (test-dat)
       ;; When restoring test-dat will initially contain an old and invalid path to the test
       (let* ((best-disk         (get-best-disk *configdat* #f)) ;; BUG: get the testconfig and use it here. Otherwise data pulled out of archive could end up on the wrong kind of disk.

Modified client.scm from [d740aa52d4] to [19d242e4dd].

93
94
95
96
97
98
99
100

101
102
103
104
105
106
107
93
94
95
96
97
98
99

100
101
102
103
104
105
106
107







-
+







	      (if (and host port)
		  (let* ((start-res (case *transport-type*
				      ((http)(http-transport:client-connect host port))))
			 (ping-res  (case *transport-type* 
				      ((http)(rmt:login-no-auto-client-setup start-res)))))
		    (if (and start-res
			     ping-res)
			(begin
			(let ((runremote (or area-dat *runremote*))) ;; it might have been generated only a few statements ago
			  (remote-conndat-set! runremote start-res) ;; (hash-table-set! runremote run-id start-res)
			  (debug:print-info 2 *default-log-port* "connected to " (http-transport:server-dat-make-url start-res))
			  start-res)
			(begin    ;; login failed but have a server record, clean out the record and try again
			  (debug:print-info 0 *default-log-port* "client:setup, login failed, will attempt to start server ... start-res=" start-res ", server-dat=" server-dat) ;; had runid.  Fixes part of Randy;s ticket 1405717332
			  (case *transport-type* 
			    ((http)(http-transport:close-connections)))

Modified common.scm from [525a5df01c] to [eab4c1e605].

15
16
17
18
19
20
21

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36


37
38
39
40
41
42







43
44
45
46
47
48
49
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39






40
41
42
43
44
45
46
47
48
49
50
51
52
53







+















+
+
-
-
-
-
-
-
+
+
+
+
+
+
+








(require-extension (srfi 18) extras tcp rpc)

(import (prefix sqlite3 sqlite3:))
(import (prefix base64 base64:))

(declare (unit common))
(declare (uses keys))

(include "common_records.scm")

;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))

(define getenv get-environment-variable)
(define (safe-setenv key val)
  (if (substring-index ":" key) ;; variables containing : are for internal use and cannot be environment variables.
      (debug:print-error 4 *default-log-port* "skip setting internal use only variables containing \":\"")
  (if (and (string? val)(string? key))
      (handle-exceptions
       exn
       (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
       (setenv key val))
      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)))
      (if (and (string? val)
	       (string? key))
	  (handle-exceptions
	      exn
	      (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val)
	    (setenv key val))
	  (debug:print-error 0 *default-log-port* "bad value for setenv, key=" key ", value=" val))))

(define home (getenv "HOME"))
(define user (getenv "USER"))

;; GLOBAL GLETCHES

;; CONTEXTS
950
951
952
953
954
955
956
957
958







959
960
961
962
963
964
965
954
955
956
957
958
959
960


961
962
963
964
965
966
967
968
969
970
971
972
973
974







-
-
+
+
+
+
+
+
+







     ((and (equal? args-testpatt "%") rtestpatt)
      (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
      rtestpatt)
     (else args-testpatt))))
     
(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree"))))
      (or (and *configdat*
	       (configf:lookup *configdat* "setup" "linktree"))
	  (if *toppath*
	      (conc *toppath* "/lt")
	      (if (file-exists? "megatest.config") ;; we are in the toppath (new area, mtutils compatible)
		  (conc (current-directory) "/lt")
		  #f)))))

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
		 (args:get-arg ":runname")
		 (getenv "MT_RUNNAME"))))
    ;; (if res (set-environment-variable "MT_RUNNAME" res)) ;; not sure if this is a good idea. side effect and all ...
    res))
1008
1009
1010
1011
1012
1013
1014












1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026












1027
1028
1029
1030
1031
1032
1033
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035












1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054







+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+







	  (common:get-homehost trynum: (- trynum 1)))
	#f))
   (else
    (let* ((currhost (get-host-name))
	   (bestadrs (server:get-best-guess-address currhost))
	   ;; first look in config, then look in file .homehost, create it if not found
	   (homehost (or (configf:lookup *configdat* "server" "homehost" )
			 (handle-exceptions
			     exn
			     (if (> trynum 0)
				 (let ((delay-time (* (- 5 trynum) 5)))
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file, delaying " delay-time " seconds and trying again, message: "  ((condition-property-accessor 'exn 'message) exn))
				   (thread-sleep! delay-time)
				   (common:get-homehost trynum: (- trynum 1)))
				 (begin
				   (mutex-unlock! *homehost-mutex*)
				   (debug:print 0 *default-log-port* "ERROR: Failed to read .homehost file after trying five times. Giving up and exiting, message: "  ((condition-property-accessor 'exn 'message) exn))
				   (exit 1)))
			 (let ((hhf (conc *toppath* "/.homehost")))
			   (if (file-exists? hhf)
			       (with-input-from-file hhf read-line)
			       (if (file-write-access? *toppath*)
				   (begin
				     (with-output-to-file hhf
				       (lambda ()
					 (print bestadrs)))
				     (begin
				       (mutex-unlock! *homehost-mutex*)
				       (car (common:get-homehost))))
				   #f)))))
			   (let ((hhf (conc *toppath* "/.homehost")))
			     (if (file-exists? hhf)
				 (with-input-from-file hhf read-line)
				 (if (file-write-access? *toppath*)
				     (begin
				       (with-output-to-file hhf
					 (lambda ()
					   (print bestadrs)))
				       (begin
					 (mutex-unlock! *homehost-mutex*)
					 (car (common:get-homehost))))
				     #f))))))
	   (at-home  (or (equal? homehost currhost)
			 (equal? homehost bestadrs))))
      (set! *home-host* (cons homehost at-home))
      (mutex-unlock! *homehost-mutex*)
      *home-host*))))

;; am I on the homehost?
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209



1210
1211
1212
1213
1214
1215
1216



1217
1218
1219


1220
1221
1222
1223
1224
1225
1226
1221
1222
1223
1224
1225
1226
1227



1228
1229
1230
1231
1232
1233
1234



1235
1236
1237
1238


1239
1240
1241
1242
1243
1244
1245
1246
1247







-
-
-
+
+
+




-
-
-
+
+
+

-
-
+
+







;; S Y S T E M   S T U F F
;;======================================================================

;; lazy-safe get file mod time. on any error (file not existing etc.) return 0
;;
(define (common:lazy-modification-time fpath)
  (handle-exceptions
   exn
   0
   (file-modification-time fpath)))
      exn
      0
    (file-modification-time fpath)))

;; find timestamp of newest file associated with a sqlite db file
(define (common:lazy-sqlite-db-modification-time fpath)
  (let* ((glob-list (handle-exceptions
                    exn
                    '("/no/such/file")
                    (glob (conc fpath "*"))))
			exn
			`(,(conc "/no/such/file, message: " ((condition-property-accessor 'exn 'message) exn)))
		      (glob (conc fpath "*"))))
         (file-list (if (eq? 0 (length glob-list))
                        '("/no/such/file")
                        glob-list)))
			'("/no/such/file")
			glob-list)))
  (apply max
   (map
    common:lazy-modification-time 
    file-list))))

;; return a nice clean pathname made absolute
(define (common:nice-path dir)
1609
1610
1611
1612
1613
1614
1615
1616


1617
1618
1619
1620
1621
1622
1623
1630
1631
1632
1633
1634
1635
1636

1637
1638
1639
1640
1641
1642
1643
1644
1645







-
+
+







       (lambda ()
          (for-each (lambda (keyval)
		      (let* ((key (car keyval))
			     (val (cdr keyval))
			     (delim (if (string-search whitesp val) 
					"\""
					"")))
			(print (if (member key ignorevars)
			(print (if (or (member key ignorevars)
				       (string-search ":" key)) ;; internal only values to be skipped.
				   "# export "
				   "export ")
			       key "=" delim (mungeval val) delim)))
                    envvars)))))

;; set some env vars from an alist, return an alist with original values
;; (("VAR" "value") ...)

Modified common_records.scm from [7a47b5d16a] to [0a5321af09].

24
25
26
27
28
29
30
31
32
33

















34
35
36
37
38
39
40
24
25
26
27
28
29
30



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54







-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;

(define-syntax define-simple-syntax
  (syntax-rules ()
    ((_ (name arg ...) body ...)
     (define-syntax name (syntax-rules () ((name arg ...) (begin body ...)))))))

(define-syntax common:handle-exceptions
  (syntax-rules ()
    ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...))))
;; (define-syntax common:handle-exceptions
;;   (syntax-rules ()
;;     ((_ exn-in errstmt ...)(handle-exceptions exn-in errstmt ...))))

(define-syntax common:debug-handle-exceptions
  (syntax-rules ()
    ((_ debug exn errstmt body ...)
     (if debug
	 (begin body ...)
	 (handle-exceptions exn errstmt body ...)))))

(define-syntax common:handle-exceptions
  (syntax-rules ()
    ((_ exn errstmt body ...)
     (begin body ...))))

;; (define handle-exceptions common:handle-exceptions)

;; iup callbacks are not dumping the stack, this is a work-around
;;
(define-simple-syntax (debug:catch-and-dump proc procname)
  (handle-exceptions
   exn
   (begin
95
96
97
98
99
100
101
102

103
104
105
106
107
108
109
109
110
111
112
113
114
115

116
117
118
119
120
121
122
123







-
+







    (member n *verbosity*))
   ((and (list? *verbosity*)     ;; list   list
	 (list? n))
    (not (null? (lset-intersection! eq? *verbosity* n))))
   ((and (number? *verbosity*)
	 (list? n))
    (member *verbosity* n))))
      

(define (debug:setup)
  (let ((debugstr (or (args:get-arg "-debug")
		      (getenv "MT_DEBUG_MODE"))))
    (set! *verbosity* (debug:calc-verbosity debugstr))
    (debug:check-verbosity *verbosity* debugstr)
    ;; if we were handed a bad verbosity rule then we will override it with 1 and continue
    (if (not *verbosity*)(set! *verbosity* 1))

Modified configf.scm from [c034045a40] to [346c0caf52].

13
14
15
16
17
18
19

20
21
22
23
24
25
26
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27







+







;; Config file handling
;;======================================================================

(use regex regex-case) ;;  directory-utils)
(declare (unit configf))
(declare (uses process))
(declare (uses env))
(declare (uses keys))

(include "common_records.scm")

;; return list (path fullpath configname)
(define (find-config configname #!key (toppath #f))
  (if toppath
      (let ((cfname (conc toppath "/" configname)))
651
652
653
654
655
656
657

658



659
660


661
662

663
664
665
666











667
668
669
670
671
672
673
652
653
654
655
656
657
658
659
660
661
662
663


664
665
666
667
668




669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686







+

+
+
+
-
-
+
+


+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+







  (let ((ht (make-hash-table)))
    (for-each
     (lambda (section)
       (hash-table-set! ht (car section)(cdr section)))
     adat)
    ht))

;; if 
(define (configf:read-alist fname)
  (handle-exceptions
      exn
      #f
  (configf:alist->config
   (with-input-from-file fname read)))
    (configf:alist->config
     (with-input-from-file fname read))))

(define (configf:write-alist cdat fname)
  (let ((dat  (configf:config->alist cdat)))
  (with-output-to-file fname
    (lambda ()
      (pp (configf:config->alist cdat)))))
     
    (with-output-to-file fname ;; first write out the file
      (lambda ()
	(pp dat)))
    (if (file-exists? fname)   ;; now verify it is readable
	(if (configf:read-alist fname)
	    #t ;; data is good.
	    (begin
	      (delete-file fname)
	      (debug:print 0 *default-log-port* "WARNING: content " dat " for cache " fname " is not readable. Deleting generated file.")
	      #f))
	#f)))

;; convert hierarchial list to ini format
;;
(define (configf:config->ini data)
  (map 
   (lambda (section)
     (let ((section-name (car section))

Modified dashboard-tests.scm from [84e8bdf580] to [54ccc9758e].

472
473
474
475
476
477
478
479

480
481

482
483
484
485
486
487
488
472
473
474
475
476
477
478

479
480

481
482
483
484
485
486
487
488







-
+

-
+







                                   #f  ;; do nothing, just keep on trucking ....
                                   (setup-env-defaults runconfigf run-id (make-hash-table) keydat environ-patt: keystring))
	 			 (make-hash-table))))
	       (testconfig    (begin
				;; (runs:set-megatest-env-vars run-id inrunname: runname testname: test-name itempath: item-path)
				(runs:set-megatest-env-vars run-id inkeyvals: keydat inrunname: runname intarget: keystring testname: testname itempath: item-path) ;; these may be needed by the launching process
				(handle-exceptions
				 exn
				 exn  ;; NOTE: I've no idea why this was written this way. Research, study and fix needed!
				 (tests:get-testconfig (db:test-get-testname testdat) (db:test-get-item-path testdat) test-registry #f)
				 (tests:get-testconfig (db:test-get-testname testdat) test-registry #t))))
				 (tests:get-testconfig (db:test-get-testname testdat) item-path test-registry #t))))
	       (viewlog    (lambda (x)
			     (if (file-exists? logfile)
					;(system (conc "firefox " logfile "&"))
				 (dashboard-tests:run-html-viewer logfile)
				 (message-window (conc "File " logfile " not found")))))
	       (view-a-log (lambda (lfile) 
			     (let ((lfilename (conc rundir "/" lfile)))

Modified dashboard.scm from [9c476b54db] to [2ddcbd2dba].

2710
2711
2712
2713
2714
2715
2716
2717

2718
2719
2720
2721
2722
2723
2724
2710
2711
2712
2713
2714
2715
2716

2717
2718
2719
2720
2721
2722
2723
2724







-
+







	   (> modtime (- last-db-update-time 3)) ;; add three seconds of margin
	   (> (current-seconds)(+ last-db-update-time 1)))))

;; (define *monitor-db-path* #f)
(define *last-monitor-update-time* 0)

;; Force creation of the db in case it isn't already there.
(tasks:open-db)
;; (tasks:open-db)

(define (dashboard:get-youngest-run-db-mod-time dbdir)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "WARNING: error in accessing databases in get-youngest-run-db-mod-time: " ((condition-property-accessor 'exn 'message) exn) " db-dir="dbdir)
     (current-seconds)) ;; something went wrong - just print an error and return current-seconds

Modified db.scm from [9faf096d03] to [3398d87de1].

896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
896
897
898
899
900
901
902

903
904
905
906
907
908
909







-







(define (db:multi-db-sync dbstruct . options)
  (if (not (launch:setup))
      (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
      (let* ((mtdb     (dbr:dbstruct-mtdb dbstruct))
	     (tmpdb    (db:get-db dbstruct))
             (refndb   (dbr:dbstruct-refndb dbstruct))
	     (allow-cleanup #t) ;; (if run-ids #f #t))
	     ;; (tdbdat  (tasks:open-db))
	     (servers (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	     (data-synced 0)) ;; count of changed records (I hope)
    
	;; kill servers
	(if (member 'killservers options)
	    (for-each
	     (lambda (server)
2578
2579
2580
2581
2582
2583
2584


2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598





















2599
2600
2601
2602
2603
2604
2605
2577
2578
2579
2580
2581
2582
2583
2584
2585














2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613







+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;;  AND NOT (item_path='' AND testname in (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")))
;;  (debug:print 0 *default-log-port* "QRY: " qry)
;;  (db:delay-if-busy)
;;
;; NB// This call only operates on toplevel tests. Consider replacing it with more general call
;;
(define (db:set-tests-state-status dbstruct run-id testnames currstate currstatus newstate newstatus)
  (let ((test-ids '()))
    (for-each
  (for-each (lambda (testname)
	      (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE "
			       (if currstate  (conc "state='" currstate "' AND ") "")
			       (if currstatus (conc "status='" currstatus "' AND ") "")
			       " run_id=? AND testname LIKE ?;"))
		    (test-id (db:get-test-id dbstruct run-id testname "")))
		(db:with-db
		 dbstruct
		 run-id
		 #t
		 (lambda (db)
		   (sqlite3:execute db qry newstate newstatus run-id testname)))
		(if test-id (mt:process-triggers dbstruct run-id test-id newstate newstatus))))
	    testnames))
     (lambda (testname)
       (let ((qry (conc "UPDATE tests SET state=?,status=? WHERE "
			(if currstate  (conc "state='" currstate "' AND ") "")
			(if currstatus (conc "status='" currstatus "' AND ") "")
			" run_id=? AND testname LIKE ?;"))
	     (test-id (db:get-test-id dbstruct run-id testname "")))
	 (db:with-db
	  dbstruct
	  run-id
	  #t
	  (lambda (db)
	    (sqlite3:execute db qry
			     (or newstate  currstate "NOT_STARTED")
			     (or newstatus currstate "UNKNOWN")
			     run-id testname)))
	 (if test-id
	     (begin
	       (set! test-ids (cons test-id test-ids))
	       (mt:process-triggers dbstruct run-id test-id newstate newstatus)))))
     testnames)
    test-ids))

;; ;; speed up for common cases with a little logic
;; ;; NB// Ultimately this will be deprecated in deference to mt:test-set-state-status-by-id
;;
;;      NOTE: run-id is not used
;; ;;
(define (db:test-set-state-status dbstruct run-id test-id newstate newstatus newcomment)

Modified dcommon.scm from [4355903cc1] to [19df68b7a1].

607
608
609
610
611
612
613
614
615

616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
607
608
609
610
611
612
613


614
615
616
617
618
619
620
621
622
623
624

625
626
627
628
629
630
631







-
-
+










-







    ;; (set! dashboard:update-summary-tab updater)
    (iup:attribute-set! stats-matrix "WIDTHDEF" "40")
    (iup:vbox
     ;; (iup:label "Run statistics"  #:expand "HORIZONTAL")
     stats-matrix)))

(define (dcommon:servers-table commondat tabdat)
  (let* ((tdbdat         (tasks:open-db))
	 (colnum         0)
  (let* ((colnum         0)
	 (rownum         0)
	 (servers-matrix (iup:matrix #:expand "YES"
				     #:numcol 7
				     #:numcol-visible 7
				     #:numlin-visible 5
				     ))
	 (colnames       (list "Id" "MTver" "Pid" "Host" "Interface:OutPort" "RunTime" "State" "RunId"))
	 (updater        (lambda ()
			   (if (dashboard:monitor-changed? commondat tabdat)
			       (let ((servers  (server:get-list *toppath* limit: 10)))
				 ;; (tasks:get-all-servers (db:delay-if-busy tdbdat))))
				 (iup:attribute-set! servers-matrix "NUMLIN" (length servers))
				 ;; (set! colnum 0)
				 ;; (for-each (lambda (colname)
				 ;;    	 ;; (print "colnum: " colnum " colname: " colname)
				 ;;    	 (iup:attribute-set! servers-matrix (conc "0:" colnum) colname)
				 ;;    	 (set! colnum (+ 1 colnum)))
				 ;;           colnames)

Modified http-transport.scm from [44c2ce6eea] to [a75c26faf4].

55
56
57
58
59
60
61
62

63
64
65
66
67
68
69
55
56
57
58
59
60
61

62
63
64
65
66
67
68
69







-
+







	 (hostname        (get-host-name))
	 (ipaddrstr       (let ((ipstr (if (string=? "-" hostn)
					   ;; (string-intersperse (map number->string (u8vector->list (hostname->ip hostname))) ".")
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (configf:lookup *configdat* "setup" "linktree")))
	 (link-tree-path  (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146






























147
148
149
150
151
152
153
110
111
112
113
114
115
116






























117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








;; This is recursively run by http-transport:run until sucessful
;;
(define (http-transport:try-start-server ipaddrstr portnum)
  (let ((config-hostname (configf:lookup *configdat* "server" "hostname")))
    (debug:print-info 0 *default-log-port* "http-transport:try-start-server time=" (seconds->time-string (current-seconds)) " ipaddrsstr=" ipaddrstr " portnum=" portnum " config-hostname=" config-hostname)
    (handle-exceptions
     exn
     (begin
       (print-error-message exn)
       (if (< portnum 64000)
	   (begin 
	     (debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* "exn=" (condition->list exn))
	     (portlogger:open-run-close portlogger:set-failed portnum)
	     (debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
	     (thread-sleep! 0.1)

	     ;; get_next_port goes here
	     (http-transport:try-start-server ipaddrstr
					      (portlogger:open-run-close portlogger:find-port)))
	   (begin
	     (print "ERROR: Tried and tried but could not start the server"))))
     ;; any error in following steps will result in a retry
     (set! *server-info* (list ipaddrstr portnum))
     (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
     ;; This starts the spiffy server
     ;; NEED WAY TO SET IP TO #f TO BIND ALL
     ;; (start-server bind-address: ipaddrstr port: portnum)
     (if config-hostname ;; this is a hint to bind directly
	 (start-server port: portnum bind-address: (if (equal? config-hostname "-")
						       ipaddrstr
						       config-hostname))
	 (start-server port: portnum))
     (portlogger:open-run-close portlogger:set-port portnum "released")
     (debug:print 1 *default-log-port* "INFO: server has been stopped"))))
	exn
	(begin
	  (print-error-message exn)
	  (if (< portnum 64000)
	      (begin 
		(debug:print 0 *default-log-port* "WARNING: attempt to start server failed. Trying again ...")
		(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		(debug:print 0 *default-log-port* "exn=" (condition->list exn))
		(portlogger:open-run-close portlogger:set-failed portnum)
		(debug:print 0 *default-log-port* "WARNING: failed to start on portnum: " portnum ", trying next port")
		(thread-sleep! 0.1)
		
		;; get_next_port goes here
		(http-transport:try-start-server ipaddrstr
						 (portlogger:open-run-close portlogger:find-port)))
	      (begin
		(print "ERROR: Tried and tried but could not start the server"))))
      ;; any error in following steps will result in a retry
      (set! *server-info* (list ipaddrstr portnum))
      (debug:print 0 *default-log-port* "INFO: Trying to start server on " ipaddrstr ":" portnum)
      ;; This starts the spiffy server
      ;; NEED WAY TO SET IP TO #f TO BIND ALL
      ;; (start-server bind-address: ipaddrstr port: portnum)
      (if config-hostname ;; this is a hint to bind directly
	  (start-server port: portnum bind-address: (if (equal? config-hostname "-")
							ipaddrstr
							config-hostname))
	  (start-server port: portnum))
      (portlogger:open-run-close portlogger:set-port portnum "released")
      (debug:print 1 *default-log-port* "INFO: server has been stopped"))))

;;======================================================================
;; S E R V E R   U T I L I T I E S 
;;======================================================================

;;======================================================================
;; C L I E N T S
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243










244
245
246
247
248
249
250
251
252
253







254
255
256
257
258
259
260
227
228
229
230
231
232
233










234
235
236
237
238
239
240
241
242
243
244
245
246







247
248
249
250
251
252
253
254
255
256
257
258
259
260







-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+



-
-
-
-
-
-
-
+
+
+
+
+
+
+







			      (mutex-lock! *http-mutex*)
			      ;; (condition-case (with-input-from-request "http://localhost"; #f read-lines)
			      ;;					       ((exn http client-error) e (print e)))
			      (set! res (vector
					 success
					 (db:string->obj 
					  (handle-exceptions
					   exn
					   (begin
					     (set! success #f)
					     (debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
					     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
					     (if runremote
                                                 (remote-conndat-set! runremote #f))
					     ;; Killing associated server to allow clean retry.")
					     ;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
					     (mutex-unlock! *http-mutex*)
					      exn
					      (begin
						(set! success #f)
						(debug:print 0 *default-log-port* "WARNING: failure in with-input-from-request to " fullurl ".")
						(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
						(if runremote
						    (remote-conndat-set! runremote #f))
						;; Killing associated server to allow clean retry.")
						;; (tasks:kill-server-run-id run-id)  ;; better to kill the server in the logic that called this routine?
						(mutex-unlock! *http-mutex*)
					     ;;; (signal (make-composite-condition
					     ;;;          (make-property-condition 'commfail 'message "failed to connect to server")))
					     ;;; "communications failed"
					     (db:obj->string #f))
					   (with-input-from-request ;; was dat
					    fullurl 
					    (list (cons 'key "thekey")
						  (cons 'cmd cmd)
						  (cons 'params sparams))
					    read-string))
						(db:obj->string #f))
					    (with-input-from-request ;; was dat
					     fullurl 
					     (list (cons 'key "thekey")
						   (cons 'cmd cmd)
						   (cons 'params sparams))
					     read-string))
					  transport: 'http)
                                         0)) ;; added this speculatively
			      ;; Shouldn't this be a call to the managed call-all-connections stuff above?
			      (close-all-connections!)
			      (mutex-unlock! *http-mutex*)
			      ))
	      (time-out     (lambda ()
438
439
440
441
442
443
444
445

446
447
448
449
450
451
452
453
454
455
456
457
458
438
439
440
441
442
443
444

445
446
447
448
449
450

451
452
453
454
455
456
457







-
+





-







		(change-file-times server-log-file curr-time curr-time)))
          (loop 0 server-state bad-sync-count (current-milliseconds)))
         (else
          (debug:print-info 0 *default-log-port* "Server timed out. seconds since last db access: " (- (current-seconds) last-access))
          (http-transport:server-shutdown port)))))))

(define (http-transport:server-shutdown port)
  (let ((tdbdat (tasks:open-db)))
  (begin
    ;;(BB> "http-transport:server-shutdown called")
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
    ;;
    ;; start_shutdown
    ;;
    ;; (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 1)

    ;; (debug:print-info 0 *default-log-port* "Max cached queries was    " *max-cache-size*)
    ;; (debug:print-info 0 *default-log-port* "Number of cached writes   " *number-of-writes*)
    ;; (debug:print-info 0 *default-log-port* "Average cached write time "

Modified launch.scm from [e503042943] to [b27c32acbe].

10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
10
11
12
13
14
15
16

17
18
19
20
21
22
23
24







-
+








;;======================================================================
;; launch a task - this runs on the originating host, tests themselves
;;
;;======================================================================

(use regex regex-case base64 sqlite3 srfi-18 directory-utils posix-extras z3 call-with-environment-variables csv)
(use typed-records pathname-expand)
(use typed-records pathname-expand matchable)

(import (prefix base64 base64:))
(import (prefix sqlite3 sqlite3:))

(declare (unit launch))
(declare (uses common))
(declare (uses configf))
411
412
413
414
415
416
417
418

419
420



421
422
423
424
425
426
427
411
412
413
414
415
416
417

418
419

420
421
422
423
424
425
426
427
428
429







-
+

-
+
+
+







	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
	       (runscript (assoc/default 'runscript cmdinfo))
	       (ezsteps   (assoc/default 'ezsteps   cmdinfo))
	       ;; (runremote (assoc/default 'runremote cmdinfo))
	       (transport (assoc/default 'transport cmdinfo))
	       ;; (transport (assoc/default 'transport cmdinfo))  ;; not used
	       ;; (serverinf (assoc/default 'serverinf cmdinfo))
	       (port      (assoc/default 'port      cmdinfo))
	       ;; (port      (assoc/default 'port      cmdinfo))
	       (serverurl (assoc/default 'serverurl cmdinfo))
	       (homehost  (assoc/default 'homehost  cmdinfo))
	       (run-id    (assoc/default 'run-id    cmdinfo))
	       (test-id   (assoc/default 'test-id   cmdinfo))
	       (target    (assoc/default 'target    cmdinfo))
	       (itemdat   (assoc/default 'itemdat   cmdinfo))
	       (env-ovrd  (assoc/default 'env-ovrd  cmdinfo))
	       (set-vars  (assoc/default 'set-vars  cmdinfo)) ;; pre-overrides from -setvar
	       (runname   (assoc/default 'runname   cmdinfo))
441
442
443
444
445
446
447









































448
449
450
451
452
453
454
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







                                                   (file-execute-access? fulln))
                                              fulln
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  (if contour (setenv "MT_CONTOUR" contour))
	  
	  ;; On NFS it can be slow and unreliable to get needed startup information.
	  ;;  i. Check if we are on the homehost, if so, proceed
	  ;; ii. Check if host and port passed in via CMDINFO are valid and if
	  ;;     possible use them.
	  (let ((bestadrs (server:get-best-guess-address (get-host-name)))
		(needcare #f))
	    (if (equal? homehost bestadrs) ;; we are likely on the homehost
		(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
		(let ((host-port (if serverurl (string-split serverurl ":") #f)))
		  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
		  (if (string? homehost)
		      (if (and host-port
			       (> (length host-port) 1))
			  (let* ((host      (car host-port))
                                 (port      (cadr host-port))
                                 (start-res (http-transport:client-connect host port))
                                 (ping-res  (rmt:login-no-auto-client-setup start-res)))
			    (if (and start-res
				     ping-res)
				(let ((url  (http-transport:server-dat-make-url start-res)))
				  (remote-conndat-set! *runremote* start-res)
				  (remote-server-url-set! *runremote* url)
				  (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data."))
				(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
				))
			  (begin
			    (debug:print-info 0 *default-log-port* (if host-port
								       (conc "received invalid host-port information " host-port)
								       "no host-port information received"))
			    ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
			    (set! needcare #t)))
		      (begin
			(debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.")
			(set! needcare #t)))))
	    (if needcare  ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host
		(let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory
		  (handle-exceptions
		      exn
		      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
		    (create-directory logdir #t)))))
		  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")
574
575
576
577
578
579
580
581

582
583
584
585
586
587
588
617
618
619
620
621
622
623

624
625
626
627
628
629
630
631







-
+







	      (list  "MT_TEST_RUN_DIR" work-area)
	      (list  "MT_TEST_NAME" test-name)
	      (list  "MT_ITEM_INFO" (conc itemdat))
	      (list  "MT_ITEMPATH"  item-path)
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))

	  (if mt-bindir-path (setenv "PATH" (conc (getenv "PATH") ":" mt-bindir-path)))
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)

680
681
682
683
684
685
686
687

688
689
690
691
692
693
694
723
724
725
726
727
728
729

730
731
732
733
734
735
736
737







-
+







(define (launch:cache-config)
  ;; if we have a linktree and -runtests and -target and the directory exists dump the config
  ;; to megatest-(current-seconds).cfg and symlink it to megatest.cfg
  (if (and *configdat* 
	   (or (args:get-arg "-run")
	       (args:get-arg "-runtests")
	       (args:get-arg "-execute")))
      (let* ((linktree (get-environment-variable "MT_LINKTREE"))
      (let* ((linktree (common:get-linktree)) ;; (get-environment-variable "MT_LINKTREE"))
	     (target   (common:args-get-target exit-if-bad: #t))
	     (runname  (or (args:get-arg "-runname")
			   (args:get-arg ":runname")
			   (getenv "MT_RUNNAME")))
	     (fulldir  (conc linktree "/"
			     target "/"
			     runname)))
740
741
742
743
744
745
746
747

748
749
750
751
752
753
754
783
784
785
786
787
788
789

790
791
792
793
794
795
796
797







-
+







	res)))

(define (launch:setup-body #!key (force #f) (areapath #f))
  (let* ((toppath  (or *toppath* areapath (getenv "MT_RUN_AREA_HOME"))) ;; preserve toppath
	 (runname  (common:args-get-runname))
	 (target   (common:args-get-target exit-if-bad: #t))
	 (linktree (common:get-linktree))
	 (contour  (args:get-arg "-contour"))
	 (contour  #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 (sections (if target (list "default" target) #f)) ;; for runconfigs
	 (mtconfig (or (args:get-arg "-config") "megatest.config")) ;; allow overriding megatest.config 
	 (rundir   (if (and runname target linktree)(conc linktree (if contour (conc "/" contour) "") "/" target "/" runname) #f))
	 (mtcachef (and rundir (conc rundir "/" ".megatest.cfg-"  megatest-version "-" megatest-fossil-hash)))
	 (rccachef (and rundir (conc rundir "/" ".runconfigs.cfg-"  megatest-version "-" megatest-fossil-hash)))
	 (cancreate (and rundir (file-exists? rundir)(file-write-access? rundir)))
         (cxt       (hash-table-ref/default *contexts* toppath #f)))
798
799
800
801
802
803
804

805
806




807
808
809
810
811
812
813
841
842
843
844
845
846
847
848


849
850
851
852
853
854
855
856
857
858
859







+
-
-
+
+
+
+







		  (begin
		    (debug:print-error 0 *default-log-port* "you are not in a megatest area!")
		    (exit 1)))
	      (setenv "MT_RUN_AREA_HOME" *toppath*)
	      ;; the seed read is done, now read runconfigs, cache it then read megatest.config one more time and cache it
	      (let* ((keys         (rmt:get-keys))
		     (key-vals     (keys:target->keyval keys target))
		     (linktree     (common:get-linktree))
		     (linktree     (or (getenv "MT_LINKTREE")
				       (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f)))
					; (or (getenv "MT_LINKTREE")
					;     (if *configdat*
					; 	   (configf:lookup *configdat* "setup" "linktree")
					; 	   (conc *toppath* "/lt"))))
		     (second-pass  (find-and-read-config
				    mtconfig
				    environ-patt: "env-override"
				    given-toppath: toppath
				    pathenvvar: "MT_RUN_AREA_HOME"))
		     (runconfigdat (begin     ;; this read of the runconfigs will see any adjustments made by re-reading megatest.config
				     (for-each (lambda (kt)
838
839
840
841
842
843
844
845

846
847
848
849
850
851
852
853
884
885
886
887
888
889
890

891

892
893
894
895
896
897
898







-
+
-







	      (set! *runconfigdat* rdat)
	      (set! *toppath*      toppath)
	      (set! *configstatus* 'partial))
	    (begin
	      (debug:print-error 0 *default-log-port* "No " mtconfig " file found. Giving up.")
	      (exit 2))))))
    ;; additional house keeping
    (let* ((linktree (or (getenv "MT_LINKTREE")
    (let* ((linktree (common:get-linktree)))
			 (if *configdat* (configf:lookup *configdat* "setup" "linktree") #f))))
      (if linktree
	  (begin
	    (if (not (file-exists? linktree))
		(begin
		  (handle-exceptions
		   exn
		   (begin
915
916
917
918
919
920
921
922

923
924
925
926
927
928
929
930
931
932
933
934
935
936

937
938



939
940
941
942
943
944
945
960
961
962
963
964
965
966

967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982


983
984
985
986
987
988
989
990
991
992







-
+














+
-
-
+
+
+







(define (create-work-area run-id run-info keyvals test-id test-src-path disk-path testname itemdat #!key (remtries 2))
  (let* ((item-path (if (string? itemdat) itemdat (item-list->path itemdat))) ;; if pass in string - just use it
	 (runname   (if (string? run-info) ;; if we pass in a string as run-info use it as run-name.
			run-info
			(db:get-value-by-header (db:get-rows run-info)
						(db:get-header run-info)
						"runname")))
	 (contour   (args:get-arg "-contour"))
	 (contour   #f) ;; NOT READY FOR THIS (args:get-arg "-contour"))
	 ;; convert back to db: from rdb: - this is always run at server end
	 (target   (string-intersperse (map cadr keyvals) "/"))

	 (not-iterated  (equal? "" item-path))

	 ;; all tests are found at <rundir>/test-base or <linkdir>/test-base
	 (testtop-base (conc target "/" runname "/" testname))
	 (test-base    (conc testtop-base (if not-iterated "" "/") item-path))

	 ;; nb// if itempath is not "" then it is prefixed with "/"
	 (toptest-path (conc disk-path (if contour (conc "/" contour) "") "/" testtop-base))
	 (test-path    (conc disk-path (if contour (conc "/" contour) "") "/" test-base))

	 ;; ensure this exists first as links to subtests must be created there
	 (linktree  (common:get-linktree))
	 (linktree  (let ((rd (config-lookup *configdat* "setup" "linktree")))
		      (if rd rd (conc *toppath* "/runs"))))
	 ;; WAS: (let ((rd (config-lookup *configdat* "setup" "linktree")))
	 ;;         (if rd rd (conc *toppath* "/runs"))))
	 ;; which seems wrong ...

	 (lnkbase   (conc linktree (if contour (conc "/" contour) "") "/" target "/" runname))
	 (lnkpath   (conc lnkbase "/" testname))
	 (lnkpathf  (conc lnkpath (if not-iterated "" "/") item-path))
	 (lnktarget (conc lnkpath "/" item-path)))

    ;; Update the rundir path in the test record for all, rundir=physical, shortdir=logical
1086
1087
1088
1089
1090
1091
1092
1093

1094
1095
1096
1097
1098
1099
1100
1133
1134
1135
1136
1137
1138
1139

1140
1141
1142
1143
1144
1145
1146
1147







-
+







;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ((item-path       (item-list->path itemdat))
	 (contour         (args:get-arg "-contour")))
	 (contour         #f)) ;; NOT READY FOR THIS (args:get-arg "-contour")))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5"))))
      (if (> launch-delay delta)
	  (begin
	    (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
1183
1184
1185
1186
1187
1188
1189
1190

1191







1192
1193
1194
1195
1196
1197
1198
1230
1231
1232
1233
1234
1235
1236

1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252







-
+

+
+
+
+
+
+
+







	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					(list 'transport (conc *transport-type*))
					;; (list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'homehost  (let* ((hhdat (common:get-homehost)))
							   (if hhdat
							       (car hhdat)
							       #f)))
					(list 'serverurl (if *runremote*
							     (remote-server-url *runremote*)
							     #f)) ;; 
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )

Modified megatest.scm from [564c08894e] to [f0fc76328a].

528
529
530
531
532
533
534
535

536
537
538
539
540
541
542
528
529
530
531
532
533
534

535
536
537
538
539
540
541
542







-
+







;;
(if (args:get-arg "-clean-cache")
    (begin
      (set! *didsomething* #t) ;; suppress the help output.
      (if (getenv "MT_TARGET") ;; no point in trying if no target
	  (if (args:get-arg "-runname")
	      (let* ((toppath  (launch:setup))
		     (linktree (if toppath (configf:lookup *configdat* "setup" "linktree")))
		     (linktree (common:get-linktree)) ;; (if toppath (configf:lookup *configdat* "setup" "linktree")))
		     (runtop   (conc linktree "/" (getenv "MT_TARGET") "/" (args:get-arg "-runname")))
		     (files    (if (file-exists? runtop)
				   (append (glob (conc runtop "/.megatest*"))
					   (glob (conc runtop "/.runconfig*")))
				   '())))
		(if (null? files)
		    (debug:print-info 0 *default-log-port* "No cached megatest or runconfigs files found. None removed.")
783
784
785
786
787
788
789
790

791
792
793
794
795
796
797
798
783
784
785
786
787
788
789

790

791
792
793
794
795
796
797







-
+
-







      (set! *didsomething* #t)))

(if (or (args:get-arg "-list-servers")
	(args:get-arg "-stop-server")
        (args:get-arg "-kill-server"))
    (let ((tl (launch:setup)))
      (if tl 
	  (let* ((tdbdat  (tasks:open-db))
	  (let* ((servers (server:get-list *toppath*))
		 (servers (tasks:get-all-servers (db:delay-if-busy tdbdat)))
		 (fmtstr  "~5a~12a~8a~20a~24a~10a~10a~10a~10a\n")
		 (servers-to-kill '())
                 (kill-switch  (if (args:get-arg "-kill-server") "-9" ""))
                 (killinfo   (or (args:get-arg "-stop-server") (args:get-arg "-kill-server") ))
		 (khost-port (if killinfo (if (substring-index ":" killinfo)(string-split ":") #f) #f))
		 (sid        (if killinfo (if (substring-index ":" killinfo) #f (string->number killinfo)) #f)))
	    (format #t fmtstr "Id" "MTver" "Pid" "Host" "Interface:OutPort" "InPort" "LastBeat" "State" "Transport")
812
813
814
815
816
817
818
819
820
821
822
823





824
825
826
827
828
829
830
811
812
813
814
815
816
817





818
819
820
821
822
823
824
825
826
827
828
829







-
-
-
-
-
+
+
+
+
+







		      (last-update (vector-ref server 10)) 
		      (transport  (vector-ref server 11))
		      (killed     #f)
		      (status     (< last-update 20)))
		 ;;   (zmq-sockets (if status (server:client-connect hostname port) #f)))
		 ;; no need to login as status of #t indicates we are connecting to correct 
		 ;; server
		 (if (equal? state "dead")
		     (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day.
			 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete))
		     (if (> last-update 20)        ;; Mark as dead if not updated in last 20 seconds
			 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid)))
		;; (if (equal? state "dead")
		;;     (if (> last-update (* 25 60 60)) ;; keep records around for slighly over a day.
		;; 	 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid action: 'delete))
		;;     (if (> last-update 20)        ;; Mark as dead if not updated in last 20 seconds
		;; 	 (tasks:server-deregister (db:delay-if-busy tdbdat) hostname pullport: pullport pid: pid)))
		 (format #t fmtstr id mt-ver pid hostname (conc interface ":" pullport) pubport last-update
			 (if status "alive" "dead") transport)
		 (if (or (equal? id sid)
			 (equal? sid 0)) ;; kill all/any
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to kill "kill-switch" server with pid " pid)
		       (tasks:kill-server hostname pid kill-switch: kill-switch)))))
1181
1182
1183
1184
1185
1186
1187
1188

1189
1190
1191
1192
1193
1194
1195
1180
1181
1182
1183
1184
1185
1186

1187
1188
1189
1190
1191
1192
1193
1194







-
+







				     (display (conc "target: " targetstr " "))
				     (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
			       runs-spec)
			      (newline)))))
		       
		     (for-each 
		      (lambda (test)
		      	(handle-exceptions
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
			   (print "exn=" (condition->list exn))
			   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
			   (print-call-chain (current-error-port)))
			 (let* ((test-id      (if (member "id"           tests-spec)(get-value-by-fieldname test test-field-index "id"          ) #f)) ;; (db:test-get-id         test))

Modified mt.scm from [0a710abd80] to [410c526eee].

206
207
208
209
210
211
212
213

214
215
216
217
218
219
220
206
207
208
209
210
211
212

213
214
215
216
217
218
219
220







-
+







	(let ((test-dirs (tests:get-tests-search-path *configdat*)))
	  (let loop ((hed (car test-dirs))
		     (tal (cdr test-dirs)))
	    ;; Setting MT_LINKTREE here is almost certainly unnecessary. 
	    (let ((tconfig-file (conc hed "/" test-name "/testconfig")))
	      (if (and (file-exists? tconfig-file)
		       (file-read-access? tconfig-file))
		  (let ((link-tree-path (configf:lookup *configdat* "setup" "linktree"))
		  (let ((link-tree-path (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
			(old-link-tree  (get-environment-variable "MT_LINKTREE")))
		    (if link-tree-path (setenv "MT_LINKTREE" link-tree-path))
		    (let ((newtcfg (read-config tconfig-file #f #f))) ;; NOTE: Does NOT run [system ...]
		      (hash-table-set! *testconfigs* test-name newtcfg)
		      (if old-link-tree 
			  (setenv "MT_LINKTREE" old-link-tree)
			  (unsetenv "MT_LINKTREE"))

Modified mtut.scm from [08476a2140] to [9e88c442a2].

10
11
12
13
14
15
16
17

18
19
20
21
22
23
24
10
11
12
13
14
15
16

17
18
19
20
21
22
23
24







-
+







;; (include "common.scm")
;; (include "megatest-version.scm")

;; fake out readline usage of toplevel-command
(define (toplevel-command . a) #f)

(use srfi-1 posix srfi-69 readline ;;  regex regex-case srfi-69 apropos json http-client directory-utils rpc typed-records;; (srfi 18) extras)
     srfi-18 extras format pkts regex regex-case
     srfi-18 extras format pkts pkts regex regex-case
     (prefix dbi dbi:)) ;;  zmq extras)

(declare (uses common))
(declare (uses megatest-version))
(declare (uses margs))
(declare (uses configf))
;; (declare (uses rmt))
341
342
343
344
345
346
347
348

349
350
351
352
353
354
355
356
357
358
359

360
361
362
363
364
365
366
341
342
343
344
345
346
347

348
349
350
351
352
353
354
355
356
357
358

359
360
361
362
363
364
365
366







-
+










-
+







	       (lambda (pkt)
		 (let* ((uuid    (cadr (string-match ".*/([0-9a-f]+).pkt" pkt)))
			(exists  (lookup-by-uuid pdb uuid #f)))
		   (if (not exists)
		       (let* ((pktdat (string-intersperse
				       (with-input-from-file pkt read-lines)
				       "\n"))
			      (apkt   (convert-pkt->alist pktdat))
			      (apkt   (pkt->alist pktdat))
			      (ptype  (alist-ref 'T apkt)))
			 (add-to-queue pdb pktdat uuid (or ptype 'cmd) #f 0)
			 (debug:print 4 *default-log-port* "Added " uuid " of type " ptype " to queue"))
		       (debug:print 4 *default-log-port* "pkt: " uuid " exists, skipping...")
		       )))
	       pkts))))
      (string-split pktsdirs)))))

(define (get-pkt-alists pkts)
  (map (lambda (x)
	 (alist-ref 'pkta x)) ;; 'pkta pulls out the alist from the read pkt
	 (alist-ref 'apkt x)) ;; 'pkta pulls out the alist from the read pkt
       pkts))

;; given list of pkts (alist mode) return list of D cards as Unix epoch, sorted descending
;; also delete duplicates by target i.e. (car pkt)
(define (get-pkt-times pkts)
  (delete-duplicates
   (sort 
389
390
391
392
393
394
395
396


397
398
399
400
401
402
403
389
390
391
392
393
394
395

396
397
398
399
400
401
402
403
404







-
+
+







  (let* ((sched     (cond
		     ((vector? sched-in)(local-time->seconds sched-in)) ;; we recieved a time
		     ((number? sched-in) sched-in)
		     (else     (current-seconds))))
	 (args-data (if args-alist
			args-alist
			(hash-table->alist args:arg-hash)))
	 (alldat    (apply append (list 'a action
	 (alldat    (apply append (list 'T "cmd"
					'a action
					'U (current-user-name)
					'D sched)
			   (map (lambda (x)
				  (let* ((param (car x))
					 (value (cdr x))
					 (pmeta (assoc param *arg-keys*))
					 (smeta (assoc param *switch-keys*))
759
760
761
762
763
764
765
766

767
768
769
770
771
772
773
760
761
762
763
764
765
766

767
768
769
770
771
772
773
774







-
+







    (fold (lambda (a res)
	    (let* ((key (car a)) ;; get the key name
		   (val (cdr a))
		   (par (lookup-param-by-key key)))
	      ;; (print "key: " key " val: " val " par: " par)
	      (if par
		  (conc res " " (param-translate par) " " val)
		  (if (member key '(a Z U D)) ;; a is the action
		  (if (member key '(a Z U D T)) ;; a is the action
		      res
		      (begin
			(print "ERROR: Unknown key in packet \"" key "\" with value \"" val "\"")
			res)))))
	  (conc "megatest " (if (not (member action '("sync")))
				(conc action " ")
				""))
805
806
807
808
809
810
811
812

813
814
815
816
817
818
819
806
807
808
809
810
811
812

813
814
815
816
817
818
819
820







-
+







	      (areas     (configf:get-section mtconf "areas"))
	      (contours  (configf:get-section mtconf "contours"))
	      (pkts      (find-pkts pdb '(cmd) '()))
	      (torun     (make-hash-table)) ;; target => ( ... info ... )
	      (rgentargs (hash-table-keys rgconf))) ;; these are the targets registered for automatically triggering
	 (for-each
	  (lambda (pktdat)
	    (let* ((pkta    (alist-ref 'pkta pktdat))
	    (let* ((pkta    (alist-ref 'apkt pktdat))
		   (action  (alist-ref 'a pkta))
		   (cmdline (pkt->cmdline pkta))
		   (uuid    (alist-ref 'Z pkta))
		   (logf    (conc logdir "/" uuid "-run.log"))
		   (fullcmd (conc "NBFAKE_LOG=" logf " nbfake " cmdline)))
	      (print "RUNNING: " fullcmd)
	      (system fullcmd)

Modified portlogger.scm from [fd5d390b65] to [e604a481b0].

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63












64
65
66
67
68
69


70
71
72
73
74
75
76
48
49
50
51
52
53
54









55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80







-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+





-
+
+







            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 0 *default-log-port* "exn=" (condition->list exn))
       (if (file-exists? fname)(delete-file fname)) ;; brutally get rid of it
       (print-call-chain (current-error-port)))
    ;;(handle-exceptions
    ;; exn
    ;; (begin
    ;;   ;; (release-dot-lock fname)
    ;;   (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
    ;;   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
    ;;   (debug:print 0 *default-log-port* "exn=" (condition->list exn))
    ;;   (if (file-exists? fname)
    ;;	   (begin
    ;;	     (debug:print 0 *default-log-port* "Removing portlogger database file " fname)
    ;;	     (delete-file fname))) ;; just get rid of the portlogger file
    ;;   (print-call-chain (current-error-port)))
     (let* (;; (lock   (obtain-dot-lock fname 2 9 10))
	    (db     (portlogger:open-db fname))
	    (res    (apply proc db params)))
       (sqlite3:finalize! db)
       ;; (release-dot-lock fname)
       res))))
       res)))
;; )

;; (fold-row PROC INIT DATABASE SQL . PARAMETERS) 
(define (portlogger:take-port db portnum)
  (let* ((qry1 (sqlite3:prepare db "INSERT INTO ports (port,state) VALUES (?,?);"))
	 (qry2 (sqlite3:prepare db "UPDATE ports SET state=?,update_time=strftime('%s','now') WHERE port=?;"))
	 (qry3 (sqlite3:prepare db "SELECT state FROM ports WHERE port=?;"))
	 (res  (sqlite3:with-transaction
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117














118
119
120
121
122
123
124
101
102
103
104
105
106
107














108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128







-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (sqlite3:finalize! qry1)
    (sqlite3:finalize! qry2)
    (sqlite3:finalize! qry3)
    res))

(define (portlogger:get-prev-used-port db)
  (handle-exceptions
   exn
   (begin
     (debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
     (debug:print 0 *default-log-port* "exn=" (condition->list exn))
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* "Continuing anyway.")
     #f)
   (sqlite3:fold-row
    (lambda (var curr)
      (or curr var curr))
    #f
    db
    "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))
      exn
      (begin
	(debug:print 0 *default-log-port* "EXCEPTION: portlogger database probably overloaded or unreadable. If you see this message again remove /tmp/.$USER-portlogger.db")
	(debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	(debug:print 0 *default-log-port* "exn=" (condition->list exn))
	(print-call-chain (current-error-port))
	(debug:print 0 *default-log-port* "Continuing anyway.")
	#f)
    (sqlite3:fold-row
     (lambda (var curr)
       (or curr var curr))
     #f
     db
     "SELECT (port) FROM ports WHERE state='released' LIMIT 1;")))

(define (portlogger:find-port db)
  (let* ((lowport (let ((val (configf:lookup *configdat* "server" "lowport")))
		    (if (and val 
			     (string->number val))
			(string->number val)
			32768)))

Modified rmt.scm from [fb0a2ceccb] to [5167aff935].

31
32
33
34
35
36
37

38
39
40
41
42
43
44







45
46
47
48
49
50
51
31
32
33
34
35
36
37
38







39
40
41
42
43
44
45
46
47
48
49
50
51
52







+
-
-
-
-
-
-
-
+
+
+
+
+
+
+







;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
(define (rmt:get-connection-info areapath #!key (area-dat #f)) ;; TODO: push areapath down.
  (let* ((runremote (or area-dat *runremote*))
	 (cinfo     (if (remote? runremote)
	 (cinfo     (remote-conndat runremote))
        (run-id 0))
    (if cinfo
	cinfo
	(if (server:check-if-running areapath)
	    (client:setup areapath)
	    #f))))
			(remote-conndat runremote)
			#f)))
	  (if cinfo
	      cinfo
	      (if (server:check-if-running areapath)
		  (client:setup areapath)
		  #f))))

(define *send-receive-mutex* (make-mutex)) ;; should have separate mutex per run-id

;; RA => e.g. usage (rmt:send-receive 'get-var #f (list varname))
;;
(define (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected

444
445
446
447
448
449
450
451

452
453
454
455
456
457
458
445
446
447
448
449
450
451

452
453
454
455
456
457
458
459







-
+







    (debug:print 3 *default-log-port* "TEST PATH: " test-path)
    (open-test-db test-path)))

;; WARNING: This currently bypasses the transaction wrapped writes system
(define (rmt:test-set-state-status-by-id run-id test-id newstate newstatus newcomment)
  (rmt:send-receive 'test-set-state-status-by-id run-id (list run-id test-id newstate newstatus newcomment)))

(define (rmt:set-tests-state-status run-id testnames currstate currstatus newstate newstatus)
(define (rmt:set-tests-state-status run-id                      testnames currstate currstatus newstate newstatus)
  (rmt:send-receive 'set-tests-state-status run-id (list run-id testnames currstate currstatus newstate newstatus)))

(define (rmt:get-tests-for-run run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode)
  (if (number? run-id)
      (rmt:send-receive 'get-tests-for-run run-id (list run-id testpatt states statuses offset limit not-in sort-by sort-order qryvals last-update mode))
      (begin
	(debug:print-error 0 *default-log-port* "rmt:get-tests-for-run called with bad run-id=" run-id)

Modified runconfigs.config from [5d4a6b6df5] to [54666c6a32].








1
2
3
4
5
6
7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
+
+
+
+
+
+
+







# To get emacs font highlighing in the various megatest configs do this:
#
# Install emacs-goodies-el:
#    sudo apt install emacs-goodies-el
# Add to your ~/.emacs file:
#    (add-to-list 'auto-mode-alist '("config\\'" . conf-space-mode))
#

# example of a cron entry to run sync using db spec pgdb, with pgdb setting in file local.config
#
[a/b/c]
all:scheduled:sync     cron=  0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config
quick:scheduled:sync   cron=  0/5 * * * *;dbdest=pgdb;appendconf=/mfs/matt/.sysmaint/local.config

Modified runs.scm from [1011cd6b97] to [8959c65de3].

19
20
21
22
23
24
25

26
27
28
29
30
31
32
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33







+







(declare (uses common))
(declare (uses items))
(declare (uses runconfig))
(declare (uses tests))
(declare (uses server))
(declare (uses mt))
(declare (uses archive))
(declare (uses keys))
;; (declare (uses filedb))

(include "common_records.scm")
(include "key_records.scm")
(include "db_records.scm")
(include "run_records.scm")
(include "test_records.scm")
54
55
56
57
58
59
60
61

62
63
64
65
66
67
68
55
56
57
58
59
60
61

62
63
64
65
66
67
68
69







-
+







(define (runs:set-megatest-env-vars run-id #!key (inkeys #f)(inrunname #f)(inkeyvals #f)(intarget #f)(testname #f)(itempath #f))
  (let* ((target    (or intarget 
			(common:args-get-target)
			(get-environment-variable "MT_TARGET")))
	 (keys      (if inkeys    inkeys    (rmt:get-keys)))
	 (keyvals   (if inkeyvals inkeyvals (keys:target->keyval keys target)))
	 (vals      (hash-table-ref/default *env-vars-by-run-id* run-id #f))
	 (link-tree (configf:lookup *configdat* "setup" "linktree")))
	 (link-tree (common:get-linktree))) ;; (configf:lookup *configdat* "setup" "linktree")))
    (if testname (setenv "MT_TEST_NAME" testname))
    (if itempath (setenv "MT_ITEMPATH"  itempath))

    ;; get the info from the db and put it in the cache
    (if link-tree
	(setenv "MT_LINKTREE" link-tree)
	(debug:print-error 0 *default-log-port* "linktree not set, should be set in megatest.config in [setup] section."))
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
210
211
212
213
214
215
216

217
218
219
220
221
222
223
224







-
+







	 (test-records       (make-hash-table))
	 ;; need to process runconfigs before generating these lists
	 (all-tests-registry #f)  ;; (tests:get-all)) ;; (tests:get-valid-tests (make-hash-table) test-search-path)) ;; all valid tests to check waiton names
	 (all-test-names     #f)  ;; (hash-table-keys all-tests-registry))
	 (test-names         #f)  ;; Generated by a call to (tests:filter-test-names all-test-names test-patts))
	 (required-tests     #f)  ;; Put fully qualified test/testpath names in this list to be done
	 (task-key           (conc (hash-table->alist flags) " " (get-host-name) " " (current-process-id)))
	 (tdbdat             (tasks:open-db))
	 ;; (tdbdat             (tasks:open-db))
	 (config-reruns      (let ((x (configf:lookup *configdat* "setup" "reruns")))
			       (if x (string->number x) #f)))
	 (allowed-tests      #f))

    ;; check if readonly
    (when readonly-mode
      (debug:print-error 0 *default-log-port* "megatest.db is readonly.  Cannot proceed.")
235
236
237
238
239
240
241
242
243


244
245
246
247
248
249
250
236
237
238
239
240
241
242


243
244
245
246
247
248
249
250
251







-
-
+
+







    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))

    (let ((sighand (lambda (signum)
		     ;; (signal-mask! signum) ;; to mask or not? seems to cause issues in exiting
		     (set! *time-to-exit* #t)
		     (print "Received signal " signum ", cleaning up before exit. Please wait...")
		     (let ((th1 (make-thread (lambda ()
					       (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed"))
					       ;; (let ((tdbdat (tasks:open-db)))
						 (rmt:tasks-set-state-given-param-key task-key "killed") ;; )
					       (print "Killed by signal " signum ". Exiting")
					       (thread-sleep! 3)
					       (exit))))
			   (th2 (make-thread (lambda ()
					       (thread-sleep! 5)
					       (debug:print 0 *default-log-port* "Done")
					       (exit 4)))))
316
317
318
319
320
321
322
323
324
325








326
327
328
329
330
331
332
317
318
319
320
321
322
323



324
325
326
327
328
329
330
331
332
333
334
335
336
337
338







-
-
-
+
+
+
+
+
+
+
+







	  ;; on test A but test B reached the point on being registered as NOT_STARTED and test
	  ;; A failed for some reason then on re-run using -keepgoing the run can never complete.
	  ;;
	  ;; (rmt:general-call 'delete-tests-in-state run-id "NOT_STARTED")
	  
	  ;; Now convert anything in allow-auto-rerun to NOT_STARTED
	  ;;
	  (for-each (lambda (state)
		      (rmt:set-tests-state-status run-id test-names state #f "NOT_STARTED" state))
		    (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))
	  (for-each
	   (lambda (state-status)
	     (let* ((ss-lst (string-split-fields "/" state-status #:infix))
		    (state  (if (> (length ss-lst) 0)(car  ss-lst) #f))
		    (status (if (> (length ss-lst) 1)(cadr ss-lst) #f)))
	       (rmt:set-tests-state-status run-id test-names state status "NOT_STARTED" status)))
	   ;; list of state/status pairs separated by spaces
	   (string-split (or (configf:lookup *configdat* "setup" "allow-auto-rerun") "")))))

    ;; Ensure all tests are registered in the test_meta table
    (runs:update-all-test_meta #f)

    ;; now add non-directly referenced dependencies (i.e. waiton)
    ;;======================================================================
    ;; refactoring this block into tests:get-full-data
1073
1074
1075
1076
1077
1078
1079
1080

1081
1082
1083
1084
1085
1086
1087
1079
1080
1081
1082
1083
1084
1085

1086
1087
1088
1089
1090
1091
1092
1093







-
+







	(max-concurrent-jobs   (let ((mcj (config-lookup *configdat* "setup"     "max_concurrent_jobs")))
				 (if (and mcj (string->number mcj))
				     (string->number mcj)
				     1))) ;; length of the register queue ahead
	(reglen                (if (number? reglen-in) reglen-in 1))
	(last-time-incomplete  (- (current-seconds) 900)) ;; force at least one clean up cycle
	(last-time-some-running (current-seconds))
	(tdbdat                (tasks:open-db))
	;; (tdbdat                (tasks:open-db))
	(runsdat (make-runs:dat
		  ;; hed: hed
		  ;; tal: tal
		  ;; reg: reg
		  ;; reruns: reruns
		  reglen: reglen
		  regfull: #f ;; regfull
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177



1178
1179
1180
1181
1182
1183
1184
1174
1175
1176
1177
1178
1179
1180



1181
1182
1183
1184
1185
1186
1187
1188
1189
1190







-
-
-
+
+
+







			   testmode:    testmode
			   newtal:      newtal
			   itemmaps:    itemmaps
			   ;; prereqs-not-met: prereqs-not-met
			   )))
	(runs:dat-regfull-set! runsdat regfull)
	;; every couple minutes verify the server is there for this run
	(if (and (common:low-noise-print 60 "try start server"  run-id)
		 (tasks:need-server run-id))
	    (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
	;; (if (and (common:low-noise-print 60 "try start server"  run-id)
	;; 	 (tasks:need-server run-id))
	;;     (tasks:start-and-wait-for-server tdbdat run-id 10)) ;; NOTE: delay and wait is done under the hood
	
	(if (> num-running 0)
	  (set! last-time-some-running (current-seconds)))

      (if (> (current-seconds)(+ last-time-some-running (or (configf:lookup *configdat* "setup" "give-up-waiting") 36000)))
	  (hash-table-set! *max-tries-hash* tfullname (+ (hash-table-ref/default *max-tries-hash* tfullname 0) 1)))
	;; (debug:print 0 *default-log-port* "max-tries-hash: " (hash-table->alist *max-tries-hash*))
1645
1646
1647
1648
1649
1650
1651
1652

1653
1654
1655
1656
1657
1658
1659
1651
1652
1653
1654
1655
1656
1657

1658
1659
1660
1661
1662
1663
1664
1665







-
+







;;    'set-state-status
;;
;; NB// should pass in keys?
;;
(define (runs:operate-on action target runnamepatt testpatt #!key (state #f)(status #f)(new-state-status #f)(mode 'remove-all)(options '()))
  (common:clear-caches) ;; clear all caches
  (let* ((db           #f)
	 (tdbdat       (tasks:open-db))
	 ;; (tdbdat       (tasks:open-db))
	 (keys         (rmt:get-keys))
	 (rundat       (mt:get-runs-by-patt keys runnamepatt target))
	 (header       (vector-ref rundat 0))
	 (runs         (vector-ref rundat 1))
	 (states       (if state  (string-split state  ",") '()))
	 (statuses     (if status (string-split status ",") '()))
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
1703
1704
1705
1706
1707
1708
1709
1710

1711
1712
1713
1714
1715
1716
1717
1709
1710
1711
1712
1713
1714
1715

1716
1717
1718
1719
1720
1721
1722
1723







-
+







		    ;; seek and kill in flight -runtests with % as testpatt here
		    ;; (if (equal? testpatt "%")
		    (tasks:kill-runner target run-name testpatt)
		    
		    ;; (debug:print 0 *default-log-port* "not attempting to kill any run launcher processes as testpatt is " testpatt))
		    (debug:print 1 *default-log-port* "Removing tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((set-state-status)
		    (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    ;; (if (tasks:need-server run-id)(tasks:start-and-wait-for-server tdbdat run-id 10))
		    (debug:print 1 *default-log-port* "Modifying state and staus for tests for run: " runkey " " (db:get-value-by-header run header "runname")))
		   ((print-run)
		    (debug:print 1 *default-log-port* "Printing info for run " runkey ", run=" run ", tests=" tests ", header=" header)
		    action)
		   ((run-wait)
		    (debug:print 1 *default-log-port* "Waiting for run " runkey ", run=" runnamepatt " to complete"))
		   ((archive)

Modified tasks.scm from [57e383db06] to [e18dae9779].

50
51
52
53
54
55
56

57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

65
66
67
68
69
70
71
72







+







-
+







		     (debug:print 0 *default-log-port* waiting-msg))
		 (if (> count 0)
		     (begin
		       (thread-sleep! 1)
		       (loop (file-exists? fullpath)
			     (- count 1)))
		     (begin
		       (debug:print 0 *default-log-port* "ERROR: removing the journal file " fullpath ", this is not good. Look for disk full, write access and other issues.")
		       (if remove (system (conc "rm -rf " fullpath)))
		       #f)))
	       #t))))))

(define (tasks:get-task-db-path)
  (let ((dbdir  (or (configf:lookup *configdat* "setup" "monitordir")
		    (configf:lookup *configdat* "setup" "dbdir")
		    (conc (configf:lookup *configdat* "setup" "linktree") "/.db"))))
		    (conc (common:get-linktree) "/.db"))))
    (handle-exceptions
     exn
     (begin
       (debug:print-error 0 *default-log-port* "Couldn't create path to " dbdir)
       (exit 1))
     (if (not (directory? dbdir))(create-directory dbdir #t)))
    dbdir))
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
92
93
94
95
96
97
98

99
100
101
102
103
104
105
106







-
+







	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))
	     (thread-sleep! 1)
	     (tasks:open-db numretries (- numretries 1)))
	   (begin
	     (print-call-chain (current-error-port))
	     (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	     (debug:print 0 *default-log-port* " exn=" (condition->list exn))))
       (let* ((dbpath       (tasks:get-task-db-path))
       (let* ((dbpath        (db:dbfile-path )) ;; (tasks:get-task-db-path))
	      (dbfile       (conc dbpath "/monitor.db"))
	      (avail        (tasks:wait-on-journal dbpath 10)) ;; wait up to about 10 seconds for the journal to go away
	      (exists       (file-exists? dbpath))
	      (write-access (file-write-access? dbpath))
	      (mdb          (cond ;; what the hek is *toppath* doing here?
			     ((and (string? *toppath*)(file-write-access? *toppath*))
			      (sqlite3:open-database dbfile))

Modified tests/Makefile from [ca46bf23f9] to [3cbc059672].

17
18
19
20
21
22
23

24

25

26

27
28
29
30
31
32
33
17
18
19
20
21
22
23
24

25
26
27

28
29
30
31
32
33
34
35







+
-
+

+
-
+







FS  = $(shell df -T .|tail -1|awk '{print $$2}')
VER = $(shell fsl info|grep checkout|awk '{print $$2}'|cut -c 1-5)

# The NEWTARGET causes some tests to fail. Do not use until this is fixed.
NEWTARGET  = "$(OS)/$(FS)/$(VER)"
TARGET     = "ubuntu/nfs/none"

all : build unit test4
all : build unit test1 test2 test3 test4 test5 test6 test7 test8 test9
# test1 test2 test3 test4 test5 test6 test7 test8 test9

unit : all-rmt.log
unit : basicserver.log runs.log misc.log tests.log
# basicserver.log runs.log misc.log tests.log

rel : 
	cd release;dashboard -rows 25 &

## basicserver.log : unittests/basicserver.scm
## 	script -c "./rununittest.sh basicserver $(DEBUG)" basicserver.log

Modified tests/fullrun/megatest.config from [353b25ebc0] to [ad7007c3e5].

97
98
99
100
101
102
103
104
105


106
107
108
109
110
111
112
97
98
99
100
101
102
103


104
105
106
107
108
109
110
111
112







-
-
+
+







# override the html viewer launch command
#
# htmlviewercmd firefox -new-window 
htmlviewercmd arora

# -runtests automatically deletes the records for tests with the listed states on starting up a run allowing them to re-run
#     (nb// this is in addition to NOT_STARTED which is automatically re-run)
#
allow-auto-rerun INCOMPLETE ZERO_ITEMS
# format is STATE/STATUS
allow-auto-rerun /INCOMPLETE /ZERO_ITEMS
# could add: STUCK STUCK/DEAD UNKNOWN KILLED KILLREQ PREQ_DISCARD

[validvalues]
state start end 0 1 - 2
status pass fail n/a 0 1 running - 2

# These are set before all tests, override them 

Added tests/unittests/all-rmt.scm version [091111a6e5].



























































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

;;======================================================================
;;  A L L - R M T 
;;======================================================================

;; Run like this:
;;
;;  ./rununittest.sh all-rmt 1

;; Definitions:
;;   NTN - no test needed
;;   DEP - function is deprecated, no point in testing
;;   NED - function nested under others, no test needed.
;;   DEF - deferred

(print "start dir: " (current-directory))
       
(define toppath (current-directory))
(test #f #t (string?(server:start-and-wait *toppath*)))

(test "setup for run" #t (begin (launch:setup)
 				(string? (getenv "MT_RUN_AREA_HOME"))))
(test #f #t (vector? (client:setup toppath)))

(test #f #t (vector? (rmt:get-connection-info toppath))) ;; TODO: push areapath down.
(test #f #t (string? (server:check-if-running ".")))
;; DEF (test #f #f (rmt:send-receive-no-auto-client-setup *runremote* 'get-keys #f '()))
;; DEF (rmt:kill-server run-id)
;; DEF (rmt:start-server run-id)
(test #f '(#t "successful login")(rmt:login #f))
;; DEF (rmt:login-no-auto-client-setup connection-info)
(test #f #t (pair? (rmt:get-latest-host-load (get-host-name))))
(test #f #t (list? (rmt:get-changed-record-ids 0)))
(test #f #f (begin (runs:update-all-test_meta #f) #f))
(test #f '("test1" "test2")(sort (alist-ref "tagtwo" (hash-table->alist (rmt:get-tests-tags)) equal?) string<=))
(test #f '() (rmt:get-key-val-pairs 0))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys-write)) ;; dummy query to force server start
(test #f '() (rmt:get-key-vals 1))
(test #f (vector '("SYSTEM" "RELEASE") '()) (rmt:get-targets))
(test #f "" (rmt:get-target 1))
(test #f #t (rmt:register-test 1 "foo" ""))
(test #f 1  (rmt:get-test-id 1 "foo" ""))
(test #f "foo" (vector-ref (rmt:get-test-info-by-id 1 1) 2))
(test #f "/tmp/badname" (rmt:test-get-rundir-from-test-id 1 1))
(test #f '(1) (db:set-tests-state-status *db* 1 '("foo")  "COMPLETED" "PASS" "NOT_STARTED" "PASS"))
(test #f '(1) (rmt:set-tests-state-status 1 '("foo") "COMPLETED" "PASS" "NOT_STARTED" "PASS"))
(test #f #t (mt:test-set-state-status-by-id 1 1 "COMPLETED" "PASS" "Just testing!"))
(test #f #t (list? (rmt:get-tests-for-run 1 "%" '() '() #f #f #f #f #f #f 0 #f)))
(test #f #t (list? (rmt:get-tests-for-runs-mindata '(1) "%" '() '() #f)))
(test #f #f (begin (rmt:delete-test-records 1 2) #f))
(test #f #t (begin (rmt:test-set-state-status 1 1 "COMPLETED" "FAIL" "Another message") #t))
(test #f 0  (rmt:test-toplevel-num-items 1 "foo"))
(test #f '()(rmt:get-matching-previous-test-run-records 1 "foo" ""))
(test #f '("/tmp/badname" "logs/final.log") (rmt:test-get-logfile-info 1 "foo"))
(test #f '()(rmt:test-get-records-for-index-file 1 "foo"))
(test #f #t (vector? (rmt:get-testinfo-state-status 1 1)))
(test #f #t (rmt:test-set-log! 1 1 "/tmp/another/logfile/eh"))
(test #f #f (begin (rmt:test-set-top-process-pid 1 1 123) #f))
(test #f 123  (rmt:test-get-top-process-pid 1 1))
(define keys (rmt:get-keys))
(test #f '()(rmt:get-run-ids-matching-target keys "%/%" #f "%" "%" "%" "%"))
(test #f '()(rmt:test-get-paths-matching-keynames-target-new keys "%/%" #f "%" "%" "%" "%"))
(test #f '()(rmt:get-prereqs-not-met 1 '() "foo" ""))
(test #f 0 (rmt:get-count-tests-running-for-run-id 1))
(test #f 0 (rmt:get-count-tests-running 1))
(test #f 0 (rmt:get-count-tests-running-for-testname 1 "foo"))
(test #f 0 (rmt:get-count-tests-running-in-jobgroup 1 "nada"))
(test #f #f (begin (rmt:set-state-status-and-roll-up-items 1 "foo" "" "COMPLETED" "FAIL" "Just yet another message") #f))
(test #f #t (rmt:top-test-set-per-pf-counts 1 "foo"))
(test #f '() (rmt:get-raw-run-stats 1))
(test #f #t (vector? (rmt:get-run-info 1)))
(test #f 0 (rmt:get-num-runs "%"))
(define keypatts '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) )
(test #f 1 (rmt:register-run '(("SYSTEM" "ubuntu")("RELEASE" "v1.234")) "bar" "NEW" "JUSTFINE" "bobafett" "quick"))
(test #f "bar" (rmt:get-run-name-from-id 1))
(test #f #t (begin (rmt:delete-run 2) #t)) ;; delete a non-existant run
(test #f #t (begin (rmt:update-run-stats 1 '()) #t))
(test #f #t (begin (rmt:delete-old-deleted-test-records) #t))
(test #f #t (vector? (rmt:get-runs "%" 10 0 keypatts)))
(test #f '(1)(rmt:get-all-run-ids))
(test #f '()(rmt:get-prev-run-ids 1))
(test #f #t (begin (rmt:lock/unlock-run 1 #t #f "mikey") #t))
(test #f "JUSTFINE" (rmt:get-run-status 1))
(test #f #t (begin (rmt:set-run-status 1 "NOTFINE" msg: "A message") #t))
(test #f #t (begin (rmt:update-run-event_time 1) #t))
;; (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default
;; (rmt:find-and-mark-incomplete run-id ovr-deadtime)
;; (rmt:get-main-run-stats run-id)
;; (rmt:get-var varname)
;; (rmt:set-var varname value)
;; (rmt:find-and-mark-incomplete-all-runs #!key (ovr-deadtime #f))
;; (rmt:get-previous-test-run-record run-id test-name item-path)
;; (rmt:get-run-stats)
;; (rmt:teststep-set-status! run-id test-id teststep-name state-in status-in comment logfile)
;; (rmt:get-steps-for-test run-id test-id)
;; (rmt:read-test-data run-id test-id categorypatt #!key (work-area #f)) 
;; (rmt:testmeta-add-record testname)
;; (rmt:testmeta-get-record testname)
;; (rmt:testmeta-update-field test-name fld val)
;; (rmt:test-data-rollup run-id test-id status)
;; (rmt:csv->test-data run-id test-id csvdata)
;; (rmt:tasks-find-task-queue-records target run-name test-patt state-patt action-patt)
;; (rmt:tasks-add action owner target runname testpatt params)
;; (rmt:tasks-set-state-given-param-key param-key new-state)
;; (rmt:tasks-get-last target runname)
;; (rmt:archive-get-allocations  testname itempath dneeded)
;; (rmt:archive-register-block-name bdisk-id archive-path)
;; (rmt:archive-allocate-testsuite/area-to-block block-id testsuite-name areakey)
;; (rmt:archive-register-disk bdisk-name bdisk-path df)
;; (rmt:test-set-archive-block-id run-id test-id archive-block-id)
;; (rmt:test-get-archive-block-info archive-block-id)
;; NED (rmt:open-qry-close-locally cmd run-id params #!key (remretries 5))
;; NED (rmt:send-receive cmd rid params #!key (attemptnum 1)(area-dat #f)) ;; start attemptnum at 1 so the modulo below works as expected
;; DEF (test #f #f (rmt:print-db-stats))
;; DEF (rmt:get-max-query-average run-id)
;; NED (rmt:general-call stmtname run-id . params)
;; DEP (rmt:sdb-qry qry val run-id)
;; DEF (rmt:runtests user run-id testpatt params)
;; DEP (rmt:open-test-db-by-test-id run-id test-id #!key (work-area #f))
;; DEP (rmt:synchash-get run-id proc synckey keynum params)
;; DEP (test #f #f (rmt:update-pass-fail-counts 1 "foo"))