Megatest

Changes On Branch f38d05a68da3b827
Login

Changes In Branch v1.63 Through [f38d05a68d] Excluding Merge-Ins

This is equivalent to a diff from bff9d56983 to f38d05a68d

2018-04-18
00:21
No idea what this was. Commiting just in case it is interesting ... Leaf check-in: 33160354bc user: matt tags: dunno
2017-01-04
16:49
Use item-specific testconfig caches when possible (i.e. where there is enough information available). check-in: f98ca94491 user: mrwellan tags: v1.63
12:10
updated manyservers.sh to be runnable with other instances of itself on same host check-in: f38d05a68d user: bjbarcla tags: v1.63
2017-01-03
15:02
Fixed couple calls to start servers which were receiving the old run-id parameter instead of the needed toppath check-in: d66cd9561f user: mrwellan tags: v1.63
2016-12-05
13:11
Bumped version to v1.6301 check-in: fbf0a07a1d user: mrwellan tags: v1.63, v1.6301
12:58
Protected calls to expensive ping with calls to cheap server:read-dotserver. This appears to 100% the run-away pings problem Closed-Leaf check-in: bff9d56983 user: mrwellan tags: v1.62-no-rpc
11:05
Correct expiration of server connections check-in: b168adb943 user: mrwellan tags: v1.62-no-rpc

Modified Makefile from [1b85fc3382] to [629c3de1dd].

129
130
131
132
133
134
135




136
137
138
139
140
141
142
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146







+
+
+
+







$(PREFIX)/bin/mt_xterm : utils/mt_xterm
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/nbfake : utils/nbfake
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/remrun : utils/remrun
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/viewscreen : utils/viewscreen
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/nbfind : utils/nbfind
	$(INSTALL) $< $@
157
158
159
160
161
162
163





164
165
166
167
168
169
170
171
172
173
174

175
176
177

178
179
180
181
182
183
184
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

183
184
185
186
187
188
189
190
191
192
193
194







+
+
+
+
+










-
+



+







deploytarg/viewscreen : utils/viewscreen
	$(INSTALL) $< $@
	chmod a+x $@

deploytarg/nbfind : utils/nbfind
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/mtest-reaper: helpers/mtest-reaper.scm helpers/ducttape-lib.scm helpers/inteldate.scm helpers/mimetypes.scm
	make -C helpers $@ PREFIX=$(PREFIX) INSTALL=$(INSTALL) ARCHSTR=$(ARCHSTR)

mtest-reaper: $(PREFIX)/bin/mtest-reaper

# install dashboard as dboard so wrapper script can be called dashboard
$(PREFIX)/bin/.$(ARCHSTR)/dboard : dboard $(FILES) utils/mk_wrapper
	utils/mk_wrapper $(PREFIX) dboard $(PREFIX)/bin/dashboard
	chmod a+x $(PREFIX)/bin/dashboard
	$(INSTALL) dboard $(PREFIX)/bin/.$(ARCHSTR)/dboard

install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/loadrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
	  $(PREFIX)/share/docs/megatest_manual.html 
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun

$(PREFIX)/bin/.$(ARCHSTR) : 
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)
	mkdir -p $(PREFIX)/bin/.$(ARCHSTR)/lib

test: tests/tests.scm
	cd tests;csi -I .. -b -n tests.scm

ext-tests/.fslckout : $(MTQA_FOSSIL)
	mkdir -p ext-tests
	cd ext-tests;fossil open --nested $(MTQA_FOSSIL)
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
219
220
221
222
223
224
225

226
227
228
229
230
231
232
233







-
+








#	for i in apropos base64 canvas-draw csv-xml directory-utils dot-locking extras fmt format hostinfo http-client intarweb json md5 message-digest posix posix-extras readline regex regex-case s11n spiffy spiffy-request-vars sqlite3 srfi-1 srfi-18 srfi-69 tcp test uri-common check-errors synch matchable sql-null tcp-server rpc blob-utils string-utils variable-item defstruct uri-generic sendfile opensll openssl lookup-table list-utils stack; do \
#	chicken-install -prefix deploytarg -deploy $$i;done

# deploytarg/libsqlite3.so : 
# 	CSC_OPTIONS="-Ideploytarg -Ldeploytarg" $CHICKEN_INSTALL -prefix deploytarg -deploy sqlite3

deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/viewsceen deploytarg/nbfind deploytarg/apropos.so
deploy : deploytarg/mtest deploytarg/dboard $(DEPLOYHELPERS) deploytarg/nbfake deploytarg/remrun deploytarg/viewsceen deploytarg/nbfind deploytarg/apropos.so

# deploytarg/libiupcd.so : $(CKPATH)/lib/libiupcd.so
# 	for i in iup im cd av call sqlite; do \
# 	  cp $(CKPATH)/lib/lib$$i* deploytarg/ ; \
# 	done
# 	cp $(CKPATH)/include/*.h deploytarg

276
277
278
279
280
281
282

286
287
288
289
290
291
292
293







+
	fi
	if csi -ne '(use postgresql)';then \
	   echo "(use postgresql)(hash-table-set! *available-db* 'postgresql #t)" >> altdb.scm;\
	fi

portlogger-example : portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o
	csc $(CSCOPTS) portlogger-example.scm api.o archive.o client.o common.o configf.o daemon.o dashboard-tests.o db.o dcommon.o ezsteps.o filedb.o genexample.o gutils.o http-transport.o items.o keys.o launch.o lock-queue.o margs.o megatest-version.o mt.o ods.o portlogger.o process.o rmt.o rpc-transport.o runconfig.o runs.o sdb.o server.o synchash.o tasks.o tdb.o tests.o tree.o

Modified NOTES from [fdf26c3763] to [24a602c385].







1
2
3
4
5
6
7
8

9
10
11
12
13
14
15
1
2
3
4
5
6
7
8
9
10
11
12
13

14
15
16
17
18
19
20
21
+
+
+
+
+
+







-
+







=====================================================================
NOTES from looking at branch v1.62-rpc
=====================================================================

*last-db-access* or *db-last-access* ==> which is it to be?
seen in singletest: ERROR: Unrecognised arguments: :first_err This is the first error

======================================================================
New way of launching needed to accomodate different target hosttypes
for items
======================================================================

[flavors]
general ssh #{getbgesthost general}
general ssh #{getbesthost general}
nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo

[hosts]
general cubian xena

[launchers]
envsetup general

Modified api.scm from [fe7a2f21be] to [1debddd502].

37
38
39
40
41
42
43

44
45
46
47
48
49
50
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51







+







    get-count-tests-running-for-run-id
    get-run-info
    get-run-status
    get-run-stats
    get-targets
    get-target
    ;; register-run
    get-tests-tags
    get-tests-for-run
    get-test-id
    get-tests-for-runs-mindata
    get-run-name-from-id
    get-runs
    get-num-runs
    get-all-run-ids
59
60
61
62
63
64
65


66
67
68
69
70
71
72
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75







+
+







    testmeta-get-record
    have-incompletes?
    synchash-get
    ))

(define api:write-queries
  '(
    get-keys-write ;; dummy "write" query to force server start

    ;; SERVERS
    start-server
    kill-server

    ;; TESTS
    test-set-state-status-by-id
    delete-test-records
109
110
111
112
113
114
115

116
117
118
119
120
121
122
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126







+







;;
;;    - returns #( flag result )
;;
(define (api:execute-requests dbstruct dat)
  (handle-exceptions
   exn
   (let ((call-chain (get-call-chain)))
     (debug:print 0 *default-log-port* "WARNING: api:execute-requests received an exception from peer")
     (print-call-chain (current-error-port))
     (debug:print 0 *default-log-port* " message: "  ((condition-property-accessor 'exn 'message) exn))       
     (vector #f (vector exn call-chain dat))) ;; return some stuff for debug if an exception happens
   (if (not (vector? dat))                    ;; it is an error to not receive a vector
       (vector #f #f "remote must be called with a vector")       
       (vector                                   ;; return a vector + the returned data structure
	#t 
165
166
167
168
169
170
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

191
192
193
194
195
196
197
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203







+



















+







	    ((sync-inmem->db)               (let ((run-id (car params)))
					      (db:sync-touched dbstruct run-id force-sync: #t)))
	    ((mark-incomplete)              (apply db:find-and-mark-incomplete dbstruct params))

	    ;; TESTMETA
	    ((testmeta-add-record)       (apply db:testmeta-add-record dbstruct params))
	    ((testmeta-update-field)     (apply db:testmeta-update-field dbstruct params))
            ((get-tests-tags)            (db:get-tests-tags dbstruct))

	    ;; TASKS
	    ((tasks-add)                 (apply tasks:add dbstruct params))   
	    ((tasks-set-state-given-param-key) (apply tasks:set-state-given-param-key dbstruct params))
	    ((tasks-get-last)            (apply tasks:get-last dbstruct params))

	    ;; ARCHIVES
	    ;; ((archive-get-allocations)   
	    ((archive-register-disk)     (apply db:archive-register-disk dbstruct params))
	    ((archive-register-block-name)(apply db:archive-register-block-name dbstruct params))
	    ((archive-allocate-testsuite/area-to-block)(apply db:archive-allocate-testsuite/area-to-block dbstruct block-id testsuite-name areakey))

	    ;;======================================================================
	    ;; READ ONLY QUERIES
	    ;;======================================================================

	    ;; KEYS
	    ((get-key-val-pairs)               (apply db:get-key-val-pairs dbstruct params))
	    ((get-keys)                        (db:get-keys dbstruct))
            ((get-keys-write)                        (db:get-keys dbstruct)) ;; force a dummy "write" query to force server
	    ((get-key-vals)                    (apply db:get-key-vals dbstruct params))
	    ((get-target)                      (apply db:get-target dbstruct params))
	    ((get-targets)                     (db:get-targets dbstruct))

	    ;; ARCHIVES
	    ((test-get-archive-block-info)     (apply db:test-get-archive-block-info dbstruct params))
	    
237
238
239
240
241
242
243

244
245
246
247
248
249
250
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257







+







	    ((get-steps-data)               (apply db:get-steps-data dbstruct params))
	    ((get-steps-for-test)           (apply db:get-steps-for-test dbstruct params))

	    ;; TEST DATA
	    ((read-test-data)               (apply db:read-test-data dbstruct params))

	    ;; MISC
            ((get-latest-host-load)         (apply db:get-latest-host-load dbstruct params))
	    ((have-incompletes?)            (apply db:have-incompletes? dbstruct params))
	    ((login)                        (apply db:login dbstruct params))
	    ((general-call)                 (let ((stmtname   (car params))
						  (run-id     (cadr params))
						  (realparams (cddr params)))
					      (db:with-db dbstruct run-id #t ;; these are all for modifying the db
							  (lambda (db)

Modified client.scm from [50265f350f] to [cbdbeb9ff4].

195
196
197
198
199
200
201
202

203
204
205
206
207
208
209
210

211
212
213
214
215
216
217
195
196
197
198
199
200
201

202
203
204
205
206
207
208
209

210
211
212
213
214
215
216
217







-
+







-
+







							   run-id 
							   (tasks:hostinfo-get-interface server-dat)
							   (tasks:hostinfo-get-port      server-dat)
							   " client:setup (server-dat = #t)")
		      (if (> remaining-tries 8)
			  (thread-sleep! (+ 1 (random 5))) ;; spread out the starts a little
			  (thread-sleep! (+ 15 (random 20)))) ;; it isn't going well. give it plenty of time
		      (server:try-running run-id)
		      (server:try-running *toppath*)
		      (thread-sleep! 5)   ;; give server a little time to start up
		      (client:setup run-id remaining-tries: (- remaining-tries 1))
		      )))
	      (begin    ;; no server registered
		(let ((num-available (tasks:num-in-available-state (db:dbdat-get-db tdbdat) run-id)))
		  (debug:print-info 0 *default-log-port* "client:setup, no server registered, remaining-tries=" remaining-tries " num-available=" num-available)
		  (if (< num-available 2)
		      (server:try-running run-id))
		      (server:try-running *toppath*))
		  (thread-sleep! (+ 5 (random (- 20 remaining-tries))))  ;; give server a little time to start up, randomize a little to avoid start storms.
		  (client:setup run-id remaining-tries: (- remaining-tries 1)))))))))

;; keep this as a function to ease future 
(define (client:start run-id server-info)
  (http-transport:client-connect (tasks:hostinfo-get-interface server-info)
				 (tasks:hostinfo-get-port server-info)))

Modified common.scm from [a3fcacf886] to [4b29489636].

88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
88
89
90
91
92
93
94

95
96

97
98
99
100
101
102
103
104







-


-
+







;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
(define *db-write-access*     #t)
;; db sync
(define *db-last-write*       0)                 ;; used to record last touch of db
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync* and *db-last-write*
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
(define *db-access-mutex*     (make-mutex))
(define *db-cache-path*       #f)

;; SERVER
129
130
131
132
133
134
135
136

137
138
139

















140
141
142
143
144
145


146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
128
129
130
131
132
133
134

135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159


160
161
162
163
164
165
166
167
168

169
170
171
172
173
174
175
176







-
+



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+




-
-
+
+







-
+







(define *keys*              (make-hash-table)) ;; cache the keys here
(define *keyvals*           (make-hash-table))
(define *toptest-paths*     (make-hash-table)) ;; cache toptest path settings here
(define *test-paths*        (make-hash-table)) ;; cache test-id to test run paths here
(define *test-ids*          (make-hash-table)) ;; cache run-id, testname, and item-path => test-id
(define *test-info*         (make-hash-table)) ;; cache the test info records, update the state, status, run_duration etc. from testdat.db

(define *run-info-cache*    (make-hash-table)) ;; run info is stable, no need to reget
(define *run-info-cache*     (make-hash-table)) ;; run info is stable, no need to reget
(define *launch-setup-mutex* (make-mutex))     ;; need to be able to call launch:setup often so mutex it and re-call the real deal only if *toppath* not set
(define *homehost-mutex*     (make-mutex))

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;; launching and hosts
(defstruct host
  (reachable    #f)
  (last-update  0)
  (last-used    0)
  (last-cpuload 1))

(define *host-loads*         (make-hash-table))

;; cache environment vars for each run here
(define *env-vars-by-run-id* (make-hash-table))

;; Testconfig and runconfig caches. 
(define *testconfigs*       (make-hash-table)) ;; test-name => testconfig
(define *runconfigs*        (make-hash-table)) ;; target    => runconfig
(define *testconfigs*        (make-hash-table)) ;; test-name => testconfig
(define *runconfigs*         (make-hash-table)) ;; target    => runconfig

;; This is a cache of pre-reqs met, don't re-calc in cases where called with same params less than
;; five seconds ago
(define *pre-reqs-met-cache* (make-hash-table))

;; cache of verbosity given string
;;
(define *verbosity-cache* (make-hash-table))
(define *verbosity-cache*    (make-hash-table))

(define (common:clear-caches)
  (set! *target*             (make-hash-table))
  (set! *keys*               (make-hash-table))
  (set! *keyvals*            (make-hash-table))
  (set! *toptest-paths*      (make-hash-table))
  (set! *test-paths*         (make-hash-table))
520
521
522
523
524
525
526
527
528
529
530
531







532
533
534
535
536
537
538
539
540

541
542

543
544





545
546
547
548

549
550
551
552
553
554





555
556
557

558
559
560
561

562

563

564
565
566
567
568
569
570
536
537
538
539
540
541
542





543
544
545
546
547
548
549
550
551
552
553
554
555
556
557

558
559

560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575



576
577
578
579
580
581
582
583
584
585
586
587

588
589
590

591
592
593
594
595
596
597
598







-
-
-
-
-
+
+
+
+
+
+
+








-
+

-
+


+
+
+
+
+




+



-
-
-
+
+
+
+
+



+



-
+

+
-
+







  (message-digest-string (md5-primitive) *toppath*))

;;======================================================================
;; E X I T   H A N D L I N G
;;======================================================================

(define (common:run-sync?)
  (let ((ohh (common:on-homehost?))
	(srv (args:get-arg "-server")))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)
    (and (common:on-homehost?)
	 (args:get-arg "-server"))))
    (and (common:on-homehost?)
	 (args:get-arg "-server")))

;;   (let ((ohh (common:on-homehost?))
;; 	(srv (args:get-arg "-server")))
;;     (and ohh srv)))
    ;; (debug:print-info 0 *default-log-port* "common:run-sync? ohh=" ohh ", srv=" srv)

;;;; run-ids
;;    if #f use *db-local-sync* : or 'local-sync-flags
;;    if #t use timestamps      : or 'timestamps
(define (common:sync-to-megatest.db dbstruct) 
  (let ((start-time         (current-seconds))
	(res                (db:multi-db-sync dbstruct 'new2old)))
    (let ((sync-time (- (current-seconds) start-time)))
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")
      (debug:print-info 3 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))
      (if (common:low-noise-print 30 "sync new to old")
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds")))
	  (debug:print-info 0 *default-log-port* "Sync of newdb to olddb completed in " sync-time " seconds pid="(current-process-id))))
    res))




(define *wdnum* 0)
(define *wdnum*mutex (make-mutex))
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:watchdog)
  
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync (common:run-sync?))
	(debug-mode  (debug:debug-mode 1))
	(last-time   (current-seconds)))
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync)
    (if legacy-sync
	(last-time   (current-seconds))
        (this-wd-num     (begin (mutex-lock! *wdnum*mutex) (let ((x *wdnum*)) (set! *wdnum* (add1 *wdnum*)) (mutex-unlock! *wdnum*mutex) x)))
        )
    (debug:print-info 0 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)" this-wd-num="this-wd-num)
    (if (and legacy-sync (not *time-to-exit*))
	(let ((dbstruct (db:setup)))
	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
            ;;(BB> "watchdog loop.  pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
	    ;; sync for filesystem local db writes
	    ;;
	    (mutex-lock! *db-multi-sync-mutex*)
	    (let* ((need-sync        (>= *db-last-write* *db-last-sync*)) ;; no sync since last write
	    (let* ((need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
		   (sync-in-progress *db-sync-in-progress*)
		   (should-sync      (and (not *time-to-exit*)
		   (should-sync      (> (- (current-seconds) *db-last-sync*) 5)) ;; sync every five seconds minimum
                                          (> (- (current-seconds) *db-last-sync*) 5))) ;; sync every five seconds minimum
		   (will-sync        (and (or need-sync should-sync)
					  (not sync-in-progress)))
		   (start-time       (current-seconds)))
	      ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
	      (if will-sync (set! *db-sync-in-progress* #t))
	      (mutex-unlock! *db-multi-sync-mutex*)
	      (if will-sync
588
589
590
591
592
593
594


595
596
597
598
599
600

601
602

603
604


605
606
607
608
609
610
611
612
613
614

615
616
617
618
619
620
621
622





623

624
625
626
627

628
629
630
631





632
633
634
635





636
637
638
639

640
641
642
643
644
645
646
616
617
618
619
620
621
622
623
624
625
626
627
628
629

630
631

632
633
634
635
636
637
638
639
640
641
642
643
644
645

646
647
648
649
650
651
652
653
654
655
656
657
658
659

660
661
662
663
664
665




666
667
668
669
670
671
672
673

674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690







+
+





-
+

-
+


+
+









-
+








+
+
+
+
+
-
+




+
-
-
-
-
+
+
+
+
+



-
+
+
+
+
+




+







		    (set! last-time start-time)
		    (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
	    
	    ;; keep going unless time to exit
	    ;;
	    (if (not *time-to-exit*)
		(let delay-loop ((count 0))
                  ;;(BB> "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
                                                            
		  (if (and (not *time-to-exit*)
			   (< count 4)) ;; was 11, changing to 4. 
		      (begin
			(thread-sleep! 1)
			(delay-loop (+ count 1))))
		  (loop)))
		  (if (not *time-to-exit*) (loop))))
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*)))))))
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)" this-wd-num="this-wd-num)))))))

(define (std-exit-procedure)
  (on-exit (lambda () 0))
  ;;(BB> "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up
		       #f
		       (begin
			 (set! *time-to-exit* #t)
			 #t))))
    (debug:print-info 4 *default-log-port* "starting exit process, finalizing databases.")
    (if (and no-hurry (debug:debug-mode 18))
	(rmt:print-db-stats))
    (let ((th1 (make-thread (lambda () ;; thread for cleaning up, give it five seconds
			      (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
                              (if *dbstruct-db* (db:close-all *dbstruct-db*)) ;; one second allocated
			      (if *task-db*    
				  (let ((db (cdr *task-db*)))
				    (if (sqlite3:database? db)
					(begin
					  (sqlite3:interrupt! db)
					  (sqlite3:finalize! db #t)
					  ;; (vector-set! *task-db* 0 #f)
					  (set! *task-db* #f)))))
                              (if (and *runremote*
                                       (remote-conndat *runremote*))
                                  (begin
                                    (http-client#close-all-connections!))) ;; for http-client
                              (if (not (eq? *default-log-port* (current-error-port)))
			      (close-output-port *default-log-port*)
                                  (close-output-port *default-log-port*))
			      (set! *default-log-port* (current-error-port))) "Cleanup db exit thread"))
	  (th2 (make-thread (lambda ()
			      (debug:print 4 *default-log-port* "Attempting clean exit. Please be patient and wait a few seconds...")
			      (if no-hurry
                                  (begin
				  (thread-sleep! 5) ;; give the clean up few seconds to do it's stuff
				  (thread-sleep! 2))
			      (debug:print 4 *default-log-port* " ... done")
			      )
                                    (thread-sleep! 5)) ;; give the clean up few seconds to do it's stuff
                                  (begin
      				  (thread-sleep! 2)))
      			      (debug:print 4 *default-log-port* " ... done")
      			      )
			    "clean exit")))
      (thread-start! th1)
      (thread-start! th2)
      (thread-join! th1))))
      (thread-join! th1)
      )
    )

  0)

(define (std-signal-handler signum)
  ;; (signal-mask! signum)
  (set! *time-to-exit* #t)
  ;;(BB> "got signal "signum)
  (debug:print-error 0 *default-log-port* "Received signal " signum " exiting promptly")
  ;; (std-exit-procedure) ;; shouldn't need this since we are exiting and it will be called anyway
  (exit))

(set-signal-handler! signal/int  std-signal-handler)  ;; ^C
(set-signal-handler! signal/term std-signal-handler)
;; (set-signal-handler! signal/stop std-signal-handler)  ;; ^Z NO, do NOT handle ^Z!
768
769
770
771
772
773
774
775
776




777
778
779
780
781
782
783
784










785
786
787
788
789
790
791
812
813
814
815
816
817
818


819
820
821
822








823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839







-
-
+
+
+
+
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+







(define (common:args-get-state)
  (or (args:get-arg "-state")(args:get-arg ":state")))

(define (common:args-get-status)
  (or (args:get-arg "-status")(args:get-arg ":status")))

(define (common:args-get-testpatt rconf)
  (let* ((rtestpatt     (if rconf (runconfigs-get rconf "TESTPATT") #f))
	 (args-testpatt (or (args:get-arg "-testpatt")
  (let* ((tagexpr (args:get-arg "-tagexpr"))
         (tags-testpatt (if tagexpr (string-join (runs:get-tests-matching-tags tagexpr) ",") #f))
         (testpatt-key  (if (args:get-arg "-mode") (args:get-arg "-mode") "TESTPATT"))
         (args-testpatt (or (args:get-arg "-testpatt") (args:get-arg "-runtests") "%"))
			    (args:get-arg "-runtests")
			    "%"))
	 (testpatt    (or (and (equal? args-testpatt "%")
			       rtestpatt)
			  args-testpatt)))
    (if rtestpatt (debug:print-info 0 *default-log-port* "TESTPATT from runconfigs: " rtestpatt))
    testpatt))

         (rtestpatt     (if rconf (runconfigs-get rconf testpatt-key) #f)))
    (cond
     (tags-testpatt
      (debug:print-info 0 *default-log-port* "-tagexpr "tagexpr" selects testpatt "tags-testpatt)
      tags-testpatt)
     ((and (equal? args-testpatt "%") rtestpatt)
      (debug:print-info 0 *default-log-port* "testpatt defined in "testpatt-key" from runconfigs: " rtestpatt)
      rtestpatt)
     (else args-testpatt))))
     
(define (common:get-linktree)
  (or (getenv "MT_LINKTREE")
      (if *configdat*
	  (configf:lookup *configdat* "setup" "linktree"))))

(define (common:args-get-runname)
  (let ((res (or (args:get-arg "-runname")
1071
1072
1073
1074
1075
1076
1077













































































































































1078
1079
1080
1081
1082
1083
1084
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







      (map (lambda (res)
	     (if (eof-object? res) 9e99 res))
	   (with-input-from-pipe 
	    (conc "ssh " remote-host " cat /proc/loadavg")
	    (lambda ()(list (read)(read)(read)))))
      (with-input-from-file "/proc/loadavg" 
	(lambda ()(list (read)(read)(read))))))

;; get normalized cpu load by reading from /proc/loadavg and /proc/cpuinfo return all three values and the number of real cpus and the number of threads
;; returns alist '((adj-cpu-load . normalized-proc-load) ... etc.
;;  keys: adj-proc-load, adj-core-load, 1m-load, 5m-load, 15m-load
;;
(define (common:get-normalized-cpu-load remote-host)
  (let ((data (if remote-host
                  (with-input-from-pipe 
                   (conc "ssh " remote-host " cat /proc/loadavg;cat /proc/cpuinfo;echo end")
                   read-lines)
                  (append 
                   (with-input-from-file "/proc/loadavg" 
                     read-lines)
                   (with-input-from-file "/proc/cpuinfo"
                     read-lines)
                   (list "end"))))
        (load-rx  (regexp "^([\\d\\.]+)\\s+([\\d\\.]+)\\s+([\\d\\.]+)\\s+.*$"))
        (proc-rx  (regexp "^processor\\s+:\\s+(\\d+)\\s*$"))
        (core-rx  (regexp "^core id\\s+:\\s+(\\d+)\\s*$"))
        (phys-rx  (regexp "^physical id\\s+:\\s+(\\d+)\\s*$"))
        (max-num  (lambda (p n)(max (string->number p) n))))
    ;; (print "data=" data)
    (if (null? data) ;; something went wrong
        #f
        (let loop ((hed      (car data))
                   (tal      (cdr data))
                   (loads    #f)
                   (proc-num 0)  ;; processor includes threads
                   (phys-num 0)  ;; physical chip on motherboard
                   (core-num 0)) ;; core
          ;; (print hed ", " loads ", " proc-num ", " phys-num ", " core-num)
          (if (null? tal) ;; have all our data, calculate normalized load and return result
              (let* ((act-proc (+ proc-num 1))
                     (act-phys (+ phys-num 1))
                     (act-core (+ core-num 1))
                     (adj-proc-load (/ (car loads) act-proc))
                     (adj-core-load (/ (car loads) act-core)))
                (append (list (cons 'adj-proc-load adj-proc-load)
                              (cons 'adj-core-load adj-core-load))
                        (list (cons '1m-load (car loads))
                              (cons '5m-load (cadr loads))
                              (cons '15m-load (caddr loads)))
                        (list (cons 'proc act-proc)
                              (cons 'core act-core)
                              (cons 'phys act-phys))))
              (regex-case
               hed
               (load-rx  ( x l1 l5 l15 ) (loop (car tal)(cdr tal)(map string->number (list l1 l5 l15)) proc-num phys-num core-num))
               (proc-rx  ( x p         ) (loop (car tal)(cdr tal) loads           (max-num p proc-num) phys-num core-num))
               (phys-rx  ( x p         ) (loop (car tal)(cdr tal) loads           proc-num (max-num p phys-num) core-num))
               (core-rx  ( x c         ) (loop (car tal)(cdr tal) loads           proc-num phys-num (max-num c core-num)))
               (else 
                (begin
                  ;; (print "NO MATCH: " hed)
                  (loop (car tal)(cdr tal) loads proc-num phys-num core-num)))))))))

(define (common:unix-ping hostname)
  (let ((res (system (conc "ping -c 1 " hostname " > /dev/null"))))
    (eq? res 0)))

;; ideally put all this info into the db, no need to preserve it across moving homehost
;;
;; return list of
;;  ( reachable? cpuload update-time )
(define (common:get-host-info hostname)
  (let* ((loadinfo (rmt:get-latest-host-load hostname))
         (load (car loadinfo))
         (load-sample-time (cdr loadinfo))
         (load-sample-age (- (current-seconds) load-sample-time))
         (loadinfo-timeout-seconds 20)
         (host-last-update-timeout-seconds 10)
         (host-rec (hash-table-ref/default *host-loads* hostname #f))
         )
    (cond
     ((< load-sample-age loadinfo-timeout-seconds)
      (list #t
            load-sample-time
            load))
     ((and host-rec
           (< (current-seconds) (+ (host-last-update host-rec) host-last-update-timeout-seconds)))
      (list #t
            (host-last-update host-rec)
            (host-last-cpuload host-rec )))
     ((common:unix-ping hostname)
      (list #t
            (current-seconds)
            (alist-ref 'adj-core-load (common:get-normalized-cpu-load hostname))))
     (else
      (list #f 0 -1)))))
    
(define (common:update-host-loads-table hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw)))
    (for-each
     (lambda (hostname)
       (let* ((rec       (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                          (if h
                              h
                              (let ((h (make-host)))
                                (hash-table-set! *host-loads* hostname h)
                                h))))
              (host-info         (common:get-host-info hostname))
              (is-reachable      (car host-info))
              (last-reached-time (cadr host-info))
              (load              (caddr host-info)))
         (host-reachable-set!    rec is-reachable)
         (host-last-update-set!  rec last-reached-time)
         (host-last-cpuload-set! rec load)))
     hosts)))

(define (common:get-least-loaded-host hosts-raw)
  (let* ((hosts (filter (lambda (x)
                          (string-match (regexp "^\\S+$") x))
                        hosts-raw))
         (best-host #f)
         (best-load 99999)
         (curr-time (current-seconds)))
    (common:update-host-loads-table hosts)
    (for-each
     (lambda (hostname)
       (let* ((rec
               (let ((h (hash-table-ref/default *host-loads* hostname #f)))
                 (if h
                     h
                     (let ((h (make-host)))
                       (hash-table-set! *host-loads* hostname h)
                       h))))
              (reachable (host-reachable rec))
              (load      (host-last-cpuload   rec)))
         (cond
          ((not reachable) #f)
          ((< (+ load (/ (random 250) 1000))         ;; add a random factor to keep from getting in a rut
              (+ best-load (/ (random 250) 1000))  )
           (set! best-load load)
           (set! best-host hostname)))))
     hosts)
    best-host))




(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f)(remote-host #f))
  (let* ((loadavg (common:get-cpu-load remote-host))
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload numcpus))
	 (loadjmp (- first next)))
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582




1583
1584
1585




1586
1587
1588
1589
1590
1591
1592
1593
1594

1595
1596
1597


1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613





1614

1615
1616
1617
1618
1619
1620
1621
1762
1763
1764
1765
1766
1767
1768



1769
1770
1771
1772
1773


1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784


1785

1786

1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809

1810
1811
1812
1813
1814
1815
1816
1817







-
-
-
+
+
+
+

-
-
+
+
+
+







-
-
+
-

-
+
+
















+
+
+
+
+
-
+







    (query fetch-column
	   (sql db "SELECT ipaddr || ':' || portnum FROM dashboards;"))))
    
;;======================================================================
;;  T E S T   L A U N C H I N G   P E R   I T E M   W I T H   H O S T   T Y P E S
;;======================================================================
;; 
;; [host-types]
;; general ssh #{getbgesthost general}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;; [hosts]
;; arm cubie01 cubie02
;; x86_64 zeus xena myth01
;; allhosts #{g hosts arm} #{g hosts x86_64}
;; 
;; [hosts]
;; general cubian xena
;; [host-types]
;; general #MTLOWESTLOAD #{g hosts allhosts}
;; arm     #MTLOWESTLOAD #{g hosts arm}
;; nbgeneral nbjob run JOBCOMMAND -log $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME.$MT_TESTNAME-$MT_ITEM_PATH.lgo
;; 
;; [launchers]
;; envsetup general
;; xor/%/n 4C16G
;; % nbgeneral
;; 
;; [jobtools]
;; launcher bsub
;; # if defined and not "no" flexi-launcher will bypass launcher unless there is no
;; # if defined and not "no" flexi-launcher will bypass "launcher" unless no match.
;; # match.
;; flexi-launcher yes  

;; launcher nbfake
;;
(define (common:get-launcher configdat testname itempath)
  (let ((fallback-launcher (configf:lookup configdat "jobtools" "launcher")))
    (if (and (configf:lookup configdat "jobtools" "flexi-launcher") ;; overrides launcher
	     (not (equal? (configf:lookup configdat "jobtools" "flexi-launcher") "no")))
	(let* ((launchers         (hash-table-ref/default configdat "launchers" '())))
	  (if (null? launchers)
	      fallback-launcher
	      (let loop ((hed (car launchers))
			 (tal (cdr launchers)))
		(let ((patt      (car hed))
		      (host-type (cadr hed)))
		  (if (tests:match patt testname itempath)
		      (begin
			(debug:print-info 2 *default-log-port* "Have flexi-launcher match for " testname "/" itempath " = " host-type)
			(let ((launcher (configf:lookup configdat "host-types" host-type)))
			  (if launcher
			      (let* ((launcher-parts (string-split launcher))
				     (launcher-exe   (car launcher-parts)))
				(if (equal? launcher-exe "#MTLOWESTLOAD") ;; this is our special case, we will find the lowest load and craft a nbfake commandline
				    (let ((targ-host (common:get-least-loaded-host (cdr launcher-parts))))
				      (conc "remrun " targ-host))
			      launcher
				    launcher))
			      (begin
				(debug:print-info 0 *default-log-port* "WARNING: no launcher found for host-type " host-type)
				(if (null? tal)
				    fallback-launcher
				    (loop (car tal)(cdr tal)))))))
		      ;; no match, try again
		      (if (null? tal)

Modified common_records.scm from [0e6990e6a2] to [e3400966c5].

119
120
121
122
123
124
125

126
127
128
129
130
131
132
133
134
135
136

137















































138
139
140
141
142
143
144
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136

137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192







+










-
+

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	(lambda ()
	  (if *logging*
	      (db:log-event (apply conc params))
	      (apply print params)
	      )))))

;; Brandon's debug printer shortcut (indulge me :)
(define *BB-process-starttime* (current-milliseconds))
(define (BB> . in-args)
  (let* ((stack (get-call-chain))
         (location #f))
    (for-each
     (lambda (frame)
       (let* ((this-loc (vector-ref frame 0))
              (this-func (cadr (string-split this-loc " "))))
         (if (equal? this-func "BB>")
             (set! location this-loc))))
     stack)
    (let ((dp-args (append (list 0 *default-log-port* location"   "  ) in-args)))
    (let ((dp-args (append (list 0 *default-log-port* (conc location "@"(/ (- (current-milliseconds) *BB-process-starttime*) 1000)"   ")  ) in-args)))
      (apply debug:print dp-args))))

(define *BBpp_custom_expanders_list* (make-hash-table))



;; register hash tables with BBpp.
(hash-table-set! *BBpp_custom_expanders_list* HASH_TABLE:
                 (cons hash-table? hash-table->alist))

;; test name converter
(define (BBpp_custom_converter arg)
  (let ((res #f))
    (for-each
     (lambda (custom-type-name)
       (let* ((custom-type-info      (hash-table-ref *BBpp_custom_expanders_list* custom-type-name))
              (custom-type-test      (car custom-type-info))
              (custom-type-converter (cdr custom-type-info)))
         (when (and (not res) (custom-type-test arg))
           (set! res (custom-type-converter arg)))))
     (hash-table-keys *BBpp_custom_expanders_list*))
    (if res (BBpp_ res) arg)))

(define (BBpp_ arg)
  (cond
   ;;((SOMESTRUCT? arg) (cons SOMESTRUCT: (SOMESTRUCT->alist arg)))
   ;;((dboard:tabdat? arg) (cons dboard:tabdat: (dboard:tabdat->alist arg)))
   ((hash-table? arg)
    (let ((al (hash-table->alist arg)))
      (BBpp_ (cons HASH_TABLE: al))))
   ((null? arg) '())
   ;;((list? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg))))
   ((pair? arg) (cons (BBpp_ (car arg)) (BBpp_ (cdr arg))))
   (else (BBpp_custom_converter arg))))

;; Brandon's pretty printer.  It expands hashes and custom types in addition to regular pp
(define (BBpp arg)
  (pp (BBpp_ arg)))

;(use define-macro)
(define-syntax inspect
  (syntax-rules ()
    [(_ x)
    ;; (with-output-to-port (current-error-port)
       (printf "~a is: ~a\n" 'x (with-output-to-string (lambda () (BBpp x))))
     ;;  )
     ]
    [(_ x y ...) (begin (inspect x) (inspect y ...))]))

(define (debug:print-error n e . params)
  ;; normal print
  (if (debug:debug-mode n)
      (with-output-to-port (or e (current-error-port))
	(lambda ()
	  (if *logging*

Modified configf.scm from [d9393dba52] to [ddff2b4e5d].

55
56
57
58
59
60
61

62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92











93
94
95
96
97
98


99
100
101
102
103
104
105
106
107

108
109

110
111
112
113

114
115
116
117
118
119
120
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73

74
75
76
77
78
79
80
81
82
83
84
85
86
87
88





89
90
91
92
93
94
95
96
97
98
99
100
101
102
103


104
105
106
107
108
109
110
111
112
113

114
115

116
117
118
119

120
121
122
123
124
125
126
127







+











-
+














-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+




-
-
+
+








-
+

-
+



-
+







	 (caar cmdres)))))

;;======================================================================
;; Make the regexp's needed globally available
;;======================================================================

(define configf:include-rx (regexp "^\\[include\\s+(.*)\\]\\s*$"))
(define configf:script-rx  (regexp "^\\[scriptinc\\s+(.*)\\]\\s*$")) ;; include output from a script
(define configf:section-rx (regexp "^\\[(.*)\\]\\s*$"))
(define configf:blank-l-rx (regexp "^\\s*$"))
(define configf:key-sys-pr (regexp "^(\\S+)\\s+\\[system\\s+(\\S+.*)\\]\\s*$"))
(define configf:key-val-pr (regexp "^(\\S+)(\\s+(.*)|())$"))
(define configf:key-no-val (regexp "^(\\S+)(\\s*)$"))
(define configf:comment-rx (regexp "^\\s*#.*"))
(define configf:cont-ln-rx (regexp "^(\\s+)(\\S+.*)$"))
(define configf:settings   (regexp "^\\[configf:settings\\s+(\\S+)\\s+(\\S+)]\\s*$"))

;; read a line and process any #{ ... } constructs

(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget)\\s+([^\\}\\{]*)\\}(.*)"))
(define configf:var-expand-regex (regexp "^(.*)#\\{(scheme|system|shell|getenv|get|runconfigs-get|rget|scm|sh|rp|gv|g|mtrah)\\s+([^\\}\\{]*)\\}(.*)"))

(define (configf:process-line l ht allow-system #!key (linenum #f))
  (let loop ((res l))
    (if (string? res)
	(let ((matchdat (string-search configf:var-expand-regex res)))
	  (if matchdat
	      (let* ((prestr  (list-ref matchdat 1))
		     (cmdtype (list-ref matchdat 2)) ;; eval, system, shell, getenv
		     (cmd     (list-ref matchdat 3))
		     (poststr (list-ref matchdat 4))
		     (result  #f)
		     (start-time (current-seconds))
		     (cmdsym  (string->symbol cmdtype))
		     (fullcmd (case cmdsym
				((scheme)(conc "(lambda (ht)" cmd ")"))
				((system)(conc "(lambda (ht)(system \"" cmd "\"))"))
				((shell) (conc "(lambda (ht)(shell \""  cmd "\"))"))
				((getenv)(conc "(lambda (ht)(get-environment-variable \"" cmd "\"))"))
				((get)   
				((scheme scm) (conc "(lambda (ht)" cmd ")"))
				((system)     (conc "(lambda (ht)(system \"" cmd "\"))"))
				((shell sh)   (conc "(lambda (ht)(string-translate (shell \""  cmd "\") \"\n\" \" \"))"))
				((realpath rp)(conc "(lambda (ht)(common:nice-path \"" cmd "\"))"))
				((getenv gv)  (conc "(lambda (ht)(get-environment-variable \"" cmd "\"))"))
				((mtrah)      (conc "(lambda (ht)"
                                                    "    (let ((extra \"" cmd "\"))"
						    "       (conc (or *toppath* (get-environment-variable \"MT_RUN_AREA_HOME\"))"
						    "             (if (string-null? extra) \"\" \"/\")"
						    "             extra)))"))
				((get g)   
				 (let* ((parts (string-split cmd))
					(sect  (car parts))
					(var   (cadr parts)))
				   (conc "(lambda (ht)(config-lookup ht \"" sect "\" \"" var "\"))")))
				((runconfigs-get) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				((rget)           (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				((runconfigs-get rget) (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				;; ((rget)           (conc "(lambda (ht)(runconfigs-get ht \"" cmd "\"))"))
				(else "(lambda (ht)(print \"ERROR\") \"ERROR\")"))))
		;; (print "fullcmd=" fullcmd)
		(handle-exceptions
		 exn
		 (begin
		   (debug:print 0 *default-log-port* "WARNING: failed to process config input \"" l "\"")
		   (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
		   ;; (print "exn=" (condition->list exn))
		   (set! result (conc "#{( " cmdtype ") " cmd"}")))
		   (set! result (conc "#{( " cmdtype ") " cmd "}, full expansion: " fullcmd)))
		 (if (or allow-system
			 (not (member cmdtype '("system" "shell"))))
			 (not (member cmdtype '("system" "shell" "sh"))))
		     (with-input-from-string fullcmd
		       (lambda ()
			 (set! result ((eval (read)) ht))))
		    (set! result (conc "#{(" cmdtype ") "  cmd "}"))))
		     (set! result (conc "#{(" cmdtype ") "  cmd "}"))))
		(case cmdsym
		  ((system shell scheme)
		   (let ((delta (- (current-seconds) start-time)))
		     (if (> delta 2)
			 (debug:print-info 0 *default-log-port* "for line \"" l "\"\n command:  " cmd " took " delta " seconds to run with output:\n   " result)
			 (debug:print-info 9 *default-log-port* "for line \"" l "\"\n command:  " cmd " took " delta " seconds to run with output:\n   " result)))))
		(loop (conc prestr result poststr)))
180
181
182
183
184
185
186

187

188
189
190
191
192



193
194
195
196
197
198
199
200
201
202
203

204

205
206
207
208
209
210
211
187
188
189
190
191
192
193
194

195
196
197
198
199

200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

215
216
217
218
219
220
221
222







+
-
+




-
+
+
+











+
-
+







;; in the environment on the fly
;; sections: #f => get all, else list of sections to gather
;; post-section-procs alist of section-pattern => proc, where: (proc section-name next-section-name ht curr-path)
;;
(define (read-config path ht allow-system #!key (environ-patt #f)(curr-section #f)(sections #f)(settings (make-hash-table))(keep-filenames #f)(post-section-procs '()))
  (debug:print-info 5 *default-log-port* "read-config " path " allow-system " allow-system " environ-patt " environ-patt " curr-section: " curr-section " sections: " sections " pwd: " (current-directory))
  (debug:print 9 *default-log-port* "START: " path)
  (if (and (not (port? path))
  (if (not (file-exists? path))
	   (not (file-exists? path))) ;; for case where we are handed a port
      (begin 
	(debug:print-info 1 *default-log-port* "read-config - file not found " path " current path: " (current-directory))
	;; WARNING: This is a risky change but really, we should not return an empty hash table if no file read?
	#f) ;; (if (not ht)(make-hash-table) ht))
      (let ((inp        (open-input-file path))
      (let ((inp        (if (string? path)
			    (open-input-file path)
			      path)) ;; we can be handed a port
	    (res        (if (not ht)(make-hash-table) ht))
	    (metapath   (if (or (debug:debug-mode 9)
				keep-filenames)
			    path #f)))
	(let loop ((inl               (configf:read-line inp res (calc-allow-system allow-system curr-section sections) settings)) ;; (read-line inp))
		   (curr-section-name (if curr-section curr-section "default"))
		   (var-flag #f);; turn on for key-var-pr and cont-ln-rx, turn off elsewhere
		   (lead     #f))
	  (debug:print-info 8 *default-log-port* "curr-section-name: " curr-section-name " var-flag: " var-flag "\n   inl: \"" inl "\"")
	  (if (eof-object? inl) 
	      (begin
		(if (string? path) ;; we received a path, not a port, thus we are responsible for closing it.
		(close-input-port inp)
		    (close-input-port inp))
		(hash-table-delete! res "") ;; we are using "" as a dumping ground and must remove it before returning the ht
		(debug:print 9 *default-log-port* "END: " path)
		res)
	      (regex-case 
	       inl 
	       (configf:comment-rx _                  (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
	       (configf:blank-l-rx _                  (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
227
228
229
230
231
232
233
















234
235
236
237
238
239
240
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







							      (read-config full-conf res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames)
							      ;; (pop-directory)
							      (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
							    (begin
							      (debug:print '(2 9) #f "INFO: include file " include-file " not found (called from " path ")")
							      (debug:print 2 *default-log-port* "        " full-conf)
							      (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))))
	       (configf:script-rx ( x include-script );; handle-exceptions
						      ;;    exn
						      ;;    (begin
						      ;;      (debug:print '(0 2 9) #f "INFO: include from script " include-script " failed.")
						      ;;      (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
							 (if (and (file-exists? include-script)(file-execute-access? include-script))
							     (let* ((new-inp-port (open-input-pipe include-script)))
							       (debug:print '(2 9) *default-log-port* "Including from script output: " include-script)
							      ;;  (print "We got here, calling read-config next. Port is: " new-inp-port)
							       (read-config new-inp-port res allow-system environ-patt: environ-patt curr-section: curr-section-name sections: sections settings: settings keep-filenames: keep-filenames)
							       (close-input-port new-inp-port)
							       (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f))
							     (begin
							       (debug:print 0 *default-log-port* "Script not found or not exectutable: " include-script)
							       (loop (configf:read-line inp res (calc-allow-system allow-system curr-section-name sections) settings) curr-section-name #f #f)))
							 ) ;; )
	       (configf:section-rx ( x section-name ) (begin
							;; call post-section-procs
							(for-each 
							 (lambda (dat)
							   (let ((patt (car dat))
								 (proc (cdr dat)))
							     (if (string-match patt curr-section-name)

Modified dashboard.scm from [5d219ac9eb] to [3a1cbbc7df].

288
289
290
291
292
293
294











295
296
297
298
299
300
301
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312







+
+
+
+
+
+
+
+
+
+
+







  (runs-summary-source-runname-label #f)
  (runs-summary-dest-runname-label #f)
  ;; runs summary view
  
  tests-tree       ;; used in newdashboard
  )

;; register tabdat with BBpp
;; this is used by BBpp (Brandon's pretty printer) to convert dboard:tabdat into a composition of lists that pp will handle
(hash-table-set! *BBpp_custom_expanders_list* TABDAT:
                 (cons dboard:tabdat?
                       (lambda (tabdat-item)
                         (filter
                          (lambda (alist-entry)
                            (member (car alist-entry)
                                    '(allruns-by-id allruns))) ;; FIELDS OF INTEREST
                          (dboard:tabdat->alist tabdat-item)))))

(define (dboard:tabdat-target-string vec)
  (let ((targ (dboard:tabdat-target vec)))
    (if (list? targ)(string-intersperse targ "/") "no-target-specified")))

(define (dboard:tabdat-test-patts-use vec)    
  (let ((val (dboard:tabdat-test-patts vec)))(if val val ""))) ;;RADT => What is the if for?

358
359
360
361
362
363
364














365
366
367
368
369
370
371
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396







+
+
+
+
+
+
+
+
+
+
+
+
+
+







  ((tests-by-name (make-hash-table)) : hash-table) ;; hash of testfullname => testdat
  key-vals
  ((last-update   0)                 : fixnum) ;; last query to db got records from before last-update
  ((data-changed  #f)                : boolean)
  ((run-data-offset  0)              : number)      ;; get only 100 items per call, set back to zero when received less that 100 items
  (db-path #f)
  )

;; register dboard:rundat with BBpp
;; this is used by BBpp (Brandon's pretty printer) to convert dboard:rundat into a composition of lists that pp will handle
(hash-table-set! *BBpp_custom_expanders_list* RUNDAT:
                 (cons dboard:rundat?
                       (lambda (tabdat-item)
                         (filter
                          (lambda (alist-entry)
                            (member (car alist-entry)
                                    '(run run-data-offset ))) ;; FIELDS OF INTEREST
                          (dboard:rundat->alist tabdat-item)))))




(define (dboard:rundat-make-init #!key (run #f)(key-vals #f)(tests #f));; -100 is before time began
  (make-dboard:rundat 
   run: run
   tests: (or tests (make-hash-table))
   key-vals: key-vals 
   )) 
621
622
623
624
625
626
627


628
629
630
631
632
633
634
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661







+
+







	 (start-time  (current-seconds))
	 (runs-hash   (let ((ht (make-hash-table)))
			 (for-each (lambda (run)
				     (hash-table-set! ht (db:get-value-by-header run header "id") run))
				   runs-tree) ;; (vector-ref runs-dat 1))
			 ht))
	 (tb          (dboard:tabdat-runs-tree tabdat)))
    ;;(BB> "In update-rundat")
    ;;(inspect allruns runs-hash)
    (dboard:tabdat-last-runs-update-set! tabdat (- (current-seconds) 2))
    (dboard:tabdat-header-set! tabdat header)
    ;; 
    ;; trim runs to only those that are changing often here
    ;; 
    (if (null? runs)
	(begin
738
739
740
741
742
743
744
745







746
747
748
749
750
751
752
765
766
767
768
769
770
771

772
773
774
775
776
777
778
779
780
781
782
783
784
785







-
+
+
+
+
+
+
+







	    (let* ((newmaxtests (max num-tests maxtests))
		   ;; (last-update (- (current-seconds) 10))
		   (run-struct  (or run-struct
				    (dboard:rundat-make-init
				     run:         run 
				     tests:       tests-ht
				     key-vals:    key-vals)))
		   (new-res     (if (null? all-test-ids) res (cons run-struct res)))
		   (new-res     (if (null? all-test-ids)
                                    res
                                    (delete-duplicates
                                     (cons run-struct res)
                                     (lambda (a b)
                                       (eq? (db:get-value-by-header (dboard:rundat-run a) header "id")
                                            (db:get-value-by-header (dboard:rundat-run b) header "id"))))))
		   (elapsed-time (- (current-seconds) start-time)))
	      (if (null? all-test-ids)
		  (hash-table-delete! (dboard:tabdat-allruns-by-id tabdat) run-id)
		  (hash-table-set!    (dboard:tabdat-allruns-by-id tabdat) run-id run-struct))
	      (if (or (null? tal)
		      (> elapsed-time 2)) ;; stop loading data after 5 seconds, on the next call more data *should* be loaded since get-tests-for-run uses last update
		  (begin
3389
3390
3391
3392
3393
3394
3395



3396
3397
3398
3399
3400
3401
3402
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438







+
+
+







     (let* ((tabdat (dboard:common-get-tabdat commondat tab-num: tab-num))
	    (dbkeys (dboard:tabdat-dbkeys tabdat)))
       ;;(print "RA => calling runs-tab-updater with commondat " commondat " tab-num " tab-num)
       ;;(tabdat-values tabdat) ;;RA added 
       ;; (pp (dboard:tabdat->alist tabdat))
       ;; (if (dashboard:database-changed? commondat tabdat context-key: 'runs-rundat)      
       (dashboard:do-update-rundat tabdat)
       ;;(BB> "dashboard:runs-tab-updater")
       ;;(inspect tabdat)

       (let ((uidat (dboard:commondat-uidat commondat)))
	 ;;(print "RA => Calling update-buttons with tabdat : " tabdat " uidat " uidat)
	 (update-buttons tabdat uidat (dboard:tabdat-numruns tabdat) (dboard:tabdat-num-tests tabdat)))
       ))
   "dashboard:runs-tab-updater"))

;;======================================================================

Modified db.scm from [b74c06eb1c] to [31eac1d5ff].

191
192
193
194
195
196
197
198

199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

217
218
219
220
221
222
223
191
192
193
194
195
196
197

198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

216
217
218
219
220
221
222
223







-
+

















-
+







;;
;; (define db:get-dbdir common:get-db-tmp-area)
;;  (or (configf:lookup *configdat* "setup" "dbdir")
;;      (conc (configf:lookup *configdat* "setup" "linktree") "/.db")))
	       
(define (db:set-sync db)
  (let ((syncprag (configf:lookup *configdat* "setup" "sychronous")))
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 1) ";")))) 
    (sqlite3:execute db (conc "PRAGMA synchronous = " (or syncprag 0) ";")))) 

;; open an sql database inside a file lock
;; returns: db existed-prior-to-opening
;; RA => Returns a db handler; sets the lock if opened in writable mode
;;
(define (db:lock-create-open fname initproc)
  (let* ((parent-dir   (or (pathname-directory fname)(current-directory))) ;; no parent? go local
	 (dir-writable (file-write-access? parent-dir))
	 (file-exists  (file-exists? fname))
	 (file-write   (if file-exists
			   (file-write-access? fname)
			   dir-writable )))
    (if file-write ;; dir-writable
	(let (;; (lock    (obtain-dot-lock fname 1 5 10))
	      (db      (sqlite3:open-database fname)))
	  (sqlite3:set-busy-handler! db (make-busy-timeout 136000))
	  ;; (db:set-sync db)
	  (sqlite3:execute db "PRAGMA synchronous = NORMAL;")
	  (sqlite3:execute db "PRAGMA synchronous = 0;")
	  (if (not file-exists)
	      (begin
		(if (string-match "^/tmp/.*" fname) ;; this is a file in /tmp
		    (sqlite3:execute db "PRAGMA journal_mode=WAL;")
		    (print "Creating " fname " in NON-WAL mode."))
		(initproc db)))
	  ;; (release-dot-lock fname)
333
334
335
336
337
338
339

340
341
342
343
344
345
346
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347







+







    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update")))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)))

;; close all opened run-id dbs
(define (db:close-all dbstruct)
  (if (dbr:dbstruct? dbstruct)
      (begin
        ;; (db:sync-touched dbstruct 0 force-sync: #t) ;; NO. Do not do this here. Instead we rely on a server to be started when there are writes, even if the server itself is not going to be used as a server.
737
738
739
740
741
742
743
744

745
746
747
748
749
750
751
738
739
740
741
742
743
744

745
746
747
748
749
750
751
752







-
+







(define (db:get-access-mode)
  (if (args:get-arg "-use-db-cache") 'cached 'rmt))

;; Add db direct
;;
(define (db:dispatch-query access-mode rmt-cmd db-cmd . params)
  (if (eq? access-mode 'cached)
      (print "not doing cached calls right now"))
      (debug:print 2 *default-log-port* "not doing cached calls right now"))
;;      (apply db:call-with-cached-db db-cmd params)
      (apply rmt-cmd params))
;;)

;; return the target db handle so it can be used
;;
(define (db:cache-for-read-only source target #!key (use-last-update #f))
1495
1496
1497
1498
1499
1500
1501







1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
















1517
1518
1519
1520
1521
1522
1523
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509















1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532







+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	   ;;      			    (> (- (current-seconds)(file-modification-time tdatpath)) 600)))) ;; no change in 10 minutes to testdat.db - she's dead Jim
	   ;;      		    incompleted))
	   (min-incompleted-ids (map car incompleted)) ;; do 'em all
	   (all-ids             (append min-incompleted-ids (map car oldlaunched))))
      (if (> (length all-ids) 0)
	  (begin
	    (debug:print 0 *default-log-port* "WARNING: Marking test(s); " (string-intersperse (map conc all-ids) ", ") " as INCOMPLETE")
            (for-each
             (lambda (test-id)
               (db:test-set-status-state dbstruct run-id test-id "COMPLETE" "DEAD" "Test failed to complete"))
             all-ids))))))

;; ALL REPLACED BY THE BLOCK ABOVE
;;
	    (sqlite3:execute 
	     db
	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE id IN (" 
		   (string-intersperse (map conc all-ids) ",")
		   ");")))))

    ;; Now do rollups for the toplevel tests
    ;;
    ;; (db:delay-if-busy dbdat)
    (for-each
     (lambda (toptest)
       (let ((test-name (list-ref toptest 3)))
;;	     (run-id    (list-ref toptest 5)))
	 (db:top-test-set-per-pf-counts dbstruct run-id test-name)))
     toplevels)))
;; 	    (sqlite3:execute 
;; 	     db
;; 	     (conc "UPDATE tests SET state='INCOMPLETE' WHERE run_id=? AND id IN (" 
;; 		   (string-intersperse (map conc all-ids) ",")
;; 		   ");")
;;              run-id))))
;; 
;;     ;; Now do rollups for the toplevel tests
;;     ;;
;;     ;; (db:delay-if-busy dbdat)
;;     (for-each
;;      (lambda (toptest)
;;        (let ((test-name (list-ref toptest 3)))
;; ;;	     (run-id    (list-ref toptest 5)))
;; 	 (db:top-test-set-per-pf-counts dbstruct run-id test-name)))
;;      toplevels)))

;; BUG: Probably broken - does not explicitly use run-id in the query
;;
(define (db:top-test-set-per-pf-counts dbstruct run-id test-name)
  (db:general-call (db:get-db dbstruct run-id) 'top-test-set-per-pf-counts (list test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name test-name))) 
 
		     
2036
2037
2038
2039
2040
2041
2042
2043


2044
2045
2046
2047
2048
2049
2050
2045
2046
2047
2048
2049
2050
2051

2052
2053
2054
2055
2056
2057
2058
2059
2060







-
+
+







	     (lambda (state status count)
	       (let ((netstate (if (equal? state "COMPLETED") status state)))
		 (if (string? netstate)
		     (begin
		       (hash-table-set! totals netstate (+ (hash-table-ref/default totals netstate 0) count))
		       (hash-table-set! curr   netstate (+ (hash-table-ref/default curr   netstate 0) count))))))
	     db
	     "SELECT state,status,count(id) FROM tests AS t GROUP BY state,status ORDER BY state,status DESC;")
	     "SELECT state,status,count(id) FROM tests AS t WHERE run_id=? GROUP BY state,status ORDER BY state,status DESC;"
             run-id)
	    ;; add the per run counts to res
	    (for-each (lambda (state)
			(set! res (cons (list run-name state (hash-table-ref curr state)) res)))
		      (sort (hash-table-keys curr) string>=))
	    (set! curr (make-hash-table))))))
     runs-info)
    (for-each (lambda (state)
2585
2586
2587
2588
2589
2590
2591



2592
2593
2594
2595
2596
2597
2598
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611







+
+
+







		      (string-intersperse testnames "','")
		      "') AND NOT (uname = 'n/a' AND item_path='');")) ;; should this include the (uname = 'n/a' ...) ???
	       ))
	    0)))))
             ;; DEBUG FIXME - need to merge this v.155 query correctly   
             ;; AND testname in (SELECT testname FROM test_meta WHERE jobgroup=?)
             ;; AND NOT (uname = 'n/a' AND item_path = '');"

;; tags: '("tag%" "tag2" "%ag6")
;;

;; done with run when:
;;   0 tests in LAUNCHED, NOT_STARTED, REMOTEHOSTSTART, RUNNING
(define (db:estimated-tests-remaining dbstruct run-id)
  (db:with-db
   dbstruct
   run-id
3052
3053
3054
3055
3056
3057
3058


3059
3060
3061

3062
3063
3064
3065
3066
3067
3068
3069
3070
3071


3072
3073
3074
3075
3076
3077
3078
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075

3076
3077
3078
3079
3080
3081
3082
3083
3084
3085

3086
3087
3088
3089
3090
3091
3092
3093
3094







+
+


-
+









-
+
+







    (sqlite3:for-each-row
     (lambda (rid)
       (set! row-ids (cons rid row-ids)))
     runsqry)
    (sqlite3:finalize! runsqry)
    row-ids))

;; finds latest matching all patts for given run-id
;;
(define (db:test-get-paths-matching-keynames-target-new dbstruct run-id keynames target res testpatt statepatt statuspatt runname)
  (let* ((testqry (tests:match->sqlqry testpatt))
	 (tstsqry (conc "SELECT rundir FROM tests WHERE " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
	 (tstsqry (conc "SELECT rundir FROM tests WHERE run_id=? AND " testqry " AND state LIKE '" statepatt "' AND status LIKE '" statuspatt "' ORDER BY event_time ASC;")))
    (db:with-db
     dbstruct
     run-id
     #f
     (lambda (db)
       (sqlite3:for-each-row 
	(lambda (p)
	  (set! res (cons p res)))
	db
	tstsqry)
	tstsqry
	run-id)
       res))))

(define (db:test-toplevel-num-items dbstruct run-id testname)
  (db:with-db
   dbstruct
   run-id
   #f
3308
3309
3310
3311
3312
3313
3314

3315
3316
3317
3318
3319
3320
3321
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338







+







	;; '(test-set-rundir         "UPDATE tests SET rundir=? AND testname=? AND item_path=?;") ;; DONE
	'(test-set-rundir-shortdir "UPDATE tests SET rundir=?,shortdir=? WHERE testname=? AND item_path=? AND run_id=?;")    ;; BROKEN!!! NEEDS run-id
	'(delete-tests-in-state   ;; "DELETE FROM tests WHERE state=?;")                  ;; DONE
	  "UPDATE tests SET state='DELETED' WHERE state=?")
	'(tests:test-set-toplog   "UPDATE tests SET final_logf=? WHERE run_id=? AND testname=? AND item_path='';")
	'(update-cpuload-diskfree "UPDATE tests SET cpuload=?,diskfree=? WHERE id=?;") ;; DONE
	'(update-uname-host       "UPDATE tests SET uname=?,host=? WHERE id=?;")       ;; DONE
        '(update-test-rundat      "INSERT INTO test_rundat (test_id,update_time,cpuload,diskfree,diskusage,run_duration) VALUES (?,?,?,?,?,?);")
	'(update-test-state       "UPDATE tests SET state=? WHERE state=? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	'(update-test-status      "UPDATE tests SET status=? WHERE status like ? AND run_id=? AND testname=? AND NOT (item_path='' AND testname IN (SELECT DISTINCT testname FROM tests WHERE testname=? AND item_path != ''));")
	;; stuff for roll-up-pass-fail-counts
	'(update-pass-fail-counts "UPDATE tests 
             SET fail_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('FAIL','CHECK','INCOMPLETE','ABORT')),
                 pass_count=(SELECT count(id) FROM tests WHERE testname=? AND item_path != '' AND status IN ('PASS','WARN','WAIVED'))
             WHERE testname=? AND item_path='' AND run_id=?;") ;; DONE  ;; BROKEN!!! NEEDS run-id
3462
3463
3464
3465
3466
3467
3468


















3469
3470
3471
3472
3473
3474
3475
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







    (sqlite3:for-each-row
     (lambda (state status count)
       (set! res (cons (vector state status count) res)))
     db
     "SELECT state,status,count(state) FROM tests WHERE run_id=? AND testname=? AND item_path='' GROUP BY state,status;"
     run-id testname)
    res))


(define (db:get-latest-host-load dbstruct raw-hostname)
  (let* ((hostname (string-substitute "\\..*$" "" raw-hostname))
        (res  (cons -1 0))
        (mydb (db:dbdat-get-db (db:get-db dbstruct 0)))
        )
    (db:with-db
     dbstruct
     0
     #f
     (lambda (db)
       (sqlite3:for-each-row
        (lambda (cpuload update-time)  (set! res (cons cpuload update-time)))
        db
        "SELECT tr.cpuload, tr.update_time FROM test_rundat tr, tests t WHERE t.host=? AND tr.cpuload != -1  AND tr.test_id=t.id ORDER BY tr.update_time DESC LIMIT 1;"
        hostname))) res ))


(define (db:set-top-level-from-items dbstruct run-id testname)
  (let* ((dbdat (db:get-db dbstruct run-id))
	 (db    (db:dbdat-get-db dbdat))
	 (summ  (db:get-state-status-summary db run-id testname))
	 (find  (lambda (state status)
		  (if (null? summ) 
3602
3603
3604
3605
3606
3607
3608



















3609
3610
3611
3612
3613
3614
3615
3637
3638
3639
3640
3641
3642
3643
3644
3645
3646
3647
3648
3649
3650
3651
3652
3653
3654
3655
3656
3657
3658
3659
3660
3661
3662
3663
3664
3665
3666
3667
3668
3669







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







	"SELECT id,item_path,state,status,run_duration,final_logf,comment FROM tests WHERE testname=? AND item_path != '';"
	test-name)
       res))))

;;======================================================================
;; Tests meta data
;;======================================================================

;; returns a hash table of tags to tests
;;
(define (db:get-tests-tags dbstruct)
  (let* ((dbdat   (db:get-db dbstruct #f))
	 (db      (db:dbdat-get-db dbdat))
         (res     (make-hash-table)))
    (sqlite3:for-each-row
     (lambda (testname tags-in)
       (let ((tags (string-split tags-in ",")))
         (for-each
          (lambda (tag)
            (hash-table-set! res tag
                             (delete-duplicates
                              (cons testname (hash-table-ref/default res tag '())))))
          tags)))
     db
     "SELECT testname,tags FROM test_meta")
    res))

;; read the record given a testname
(define (db:testmeta-get-record dbstruct testname)
  (let ((res   #f))
    (db:with-db
     dbstruct
     #f

Modified docs/manual/megatest_manual.html from [2d6199dc08] to [2f3ab9d0a7].

1323
1324
1325
1326
1327
1328
1329








































































1330























1331
1332
1333
1334
1335
1336
1337
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401

1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







</div>
</div>
</div>
<div class="sect1">
<h2 id="_reference">Reference</h2>
<div class="sectionbody">
<div class="sect2">
<h3 id="_config_file_helpers">Config File Helpers</h3>
<div class="paragraph"><p>Various helpers for more advanced config files.</p></div>
<table class="tableblock frame-topbot grid-all"
style="
width:80%;
">
<caption class="title">Table 2. Helpers</caption>
<col style="width:14%;">
<col style="width:28%;">
<col style="width:28%;">
<col style="width:28%;">
<thead>
<tr>
<th class="tableblock halign-center valign-top" >Helper                      </th>
<th class="tableblock halign-left valign-top" > Purpose                       </th>
<th class="tableblock halign-left valign-top" > Valid values            </th>
<th class="tableblock halign-left valign-top" > Comments</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{scheme (scheme code&#8230;)}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Execute arbitrary scheme code</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Any valid scheme</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Value returned from the call is converted to a string and processed as part of the config file</p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{system command}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Execute program, inserts exit code</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Any valid Unix command</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Discards the output from the program</p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{shell  command} or #{sh &#8230;}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Execute program, inserts result from stdout</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Any valid Unix command</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Value returned from the call is converted to a string and processed as part of the config file</p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{realpath path} or #{rp &#8230;}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Replace with normalized path</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Must be a valid path</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{getenv VAR} or #{gv VAR}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Replace with content of env variable</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Must be a valid var</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{get s v} or #{g s v}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Replace with variable v from section s</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Variable must be defined before use</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock">#{rget v}</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Replace with variable v from target or default of runconfigs file</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
<tr>
<td class="tableblock halign-center valign-top" ><p class="tableblock"></p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced">Replace with the path to the megatest testsuite area</p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
<td class="tableblock halign-left valign-top" ><p class="tableblock monospaced"></p></td>
</tr>
</tbody>
</table>
</div>
<div class="sect2">
<h3 id="_megatest_config_file_settings">Megatest Config File Settings</h3>
<h3 id="_config_file_settings">Config File Settings</h3>
<div class="paragraph"><p>Settings in megatest.config</p></div>
</div>
<div class="sect2">
<h3 id="_config_file_additional_features">Config File Additional Features</h3>
<div class="paragraph"><p>Including output from a script as if it was inline to the config file:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[scriptinc myscript.sh]</pre>
</div></div>
<div class="paragraph"><p>If the script outputs:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre>[items]
A a b c
B d e f</pre>
</div></div>
<div class="paragraph"><p>Then the config file would effectively appear to contain an items section
exactly like the output from the script. This is extremely useful when
dynamically creating items, itemstables and other config structures. You can
see the expansion of the call by looking in the cached files (look in your
linktree for megatest.config and runconfigs.config cache files and in your
test run areas for the expanded and cached testconfig).</p></div>
<div class="sect3">
<h4 id="_disk_space_checks">Disk Space Checks</h4>
<div class="paragraph"><p>Some parameters you can put in the [setup] section of megatest.config:</p></div>
<div class="listingblock">
<div class="content monospaced">
<pre># minimum space required in a run disk
minspace 10000000
1446
1447
1448
1449
1450
1451
1452
1453

1454
1455
1456
1457
1458
1459
1460
1540
1541
1542
1543
1544
1545
1546

1547
1548
1549
1550
1551
1552
1553
1554







-
+







</div>
<div class="sect2">
<h3 id="_database_settings">Database settings</h3>
<table class="tableblock frame-topbot grid-all"
style="
width:70%;
">
<caption class="title">Table 2. Database config settings in [setup] section of megatest.config</caption>
<caption class="title">Table 3. Database config settings in [setup] section of megatest.config</caption>
<col style="width:14%;">
<col style="width:28%;">
<col style="width:28%;">
<col style="width:28%;">
<thead>
<tr>
<th class="tableblock halign-center valign-top" >Var                       </th>
1915
1916
1917
1918
1919
1920
1921
1922

1923
1924
1925
1926
1927
1928
1929
2009
2010
2011
2012
2013
2014
2015

2016
2017
2018
2019
2020
2021
2022
2023







-
+







<h2 id="_programming_api">Programming API</h2>
<div class="sectionbody">
<div class="paragraph"><p>These routines can be called from the megatest repl.</p></div>
<table class="tableblock frame-topbot grid-all"
style="
width:70%;
">
<caption class="title">Table 3. API Keys Related Calls</caption>
<caption class="title">Table 4. API Keys Related Calls</caption>
<col style="width:14%;">
<col style="width:28%;">
<col style="width:28%;">
<col style="width:28%;">
<thead>
<tr>
<th class="tableblock halign-center valign-top" >API Call                        </th>
1967
1968
1969
1970
1971
1972
1973
1974

1975
1976
1977
1978
2061
2062
2063
2064
2065
2066
2067

2068
2069
2070
2071
2072







-
+




</div>
</div>
</div>
<div id="footnotes"><hr></div>
<div id="footer">
<div id="footer-text">
Version 1.0<br>
Last updated 2016-10-19 10:23:07 PDT
Last updated 2016-12-12 13:03:08 PST
</div>
</div>
</body>
</html>

Modified docs/manual/reference.txt from [206fb51b8f] to [a08ca10124].

1
2
3
4



















5
6




























7
8
9
10
11
12
13
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58




+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+








Reference
---------

Config File Helpers
~~~~~~~~~~~~~~~~~~~

Various helpers for more advanced config files.

.Helpers
[width="80%",cols="^,2m,2m,2m",frame="topbot",options="header"]
|======================
|Helper                      | Purpose                       | Valid values            | Comments
| #{scheme (scheme code...)} | Execute arbitrary scheme code | Any valid scheme        | Value returned from the call is converted to a string and processed as part of the config file
| #{system command}          | Execute program, inserts exit code  | Any valid Unix command  | Discards the output from the program
| #{shell  command} or #{sh ...}  | Execute program, inserts result from stdout | Any valid Unix command | Value returned from the call is converted to a string and processed as part of the config file
| #{realpath path} or #{rp ...}   | Replace with normalized path | Must be a valid path |
| #{getenv VAR} or #{gv VAR}      | Replace with content of env variable | Must be a valid var |
| #{get s v} or #{g s v}     | Replace with variable v from section s | Variable must be defined before use |
| #{rget v}                  | Replace with variable v from target or default of runconfigs file | |
| #{mtrah}                   | Replace with the path to the megatest testsuite area | | 
|======================

Megatest Config File Settings
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config File Settings
~~~~~~~~~~~~~~~~~~~~

Settings in megatest.config

Config File Additional Features
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Including output from a script as if it was inline to the config file:

-------------------------
[scriptinc myscript.sh]
-------------------------

If the script outputs:

-------------------------
[items]
A a b c
B d e f
-------------------------

Then the config file would effectively appear to contain an items section
exactly like the output from the script. This is extremely useful when
dynamically creating items, itemstables and other config structures. You can
see the expansion of the call by looking in the cached files (look in your
linktree for megatest.config and runconfigs.config cache files and in your
test run areas for the expanded and cached testconfig).

Disk Space Checks
^^^^^^^^^^^^^^^^^

Some parameters you can put in the [setup] section of megatest.config:

-------------------

Modified http-transport.scm from [4d8eecbf3a] to [85b3cef6fd].

215
216
217
218
219
220
221
222

223
224
225
226
227
228
229
215
216
217
218
219
220
221

222
223
224
225
226
227
228
229







-
+







;;
(define (http-transport:client-api-send-receive run-id serverdat cmd params #!key (numretries 3))
  (let* ((fullurl    (if (vector? serverdat)
			 (http-transport:server-dat-get-api-req serverdat)
			 (begin
			   (debug:print 0 *default-log-port* "FATAL ERROR: http-transport:client-api-send-receive called with no server info")
			   (exit 1))))
	 (res        #f)
	 (res        (vector #f "uninitialized"))
	 (success    #t)
	 (sparams    (db:obj->string params transport: 'http)))
       (debug:print-info 11 *default-log-port* "fullurl=" fullurl ", cmd=" cmd ", params=" params ", run-id=" run-id "\n")
       ;; set up the http-client here
       (max-retry-attempts 1)
       ;; consider all requests indempotent
       (retry-request? (lambda (request)
381
382
383
384
385
386
387
388

389
390
391
392
393
394
395
396
397

398
399
400
401

402

403

404

405
406
407

408
409
410
411
412
413
414
381
382
383
384
385
386
387

388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405

406
407
408
409
410

411
412
413
414
415
416
417
418







-
+









+




+

+
-
+

+


-
+







         (last-access 0)
	 (server-timeout (server:get-timeout))
	 (server-going  #f))
    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))

      ;;(BB> "http-transport: top of loop; count="count" server-state="server-state" bad-sync-count="bad-sync-count" server-going="server-going)
      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-db* 
	    ;; Removed code is pasted below (keeping it around until we are clear it is not needed).
	    ;; no *dbstruct-db* yet, set running after our first pass through and start the db
	    (if (eq? server-state 'available)
		(let ((new-server-id (tasks:server-am-i-the-server? (db:delay-if-busy tdbdat) run-id))) ;; try to ensure no double registering of servers
		  (if (equal? new-server-id server-id)
		      (begin
			(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "dbprep")
                        ;;(BB> "http-transport: ->dbprep")
			(thread-sleep! 0.5) ;; give some margin for queries to complete before switching from file based access to server based access
			(set! *dbstruct-db*  (db:setup)) ;;  run-id))
			(set! server-going #t)
			(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "running")
                        ;;(BB> "http-transport: ->running")
			(server:write-dotserver *toppath* (conc iface ":" port))
                        (thread-start! *watchdog*)
			(delete-file* (conc *toppath* "/.starting-server")))
                        (server:complete-attempt *toppath*))
		      (begin ;; gotta exit nicely
                        ;;(BB> "http-transport: ->collision")
			(tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "collision")
			(http-transport:server-shutdown server-id port))))))

      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))
	    (thread-sleep! rem-time)))
484
485
486
487
488
489
490

491

492
493
494
495
496
497
498
488
489
490
491
492
493
494
495

496
497
498
499
500
501
502
503







+
-
+







;; (if (and (<= rem-time 4)
;; 	     (> rem-time 0))
;; 	(thread-sleep! rem-time)
;; 	(thread-sleep! 4))) ;; fallback for if the math is changed ...

(define (http-transport:server-shutdown server-id port)
  (let ((tdbdat (tasks:open-db)))
    ;;(BB> "http-transport:server-shutdown called")
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server.")
    (debug:print-info 0 *default-log-port* "Starting to shutdown the server. pid="(current-process-id))
    ;;
    ;; start_shutdown
    ;;
    (tasks:server-set-state! (db:delay-if-busy tdbdat) server-id "shutting-down")
    (set! *time-to-exit* #t) ;; tell on-exit to be fast as we've already cleaned up
    (portlogger:open-run-close portlogger:set-port port "released")
    (thread-sleep! 5)
511
512
513
514
515
516
517

518
519
520
521
522
523
524
525
526

527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544

545
546
547
548
549
550
551
552
553
554
555
556
557

558
559
560
561
562
563
564
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530


531


532
533
534
535
536
537
538
539
540
541
542
543
544
545
546

547
548
549
550
551
552
553
554
555
556
557
558
559

560
561
562
563
564
565
566
567







+







-
-
+
-
-















-
+












-
+







			  (/ *total-non-write-delay* 
			     *number-non-write-queries*))
		      " ms")
    (debug:print-info 0 *default-log-port* "Server shutdown complete. Exiting")
    (tasks:server-delete-record (db:delay-if-busy tdbdat) server-id " http-transport:keep-running complete")
    ;; if the .server file contained :myport then we can remove it
    (server:remove-dotserver-file *toppath* port)
    ;;(BB> "http-transport:server-shutdown -> exit")
    (exit)))

;; all routes though here end in exit ...
;;
;; start_server? 
;;
(define (http-transport:launch run-id)
  (with-output-to-file
      (conc *toppath* "/.starting-server")
  (server:attempting-start *toppath*)
    (lambda ()
      (print (current-process-id) " on " (get-host-name))))
  (let* ((tdbdat (tasks:open-db)))
    (set! *run-id*   run-id)
    (if (args:get-arg "-daemonize")
	(begin
	  (daemon:ize)
	  (if *alt-log-file* ;; we should re-connect to this port, I think daemon:ize disrupts it
	      (begin
		(current-error-port *alt-log-file*)
		(current-output-port *alt-log-file*)))))
    (if (and (server:read-dotserver *toppath*)
             (server:check-if-running run-id))
	(begin
	  (debug:print 0 *default-log-port* "INFO: Server for run-id " run-id " already running")
	  (exit 0))
	(begin ;; ok, no server detected, clean out any lingering records
	   (tasks:server-force-clean-running-records-for-run-id  (db:delay-if-busy tdbdat) run-id "notresponding")))
          (tasks:server-force-clean-running-records-for-run-id  (db:delay-if-busy tdbdat) run-id "notresponding")))
    (let loop ((server-id (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id))
	       (remtries  4))
      (if (not server-id)
	  (if (> remtries 0)
	      (begin
		(thread-sleep! 2)
		(loop (tasks:server-lock-slot (db:delay-if-busy tdbdat) run-id)
		      (- remtries 1)))
	      (begin
		;; since we didn't get the server lock we are going to clean up and bail out
		(debug:print-info 2 *default-log-port* "INFO: server pid=" (current-process-id) ", hostname=" (get-host-name) " not starting due to other candidates ahead in start queue")
		(tasks:server-delete-records-for-this-pid (db:delay-if-busy tdbdat) " http-transport:launch")
		(delete-file* (conc *toppath* "/.starting-server"))
                (server:complete-attempt *toppath*)
		))
	  (let* ((th2 (make-thread (lambda ()
				     (debug:print-info 0 *default-log-port* "Server run thread started")
				     (http-transport:run 
				      (if (args:get-arg "-server")
					  (args:get-arg "-server")
					  "-")

Deleted inteldate.scm version [a6b831c59f].

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180




















































































































































































-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
(use srfi-19)
(use test)
(use format)
(use regex)
(declare (unit inteldate))
;; utility procedures to convert among
;; different ways to express date (inteldate, seconds since epoch, isodate)
;;
;; samples:
;; isodate   -> "2016-01-01"
;; inteldate -> "16ww01.5"
;; seconds   -> 1451631600

;; procedures provided:
;; ====================
;; seconds->isodate
;; seconds->inteldate
;;
;; isodate->seconds
;; isodate->inteldate
;;
;; inteldate->seconds
;; inteldate->isodate

;; srfi-19 used extensively; this doc is better tha the eggref:
;; http://srfi.schemers.org/srfi-19/srfi-19.html

;; Author: brandon.j.barclay@intel.com 16ww18.6

(define (date->seconds date)
  (inexact->exact
   (string->number
    (date->string date "~s"))))

(define (seconds->isodate seconds)
  (let* ((date (seconds->date seconds))
         (result (date->string date "~Y-~m-~d")))
    result))

(define (isodate->seconds isodate)
  "Takes a string input of the form 'YY-MM-DD' or 'YYYY-MM-DD' and returns epoch time; for YY, assume after Y2K"
  (let* ((numlist (map string->number (string-split isodate "-")))
        (raw-year (car numlist))
        (year (if (< raw-year 100) (+ raw-year 2000) raw-year))
        (month (list-ref numlist 1))
        (day (list-ref numlist 2))
        (date (make-date 0 0 0 0 day month year))
        (seconds (date->seconds date)))

    seconds))

;; adapted from perl Intel::WorkWeek perl module
;; intel year consists of numbered weeks starting from week 1
;;   week 1 is the week containing jan 1 of the year
;;   days of week are numbered starting from 0 on sunday
;;   intel year does not match calendar year in workweek 1
;;     before jan1.
(define (seconds->inteldate-values seconds)
  (define (date-difference->seconds d1 d2)
    (- (date->seconds d1) (date->seconds d2)))

  (let* ((thisdate (seconds->date seconds))
         (thisdow (string->number (date->string thisdate "~w")))

         (year (date-year thisdate))
         ;; intel workweek 1 begins on sunday of week containing jan1
         (jan1 (make-date 0 0 0 0 1 1 year))
         (jan1dow (date-week-day jan1))
         (ww01 (date-subtract-duration jan1 (seconds->time (* 60 60 24 jan1dow))))

         (ww01_delta_seconds (date-difference->seconds thisdate ww01))
         (wwnum_initial (inexact->exact (add1 (floor (/ ww01_delta_seconds 24 3600 7) ))))
         
         ;; we could be in ww1 of next year
         (this-saturday (seconds->date
                         (+ seconds
                            (* 60 60 24 (- 6 thisdow)))))
         (this-week-ends-next-year?
          (> (date-year this-saturday) year))
         (intelyear
          (if this-week-ends-next-year?
              (add1 year)
              year))
         (intelweek
          (if this-week-ends-next-year?
              1
              wwnum_initial)))
   (values intelyear intelweek thisdow)))

(define (seconds->inteldate seconds)
  (define (string-leftpad in width pad-char)
    (let* ((unpadded-str (->string in))
           (padlen_temp (- width (string-length unpadded-str)))
           (padlen (if (< padlen_temp 0) 0 padlen_temp))
           (padding
            (fold conc ""
                  (map (lambda (x) (->string pad-char)) (iota padlen)))))
      (conc padding unpadded-str)))
  (define (zeropad num width)
    (string-leftpad num width #:0))

  (let-values (((intelyear intelweek day-of-week-num)
                (seconds->inteldate-values seconds)))
    (let ((intelyear-str
           (zeropad
            (->string
             (if (> intelyear 1999)
                 (- intelyear 2000) intelyear))
            2))
          (intelweek-str
           (zeropad (->string intelweek) 2))
          (dow-str (->string day-of-week-num)))
      (conc intelyear-str "ww" intelweek-str "." dow-str))))

(define (isodate->inteldate isodate)
  (seconds->inteldate
   (isodate->seconds isodate)))

(define (inteldate->seconds inteldate)
  (let ((match (string-match "^(\\d+)ww(\\d+).(\\d)$" inteldate)))
    (if
     (not match)
     #f
     (let* (
            (intelyear-raw (string->number (list-ref match 1)))
            (intelyear (if (< intelyear-raw 100)
                           (+ intelyear-raw 2000)
                           intelyear-raw))
            (intelww (string->number (list-ref match 2)))
            (dayofweek (string->number (list-ref match 3)))

            (day-of-seconds (* 60 60 24 ))
            (week-of-seconds (* day-of-seconds 7))
            

            ;; get seconds at ww1.0
            (new-years-date (make-date 0 0 0 0 1 1 intelyear))
            (new-years-seconds
             (date->seconds new-years-date))
            (new-years-dayofweek (date-week-day new-years-date))
            (ww1.0_seconds (- new-years-seconds
                              (* day-of-seconds
                                 new-years-dayofweek)))
            (workweek-adjustment (* week-of-seconds (sub1 intelww)))
            (weekday-adjustment (* dayofweek day-of-seconds))

            (result (+ ww1.0_seconds workweek-adjustment weekday-adjustment)))
       result))))

(define (inteldate->isodate inteldate)
  (seconds->isodate (inteldate->seconds inteldate)))

(define (inteldate-tests)
  (test-group
   "date conversion tests"
   (let ((test-table
          '(("16ww01.5" . "2016-01-01")
            ("16ww18.5" . "2016-04-29")
            ("1999ww33.5" . "1999-08-13")
            ("16ww18.4" . "2016-04-28")
            ("16ww18.3" . "2016-04-27")
            ("13ww01.0" . "2012-12-30")
            ("13ww52.6" . "2013-12-28")
            ("16ww53.3" . "2016-12-28"))))
     (for-each
      (lambda (test-pair)
        (let ((inteldate (car test-pair))
              (isodate (cdr test-pair)))
          (test
           (conc "(isodate->inteldate "isodate ") => "inteldate)
           inteldate
           (isodate->inteldate isodate))
          
          (test
           (conc "(inteldate->isodate "inteldate ")   => "isodate)
           isodate
           (inteldate->isodate inteldate))))
      test-table))))

;(inteldate-tests)

Modified launch.scm from [4e784cfd15] to [e1f9f3deb0].

120
121
122
123
124
125
126












127
128
129
130
131
132
133
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145







+
+
+
+
+
+
+
+
+
+
+
+







    (rmt:teststep-set-status! run-id test-id stepname "start" "-" #f #f)
    ;; now launch the actual process
    (call-with-environment-variables 
     (list (cons "PATH" (conc (get-environment-variable "PATH") ":.")))
     (lambda () ;; (process-run "/bin/bash" "-c" "exec ls -l /tmp/foobar > /tmp/delme-more.log 2>&1")
       (let* ((cmd (conc stepcmd " > " stepname ".log 2>&1")) ;; >outfile 2>&1 
	      (pid (process-run "/bin/bash" (list "-c" cmd))))

         (with-output-to-file "Makefile.ezsteps"
           (lambda ()
             (print stepname ".log :")
             (print "\t" cmd)
             (if (file-exists? (conc stepname ".logpro"))
                 (print "\tlogpro " stepname ".logpro " stepname ".html < " stepname ".log"))
             (print)
             (print stepname " : " stepname ".log")
             (print))
           #:append)

	 (rmt:test-set-top-process-pid run-id test-id pid)
	 (let processloop ((i 0))
	   (let-values (((pid-val exit-status exit-code)(process-wait pid #t)))
		       (mutex-lock! m)
		       (launch:einf-pid-set!         exit-info pid)         ;; (vector-set! exit-info 0 pid)
		       (launch:einf-exit-status-set! exit-info exit-status) ;; (vector-set! exit-info 1 exit-status)
		       (launch:einf-exit-code-set!   exit-info exit-code)   ;; (vector-set! exit-info 2 exit-code)
314
315
316
317
318
319
320
321

322
323

324
325

326
327
328
329
330
331
332
326
327
328
329
330
331
332

333
334

335
336

337
338
339
340
341
342
343
344







-
+

-
+

-
+







			     (current-seconds) 
			     start-seconds)))))
	 (kill-tries 0))
    ;; (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
    ;; (tests:set-full-meta-info test-id run-id (calc-minutes) work-area)
    (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
    (let loop ((minutes   (calc-minutes))
	       (cpu-load  (get-cpu-load))
	       (cpu-load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
	       (disk-free (get-df (current-directory))))
      (let ((new-cpu-load (let* ((load  (get-cpu-load))
      (let ((new-cpu-load (let* ((load  (alist-ref 'adj-core-load (common:get-normalized-cpu-load #f)))
				 (delta (abs (- load cpu-load))))
			    (if (> delta 0.6) ;; don't bother updating with small changes
			    (if (> delta 0.1) ;; don't bother updating with small changes
				load
				#f)))
	    (new-disk-free (let* ((df    (get-df (current-directory)))
				  (delta (abs (- df disk-free))))
			     (if (> delta 200) ;; ignore changes under 200 Meg
				 df
				 #f))))
844
845
846
847
848
849
850
851




852
853
854
855
856
857
858
859
860
861
862
863
864
865
866

867
868
869
870
871
872
873
856
857
858
859
860
861
862

863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880

881
882
883
884
885
886
887
888







-
+
+
+
+














-
+







	    )))
    (if (and *toppath*
	     (directory-exists? *toppath*))
	(begin
	  (setenv "MT_RUN_AREA_HOME" *toppath*)
	  (setenv "MT_TESTSUITE_NAME" (common:get-testsuite-name)))
	(begin
	  (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")))
	  (debug:print-error 0 *default-log-port* "failed to find the top path to your Megatest area.")
          ;;(exit 1)
          #f
          ))
    *toppath*))

(define (get-best-disk confdat testconfig)
  (let* ((disks   (or (and testconfig (hash-table-ref/default testconfig "disks" #f))
		      (hash-table-ref/default confdat "disks" #f)))
	 (minspace (let ((m (configf:lookup confdat "setup" "minspace")))
		     (string->number (or m "10000")))))
    (if disks 
	(let ((res (common:get-disk-with-most-free-space disks minspace))) ;; min size of 1000, seems tad dumb
	  (if res
	      (cdr res)
	      (begin
		(if (common:low-noise-print 20 "No valid disks or no disk with enough space")
		    (debug:print-error 0 *default-log-port* "No valid disks found in megatest.config. Please add some to your [disks] section and ensure the directory exists and has enough space!\n    You can change minspace in the [setup] section of megatest.config. Current setting is: " minspace))
		(exit 1)))))))
		(exit 1))))))) ;; TODO - move the exit to the calling location and return #f

;; Desired directory structure:
;;
;;  <linkdir> - <target> - <testname> -.
;;                                     |
;;                                     v
;;  <rundir>  -  <target>  -    <testname> -|- <itempath(s)>
1051
1052
1053
1054
1055
1056
1057


1058
1059
1060
1061
1062
1063
1064







1065
1066
1067
1068
1069
1070






1071
1072
1073
1074
1075





1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119












































1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194











































































1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245




















































1246
1247
1248
1249
1250
1251
1252
1066
1067
1068
1069
1070
1071
1072
1073
1074







1075
1076
1077
1078
1079
1080
1081






1082
1083
1084
1085
1086
1087





1088
1089
1090
1091
1092












































1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136











































































1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211



















































1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270







+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







;; 2. create run dir on disk, path name is meaningful
;; 3. create link from run dir to megatest runs area 
;; 4. remotely run the test on allocated host
;;    - could be ssh to host from hosts table (update regularly with load)
;;    - could be netbatch
;;      (launch-test db (cadr status) test-conf))
(define (launch-test test-id run-id run-info keyvals runname test-conf test-name test-path itemdat params)
  (mutex-lock! *launch-setup-mutex*) ;; setting variables and processing the testconfig is NOT thread-safe, reuse the launch-setup mutex
  (let* ((item-path       (item-list->path itemdat)))
  (let loop ((delta        (- (current-seconds) *last-launch*))
	     (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5"))))
    (if (> launch-delay delta)
	(begin
	  (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
	  (thread-sleep! (- launch-delay delta))
	  (loop (- (current-seconds) *last-launch*) launch-delay))))
    (let loop ((delta        (- (current-seconds) *last-launch*))
	       (launch-delay (string->number (or (configf:lookup *configdat* "setup" "launch-delay") "5"))))
      (if (> launch-delay delta)
	  (begin
	    (debug:print-info 0 *default-log-port* "Delaying launch of " test-name " for " (- launch-delay delta) " seconds")
	    (thread-sleep! (- launch-delay delta))
	    (loop (- (current-seconds) *last-launch*) launch-delay))))
  (set! *last-launch* (current-seconds))
  (change-directory *toppath*)
  (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    (change-directory *toppath*)
    (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute", *maybe* - the longer they are set the longer each launch takes (must be non-overlapping with the vars)
     (append
      (list
       (list "MT_RUN_AREA_HOME" *toppath*)
       (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
    ;; (list "MT_TARGET"    mt_target)
    ))
  (let* ((tregistry       (tests:get-all))
       (list "MT_RUNNAME"   runname)
       (list "MT_ITEMPATH"  item-path)
       )
      itemdat))
    (let* ((tregistry       (tests:get-all)) ;; third param (below) is system-allowed
	 (item-path       (let ((ip (item-list->path itemdat)))
			    (alist->env-vars (list (list "MT_ITEMPATH" ip)))
			    ip))
	 (tconfig         (or (tests:get-testconfig test-name tregistry #t force-create: #t)
			      test-conf)) ;; force re-read now that all vars are set
	 (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
			    (if ush 
				(if (equal? ush "no") ;; must use "no" to NOT use shell
				    #f
				    ush)
				#t)))     ;; default is yes
	 (runscript       (config-lookup tconfig   "setup"        "runscript"))
	 (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace       (config-lookup tconfig   "requirements" "diskspace"))
	 (memory          (config-lookup tconfig   "requirements" "memory"))
	 (hosts           (config-lookup *configdat* "jobtools"     "workhosts"))
	 (remote-megatest (config-lookup *configdat* "setup" "executable"))
	 (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
			      (configf:lookup  *configdat* "setup" "runtimelim")))
	 ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	 ;;                allow running from dashboard. Extract the path
	 ;;                from the called megatest and convert dashboard
	 ;;             	  or dboard to megatest
	 (local-megatest  (let* ((lm  (car (argv)))
				 (dir (pathname-directory lm))
				 (exe (pathname-strip-directory lm)))
			    (conc (if dir (conc dir "/") "")
				  (case (string->symbol exe)
				    ((dboard)    "../megatest")
				    ((mtest)     "../megatest")
				    ((dashboard) "megatest")
				    (else exe)))))
	 (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	 (test-sig   (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	 (work-area  #f)
	 (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (testinfo   (rmt:get-test-info-by-id run-id test-id))
	 (mt_target  (string-intersperse (map cadr keyvals) "/"))
	 (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
			      (if (args:get-arg "-logging")(list "-logging") '()))))
           ;; for tconfig, why do we allow fallback to test-conf?
	   (tconfig         (or (tests:get-testconfig test-name tregistry #t force-create: #t)
				(begin
                                  (debug:print 0 *default-log-port* "WARNING: falling back to pre-calculated testconfig. This is likely not desired.")
                                  test-conf))) ;; force re-read now that all vars are set
	   (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
			      (if ush 
				  (if (equal? ush "no") ;; must use "no" to NOT use shell
				      #f
				      ush)
				  #t)))     ;; default is yes
	   (runscript       (config-lookup tconfig   "setup"        "runscript"))
	   (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	   ;; (diskspace       (config-lookup tconfig   "requirements" "diskspace"))
	   ;; (memory          (config-lookup tconfig   "requirements" "memory"))
	   ;; (hosts           (config-lookup *configdat* "jobtools"     "workhosts")) ;; I'm pretty sure this was never completed
	   (remote-megatest (config-lookup *configdat* "setup" "executable"))
	   (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
				(configf:lookup  *configdat* "setup" "runtimelim")))
	   ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	   ;;                allow running from dashboard. Extract the path
	   ;;                from the called megatest and convert dashboard
	   ;;             	  or dboard to megatest
	   (local-megatest  (let* ((lm  (car (argv)))
				   (dir (pathname-directory lm))
				   (exe (pathname-strip-directory lm)))
			      (conc (if dir (conc dir "/") "")
				    (case (string->symbol exe)
				      ((dboard)    "../megatest")
				      ((mtest)     "../megatest")
				      ((dashboard) "megatest")
				      (else exe)))))
	   (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	   (test-sig   (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	   (work-area  #f)
	   (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	   (diskpath   #f)
	   (cmdparms   #f)
	   (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	   (mt-bindir-path #f)
	   (testinfo   (rmt:get-test-info-by-id run-id test-id))
	   (mt_target  (string-intersperse (map cadr keyvals) "/"))
	   (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
				(if (args:get-arg "-logging")(list "-logging") '()))))

    (setenv "MT_ITEMPATH" item-path)
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	     (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	(begin
	  (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	  (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record

    ;; prevent overlapping actions - set to LAUNCHED as early as possible
    ;;
    ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
    (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f)
    (set! diskpath (get-best-disk *configdat* tconfig))
    (if diskpath
	(let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print-info 2 *default-log-port* "Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
	  (create-directory work-area #t)
	  (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
    (set! cmdparms (base64:base64-encode 
		    (z3:encode-buffer 
		     (with-output-to-string
		       (lambda () ;; (list 'hosts     hosts)
			 (write (list (list 'testpath  test-path)
				      (list 'transport (conc *transport-type*))
				      ;; (list 'serverinf *server-info*)
				      (list 'toppath   *toppath*)
				      (list 'work-area work-area)
				      (list 'test-name test-name) 
				      (list 'runscript runscript) 
				      (list 'run-id    run-id   )
				      (list 'test-id   test-id  )
				      ;; (list 'item-path item-path )
				      (list 'itemdat   itemdat  )
				      (list 'megatest  remote-megatest)
				      (list 'ezsteps   ezsteps) 
				      (list 'target    mt_target)
				      (list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
				      (list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
				      (list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
				      (list 'runname   runname)
				      (list 'mt-bindir-path mt-bindir-path))))))))

    ;; clean out step records from previous run if they exist
    ;; (rmt:delete-test-step-records run-id test-id)
    ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
    (if (file-exists? work-area)
	(change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
    (cond
     ((and launcher hosts) ;; must be using ssh hostname
      (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
     (launcher
      (set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
     ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
     (else
      (if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
      (set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
    ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
    (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
    (debug:print 1 *default-log-port* "Launching " work-area)
    ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
    (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
    (let* ((commonprevvals (alist->env-vars
			    (hash-table-ref/default *configdat* "env-override" '())))
      ;; (if hosts (set! hosts (string-split hosts)))
      ;; set the megatest to be called on the remote host
      (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
      (set! mt-bindir-path (pathname-directory remote-megatest))
      (if launcher (set! launcher (string-split launcher)))
      ;; set up the run work area for this test
      (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	       (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	  (begin
	    (debug:print-info 0 *default-log-port* "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	    (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record
      
      ;; prevent overlapping actions - set to LAUNCHED as early as possible
      ;;
      ;; the following call handles waiver propogation. cannot yet condense into roll-up-pass-fail
      (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
      (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED" #f)
      ;; (pp (hash-table->alist tconfig))
      (set! diskpath (get-best-disk *configdat* tconfig))
      (if diskpath
	  (let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	    (set! work-area (car dat))
	    (set! toptest-work-area (cadr dat))
	    (debug:print-info 2 *default-log-port* "Using work area " work-area))
	  (begin
	    (set! work-area (conc test-path "/tmp_run"))
	    (create-directory work-area #t)
	    (debug:print 0 *default-log-port* "WARNING: No disk work area specified - running in the test directory under tmp_run")))
      (set! cmdparms (base64:base64-encode 
		      (z3:encode-buffer 
		       (with-output-to-string
			 (lambda () ;; (list 'hosts     hosts)
			   (write (list (list 'testpath  test-path)
					(list 'transport (conc *transport-type*))
					;; (list 'serverinf *server-info*)
					(list 'toppath   *toppath*)
					(list 'work-area work-area)
					(list 'test-name test-name) 
					(list 'runscript runscript) 
					(list 'run-id    run-id   )
					(list 'test-id   test-id  )
					;; (list 'item-path item-path )
					(list 'itemdat   itemdat  )
					(list 'megatest  remote-megatest)
					(list 'ezsteps   ezsteps) 
					(list 'target    mt_target)
					(list 'runtlim   (if run-time-limit (common:hms-string->seconds run-time-limit) #f))
					(list 'env-ovrd  (hash-table-ref/default *configdat* "env-override" '())) 
					(list 'set-vars  (if params (hash-table-ref/default params "-setvars" #f)))
					(list 'runname   runname)
					(list 'mt-bindir-path mt-bindir-path))))))))
      
      ;; clean out step records from previous run if they exist
      ;; (rmt:delete-test-step-records run-id test-id)
      ;; if the dir does not exist we may have a itempath where individual variables are a path, launch anyway
      (if (file-exists? work-area)
	  (change-directory work-area)) ;; so that log files from the launch process don't clutter the test dir
      (cond
       ;; ((and launcher hosts) ;; must be using ssh hostname
       ;;    (set! fullcmd (append launcher (car hosts)(list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (car hosts)(list remote-megatest test-sig "-execute" cmdparms))))
       (launcher
	(set! fullcmd (append launcher (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param)))
       ;; (set! fullcmd (append launcher (list remote-megatest test-sig "-execute" cmdparms))))
       (else
	(if (not useshell)(debug:print 0 *default-log-port* "WARNING: internal launching will not work well without \"useshell yes\" in your [jobtools] section"))
	(set! fullcmd (append (list remote-megatest "-m" test-sig "-execute" cmdparms) debug-param (list (if useshell "&" ""))))))
      ;; (set! fullcmd (list remote-megatest test-sig "-execute" cmdparms (if useshell "&" "")))))
      (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
      (debug:print 1 *default-log-port* "Launching " work-area)
      ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the envionment back when done
      (debug:print 4 *default-log-port* "fullcmd: " fullcmd)
      (set! *last-launch* (current-seconds)) ;; all that junk above takes time, set this as late as possible.
      (let* ((commonprevvals (alist->env-vars
			      (hash-table-ref/default *configdat* "env-override" '())))
	   (testprevvals   (alist->env-vars
			    (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	   (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			    (append (list (list "MT_TEST_RUN_DIR" work-area)
					  (list "MT_TEST_NAME" test-name)
					  (list "MT_ITEM_INFO" (conc itemdat)) 
					  (list "MT_RUNNAME"   runname)
					  (list "MT_TARGET"    mt_target)
					  (list "MT_ITEMPATH"  item-path)
					  )
				    itemdat)))
	   ;; Launchwait defaults to true, must override it to turn off wait
	   (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	   (launch-results (apply (if launchwait
				      process:cmd-run-with-stderr->list
				      process-run)
				  (if useshell
				      (let ((cmdstr (string-intersperse fullcmd " ")))
					(if launchwait
					    cmdstr
					    (conc cmdstr " >> mt_launch.log 2>&1")))
				      (car fullcmd))
				  (if useshell
				      '()
				      (cdr fullcmd)))))
      (if (not launchwait) ;; give the OS a little time to allow the process to start
	  (thread-sleep! 0.01))
      (with-output-to-file "mt_launch.log"
	(lambda ()
	  (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	  (if (list? launch-results)
	      (apply print launch-results)
	      (print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	  #:append))
      (debug:print 2 *default-log-port* "Launching completed, updating db")
      (debug:print 2 *default-log-port* "Launch results: " launch-results)
      (if (not launch-results)
          (begin
            (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
            ;; (sqlite3:finalize! db)
            ;; good ole "exit" seems not to work
            ;; (_exit 9)
            ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
            ;; NB// Is this still needed? Should be safe to go back to "exit" now?
            (process-signal (current-process-id) signal/kill)
            ))
      (alist->env-vars miscprevvals)
      (alist->env-vars testprevvals)
      (alist->env-vars commonprevvals)
      launch-results))
  (change-directory *toppath*))
	     (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			      (append (list (list "MT_TEST_RUN_DIR" work-area)
					    (list "MT_TEST_NAME" test-name)
					    (list "MT_ITEM_INFO" (conc itemdat)) 
					    (list "MT_RUNNAME"   runname)
					    (list "MT_TARGET"    mt_target)
					    (list "MT_ITEMPATH"  item-path)
					    )
				      itemdat)))
	     (testprevvals   (alist->env-vars
			      (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	     ;; Launchwait defaults to true, must override it to turn off wait
	     (launchwait     (if (equal? (configf:lookup *configdat* "setup" "launchwait") "no") #f #t))
	     (launch-results (apply (if launchwait
					process:cmd-run-with-stderr->list
					process-run)
				    (if useshell
					(let ((cmdstr (string-intersperse fullcmd " ")))
					  (if launchwait
					      cmdstr
					      (conc cmdstr " >> mt_launch.log 2>&1")))
					(car fullcmd))
				    (if useshell
					'()
					(cdr fullcmd)))))
        (mutex-unlock! *launch-setup-mutex*) ;; yes, really should mutex all the way to here. Need to put this entire process into a fork.
	(if (not launchwait) ;; give the OS a little time to allow the process to start
	    (thread-sleep! 0.01))
	(with-output-to-file "mt_launch.log"
	  (lambda ()
	    (print "LAUNCHCMD: " (string-intersperse fullcmd " "))
	    (if (list? launch-results)
		(apply print launch-results)
		(print "NOTE: launched \"" fullcmd "\"\n  but did not wait for it to proceed. Add the following to megatest.config \n[setup]\nlaunchwait yes\n  if you have problems with this"))
	    #:append))
	(debug:print 2 *default-log-port* "Launching completed, updating db")
	(debug:print 2 *default-log-port* "Launch results: " launch-results)
	(if (not launch-results)
	    (begin
	      (print "ERROR: Failed to run " (string-intersperse fullcmd " ") ", exiting now")
	      ;; (sqlite3:finalize! db)
	      ;; good ole "exit" seems not to work
	      ;; (_exit 9)
	      ;; but this hack will work! Thanks go to Alan Post of the Chicken email list
	      ;; NB// Is this still needed? Should be safe to go back to "exit" now?
	      (process-signal (current-process-id) signal/kill)
	      ))
	(alist->env-vars miscprevvals)
	(alist->env-vars testprevvals)
	(alist->env-vars commonprevvals)
	launch-results))
    (change-directory *toppath*)))

;; recover a test where the top controlling mtest may have died
;;
(define (launch:recover-test run-id test-id)
  ;; this function is called on the test run host via ssh
  ;;
  ;; 1. look at the process from pid

Modified megatest-version.scm from [a6ad525294] to [0bf6986bb1].

1
2
3
4
5
6

7
1
2
3
4
5

6
7





-
+

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

(declare (unit megatest-version))

(define megatest-version 1.6208)
(define megatest-version 1.6302)

Modified megatest.scm from [46d15d3c2a] to [ba3fdf979e].

91
92
93
94
95
96
97


98
99
100
101
102
103
104
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106







+
+







Selectors (e.g. use for -runtests, -remove-runs, -set-state-status, -list-runs etc.)
  -target key1/key2/...   : run for key1, key2, etc.
  -reqtarg key1/key2/...  : run for key1, key2, etc. but key1/key2 must be in runconfig
  -testpatt patt1/patt2,patt3/...  : % is wildcard
  -runname                : required, name for this particular test run
  -state                  : Applies to runs, tests or steps depending on context
  -status                 : Applies to runs, tests or steps depending on context
  -mode key               : load testpatt from <key> in runconfigs instead of default TESTPATT
  -tagexpr tag1,tag2%,..  : select tests with tags matching expression

Test helpers (for use inside tests)
  -step stepname
  -test-status            : set the state and status of a test (use :state and :status)
  -setlog logfname        : set the path/filename to the final log relative to the test
                            directory. may be used with -test-status
  -set-toplog logfname    : set the overall log for a suite of sub-tests
116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
118
119
120
121
122
123
124

125
126
127
128
129
130
131
132







-
+







  -load-test-data         : read test specific data for storage in the test_data table
                            from standard in. Each line is comma delimited with four
                            fields category,variable,value,comment

Queries
  -list-runs patt         : list runs matching pattern \"patt\", % is the wildcard
  -show-keys              : show the keys used in this megatest setup
  -test-files targpatt    : get the most recent test path/file matching targpatt e.g. %/%... 
  -test-files targpatt    : get the most recent test path/file matching targpatt e.g. %/% or '*.log'
                            returns list sorted by age ascending, see examples below
  -test-paths             : get the test paths matching target, runname, item and test
                            patterns.
  -list-disks             : list the disks available for storing runs
  -list-targets           : list the targets in runconfigs.config
  -list-db-targets        : list the target combinations used in the db
  -show-config            : dump the internal representation of the megatest.config file
207
208
209
210
211
212
213
214



215
216
217
218
219
220
221
209
210
211
212
213
214
215

216
217
218
219
220
221
222
223
224
225







-
+
+
+







			":runname"
			"-runname"
			":state"  
			"-state"
			":status"
			"-status"
			"-list-runs"
			"-testpatt" 
			"-testpatt"
                        "-mode"
                        "-tagexpr"
			"-itempatt"
			"-setlog"
			"-set-toplog"
			"-runstep"
			"-logpro"
			"-m"
			"-rerun"
343
344
345
346
347
348
349

350


351
352
353
354
355
356
357
347
348
349
350
351
352
353
354

355
356
357
358
359
360
361
362
363







+
-
+
+







(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;
(define *watchdog* (make-thread common:watchdog "Watchdog thread"))

(if (not (args:get-arg "-server"))
(thread-start! *watchdog*)
    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
;;(BB> "thread-start! watchdog")

(if (args:get-arg "-log")
    (let ((oup (open-output-file (args:get-arg "-log"))))
      (debug:print-info 0 *default-log-port* "Sending log output to " (args:get-arg "-log"))
      (set! *default-log-port* oup)))

(if (or (args:get-arg "-h")
788
789
790
791
792
793
794

795
796
797
798
799
800
801
802
803
804
805
806
807













808
809
810
811
812
813
814
794
795
796
797
798
799
800
801













802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821







+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+







	  (exit))))

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
;;======================================================================

(if (args:get-arg "-list-targets")
    (if (launch:setup)
    (let ((targets (common:get-runconfig-targets)))
      (debug:print 1 *default-log-port* "Found "(length targets) " targets")
      (case (string->symbol (or (args:get-arg "-dumpmode") "alist"))
	((alist)
	 (for-each (lambda (x)
		     ;; (print "[" x "]"))
		     (print x))
		   targets))
	((json)
	 (json-write targets))
	(else
	 (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets")))
      (set! *didsomething* #t)))
        (let ((targets (common:get-runconfig-targets)))
          (debug:print 1 *default-log-port* "Found "(length targets) " targets")
          (case (string->symbol (or (args:get-arg "-dumpmode") "alist"))
            ((alist)
             (for-each (lambda (x)
                         ;; (print "[" x "]"))
                         (print x))
                       targets))
            ((json)
             (json-write targets))
            (else
             (debug:print-error 0 *default-log-port* "dump output format " (args:get-arg "-dumpmode") " not supported for -list-targets")))
          (set! *didsomething* #t))))

;; cache the runconfigs in $MT_LINKTREE/$MT_TARGET/$MT_RUNNAME/.runconfig
;;
(define (full-runconfigs-read)
;; in the envprocessing branch the below code replaces the further below code
;;  (if (eq? *configstatus* 'fulldata)
;;      *runconfigdat*
1016
1017
1018
1019
1020
1021
1022
1023
1024


1025
1026
1027
1028

1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041













1042
1043
1044
1045
1046
1047
1048
1023
1024
1025
1026
1027
1028
1029


1030
1031
1032
1033
1034
1035
1036













1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056







-
-
+
+




+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+







	       ;; (if (args:get-arg "-testpatt") 
	       ;;  	        (args:get-arg "-testpatt") 
	       ;;  	        "%"))
	       (keys        (rmt:get-keys)) ;; (db:get-keys dbstruct))
	       ;; (runsdat  (db:get-runs dbstruct runpatt #f #f '()))
	;; (runsdat     (rmt:get-runs-by-patt keys (or runpatt "%") (common:args-get-target) ;; (db:get-runs-by-patt dbstruct keys (or runpatt "%") (common:args-get-target)
	;; 		           	 #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0))
	       (runsdat     (db:dispatch-query access-mode rmt:get-runs-by-patt db:get-runs-by-patt keys (or runpatt "%") 
                                            (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0))
	       (runsdat     (rmt:get-runs-by-patt keys (or runpatt "%") 
                                                  (common:args-get-target) #f #f '("id" "runname" "state" "status" "owner" "event_time" "comment") 0))
	       (runstmp     (db:get-rows runsdat))
	       (header      (db:get-header runsdat))
	       ;; this is "-since" support. This looks at last mod times of <run-id>.db files
	       ;; and collects those modified since the -since time.
	       (runs        runstmp)
	       (runs        (if (and (not (null? runstmp))
				     (args:get-arg "-since"))
				(let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since")))))
				  (let loop ((hed (car runstmp))
					     (tal (cdr runstmp))
					     (res '()))
				    (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids)
						       (cons hed res)
						       res)))
				      (if (null? tal)
					  (reverse new-res)
					  (loop (car tal)(cdr tal) new-res)))))
				runstmp))
                        ;; (if (and (not (null? runstmp))
			;;        (args:get-arg "-since"))
			;;   (let ((changed-ids (db:get-changed-run-ids (string->number (args:get-arg "-since")))))
			;;     (let loop ((hed (car runstmp))
			;;   	     (tal (cdr runstmp))
			;;   	     (res '()))
			;;       (let ((new-res (if (member (db:get-value-by-header hed header "id") changed-ids)
			;;   		       (cons hed res)
			;;   		       res)))
			;;         (if (null? tal)
			;;   	  (reverse new-res)
			;;   	  (loop (car tal)(cdr tal) new-res)))))
			;;   runstmp))
	       (db-targets  (args:get-arg "-list-db-targets"))
	       (seen        (make-hash-table))
	       (dmode       (let ((d (args:get-arg "-dumpmode")))
			      (if d (string->symbol d) #f)))
	       (data        (make-hash-table))
	       (fields-spec (if (args:get-arg "-fields")
				(extract-fields-constraints (args:get-arg "-fields"))
1525
1526
1527
1528
1529
1530
1531

1532

1533
1534
1535
1536
1537
1538
1539
1533
1534
1535
1536
1537
1538
1539
1540

1541
1542
1543
1544
1545
1546
1547
1548







+
-
+







		(debug:print 0 *default-log-port* "Failed to setup, giving up on -test-paths or -test-files, exiting")
		(exit 1)))
	  (let* ((keys     (rmt:get-keys))
		 ;; db:test-get-paths must not be run remote
		 (paths    (tests:test-get-paths-matching keys target (args:get-arg "-test-files"))))
	    (set! *didsomething* #t)
	    (for-each (lambda (path)
			(if (file-exists? path)
			(print path))
			(print path)))	
		      paths)))
	;; else do a general-run-call
	(general-run-call 
	 "-test-files"
	 "Get paths to test"
	 (lambda (target runname keys keyvals)
	   (let* ((db       #f)
1823
1824
1825
1826
1827
1828
1829

1830

1831
1832
1833
1834
1835
1836
1837
1832
1833
1834
1835
1836
1837
1838
1839

1840
1841
1842
1843
1844
1845
1846
1847







+
-
+








(if (args:get-arg "-cleanup-db")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
	    (exit 1)))
      (let ((dbstruct (db:setup *toppath*)))
      (common:cleanup-db)
        (common:cleanup-db dbstruct))
      (set! *didsomething* #t)))

(if (args:get-arg "-mark-incompletes")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting")
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854

1855
1856
1857
1858
1859
1860
1861
1855
1856
1857
1858
1859
1860
1861



1862
1863
1864
1865
1866
1867
1868
1869







-
-
-
+








(if (args:get-arg "-update-meta")
    (begin
      (if (not (launch:setup))
	  (begin
	    (debug:print 0 *default-log-port* "Failed to setup, exiting") 
	    (exit 1)))
      ;; now can find our db
      ;; keep this one local
      (open-run-close runs:update-all-test_meta #f)
      (runs:update-all-test_meta #f)
      (set! *didsomething* #t)))

;;======================================================================
;; Start a repl
;;======================================================================

;; fakeout readline
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990

1991







1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
1988
1989
1990
1991
1992
1993
1994


1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006

2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017







-
-


+

+
+
+
+
+
+
+

-











          (debug:print 0 *default-log-port* "Failed to create HTML output in " toppath "/lt/runs-index.html"))
      (set! *didsomething* #t)))

;;======================================================================
;; Exit and clean up
;;======================================================================

(if *runremote* (close-all-connections!)) ;; for http-client

(if (not *didsomething*)
    (debug:print 0 *default-log-port* help))
;;(BB> "thread-join! watchdog")

;; join the watchdog thread if it has been thread-start!ed  (it may not have been started in the case of a server that never enters running state)
;;   (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead)
(if (thread? *watchdog*)
    (case (thread-state *watchdog*)
      ((ready running blocked sleeping terminated dead)
       (thread-join! *watchdog*))))

(set! *time-to-exit* #t)
(thread-join! *watchdog*)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))

Added minimal/manyservers.sh version [1fde698cb9].
























































































































1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#!/bin/bash

echo manyservers.sh pid $$

logdir=$PWD/log-manysrv


function reset {
    rm -f .homehost .server .server.lock links/.db/monitor.db .starting-server
    }

function launch_many_servers {
    # count  = $1
    # logdir = $2
    # prefx  = $3
  perl -e 'foreach my $i (1 ... '$1'){print "'$2'/'$3'-srv-$i.log\n"}' | \
     xargs -P $1 -n 1 megatest -server - -run-id 0 -daemonize -log
}

    
function get_srv_pids {
    ps auwx | grep "mtest -server" | grep $logdir | grep -v grep | awk '{print $2}' 
}


if [[ -e $logdir ]]; then rm -rf $logdir; fi
if [[ ! -e $logdir ]]; then mkdir $logdir; fi

reset

simultaneous_servers=20
server_collision_resolution_delay=15
server_timeout_delay=65

echo "Launching $simultaneous_servers simultaneous servers"
launch_many_servers $simultaneous_servers $logdir "first"
echo "Sleeping $server_collision_resolution_delay seconds to allow new servers to die because one is already running."
sleep $server_collision_resolution_delay

pids=`get_srv_pids`
pids_left=`echo $pids | wc -w`
echo "pids_left=$pids_left"
echo "after $server_collision_resolution_delay seconds: servers remaining=$pids_left; expecting 1"
if [[ $pids_left == 1 ]]; then
    echo "All servers but 1 terminated. Still good."
else
    if [[ $pids_left == 0 ]]; then
        echo "All servers died too soon.  Not good. Aborting."
        echo "TEST FAIL"
        exit 1
    else
        echo "Too many servers left.  Not good.  Aborting."
        echo "TEST FAIL"
        echo $pids | xargs kill
        sleep 5
        pids=`get_srv_pids`
        pids_left=`echo $pids | wc -w`
        if [[ ! ( $pids_left == 0 ) ]]; then
            echo $pids | xargs kill -9
        fi
        exit 1
    fi
fi



echo "launching another volley of $simultaneous_servers.  THey should all perish. right away, leaving the one server running."
launch_many_servers $simultaneous_servers $logdir "second"
sleep $server_collision_resolution_delay

pids=`get_srv_pids`
pids_left=`echo $pids | wc -w`
echo "pids_left=$pids_left"
echo "after $server_collision_resolution_delay seconds: servers remaining=$pids_left; expecting 1"
if [[ $pids_left == 1 ]]; then
    echo "All servers but 1 terminated. So far so good."
else
    if [[ $pids_left == 0 ]]; then
        echo "All servers died too soon.  Not good. Aborting."
        echo "TEST FAIL"
        exit 1
    else
        echo "Too many servers left.  Not good.  Aborting."
        echo "TEST FAIL"
        echo $pids | xargs kill
        sleep 5
        pids=`get_srv_pids`
        pids_left=`echo $pids | wc -w`
        if [[ ! ( $pids_left == 0 ) ]]; then
            echo $pids | xargs kill -9
        fi
        exit 1
    fi
fi



echo "sleeping for awhile ($server_timeout_delay seconds) to let server exit on its own for no-request timeout"
sleep $server_timeout_delay
pids=`get_srv_pids`
pids_left=`echo $pids | wc -w`
echo "after $server_timeout_delay seconds: servers remaining=$pids_left; expecting 0"

if [[ $pids_left == 0 ]]; then
    echo "No servers remain. This is good."
    echo "TEST PASS"
    exit 0
else
    echo "Too many servers left.  Not good.  Aborting."
    echo "TEST FAIL"
    echo $pids | xargs kill
    sleep 5
    pids=`get_srv_pids`
    pids_left=`echo $pids | wc -w`
    if [[ ! ( $pids_left == 0 ) ]]; then
        echo $pids | xargs kill -9
    fi
    exit 1
fi

Modified rmt.scm from [5e992d9837] to [c70f311cf0].

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
22
23
24
25
26
27
28








29
30
31
32
33
34
35







-
-
-
-
-
-
-
-







;; THESE ARE ALL CALLED ON THE CLIENT SIDE!!!
;;

;; generate entries for ~/.megatestrc with the following
;;
;;  grep define ../rmt.scm | grep rmt: |perl -pi -e 's/\(define\s+\((\S+)\W.*$/\1/'|sort -u

(defstruct remote
  (hh-dat            (common:get-homehost)) ;; homehost record ( addr . hhflag )
  (server-url        (if *toppath* (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*) #f))
  (last-server-check 0)  ;; last time we checked to see if the server was alive
  (conndat           #f)
  (transport         *transport-type*)
  (server-timeout    (or (server:get-timeout) 100))) ;; default to 100 seconds

;;======================================================================
;;  S U P P O R T   F U N C T I O N S
;;======================================================================

;; if a server is either running or in the process of starting call client:setup
;; else return #f to let the calling proc know that there is no server available
;;
92
93
94
95
96
97
98











99
100
101
102
103
104
105


106
107
108
109
110
111








112
113
114
115
116
117
118
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110






111
112
113
114
115
116
117
118
119
120
121
122
123
124
125







+
+
+
+
+
+
+
+
+
+
+







+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+







      (rmt:send-receive cmd rid params attemptnum: attemptnum))
     ;; on homehost and this is a read
     ((and (cdr (remote-hh-dat *runremote*))   ;; on homehost
           (member cmd api:read-only-queries)) ;; this is a read
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  3")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; on homehost and this is a write, we already have a server, but server has died
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*)           ;; have a server
           (not (server:read-dotserver *toppath*)))  ;; server has died.
      (set! *runremote* #f)
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:send-receive cmd rid params attemptnum: attemptnum))

     ;; on homehost and this is a write, we already have a server
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
           (not (member cmd api:read-only-queries))  ;; this is a write
           (remote-server-url *runremote*))          ;; have a server
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4")
      (rmt:open-qry-close-locally cmd 0 params))

     ;; commented by bb; this was blocking server passive start on write on homehost (case 5)
     ;; on homehost and this is a write, we have a server (we know because case 4 checked)
     ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
	   (not (member cmd api:read-only-queries)))
      (mutex-unlock! *rmt-mutex*)
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
      (rmt:open-qry-close-locally cmd 0 params))
     ;; ;; on homehost and this is a write, we have a server (we know because case 4 checked)
     ;; ((and (cdr (remote-hh-dat *runremote*))         ;; on homehost
     ;;       (not (member cmd api:read-only-queries)))
     ;;  (mutex-unlock! *rmt-mutex*)
     ;;  (debug:print-info 12 *default-log-port* "rmt:send-receive, case  4.1")
     ;;  (rmt:open-qry-close-locally cmd 0 params))

     
     ;; no server contact made and this is a write, passively start a server 
     ((and (not (remote-server-url *runremote*))
	   (not (member cmd api:read-only-queries)))
      (debug:print-info 12 *default-log-port* "rmt:send-receive, case  5")
      (let ((serverconn (server:read-dotserver *toppath*))) ;; (server:check-if-running *toppath*))) ;; Do NOT want to run server:check-if-running - very expensive to do for every write call
	(if serverconn
	    (remote-server-url-set! *runremote* serverconn) ;; the string can be consumed by the client setup if needed
255
256
257
258
259
260
261
262

263
264
265
266
267
268
269
262
263
264
265
266
267
268

269
270
271
272
273
274
275
276







-
+







	      #f))
	(begin
	  ;; (rmt:update-db-stats run-id cmd params duration)
	  ;; mark this run as dirty if this was a write, the watchdog is responsible for syncing it
	  (if qry-is-write
	      (let ((start-time (current-seconds)))
		(mutex-lock! *db-multi-sync-mutex*)
		(set! *db-last-write* start-time) ;; the oldest "write"
		(set! *db-last-access* start-time)  ;; THIS IS PROBABLY USELESS? (we are on a client)
                (mutex-unlock! *db-multi-sync-mutex*)))))
    res))

(define (rmt:send-receive-no-auto-client-setup connection-info cmd run-id params)
  (let* ((run-id   (if run-id run-id 0))
	 (res  	   (handle-exceptions
		    exn
318
319
320
321
322
323
324





325
326
327
328
329
330
331
332
333
334







335
336
337
338
339
340
341
342
343
344
345
346
347
348
349





350
351
352
353
354
355
356
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380







+
+
+
+
+










+
+
+
+
+
+
+















+
+
+
+
+








;; hand off a call to one of the db:queries statements
;; added run-id to make looking up the correct db possible 
;;
(define (rmt:general-call stmtname run-id . params)
  (rmt:send-receive 'general-call run-id (append (list stmtname run-id) params)))


;; given a hostname, return a pair of cpu load and update time representing latest intelligence from tests running on that host
(define (rmt:get-latest-host-load hostname)
  (rmt:send-receive 'get-latest-host-load 0 (list hostname)))

;; (define (rmt:sync-inmem->db run-id)
;;   (rmt:send-receive 'sync-inmem->db run-id '()))

(define (rmt:sdb-qry qry val run-id)
  ;; add caching if qry is 'getid or 'getstr
  (rmt:send-receive 'sdb-qry run-id (list qry val)))

;; NOT COMPLETED
(define (rmt:runtests user run-id testpatt params)
  (rmt:send-receive 'runtests run-id testpatt))

;;======================================================================
;;  T E S T   M E T A 
;;======================================================================

(define (rmt:get-tests-tags)
  (rmt:send-receive 'get-tests-tags #f '()))

;;======================================================================
;;  K E Y S 
;;======================================================================

;; These require run-id because the values come from the run!
;;
(define (rmt:get-key-val-pairs run-id)
  (rmt:send-receive 'get-key-val-pairs run-id (list run-id)))

(define (rmt:get-keys)
  (if *db-keys* *db-keys* 
     (let ((res (rmt:send-receive 'get-keys #f '())))
       (set! *db-keys* res)
       res)))

(define (rmt:get-keys-write) ;; dummy query to force server start
  (let ((res (rmt:send-receive 'get-keys-write #f '())))
    (set! *db-keys* res)
    res))

;; we don't reuse run-id's (except possibly *after* a db cleanup) so it is safe
;; to cache the resuls in a hash
;;
(define (rmt:get-key-vals run-id)
  (or (hash-table-ref/default *keyvals* run-id #f)
      (let ((res (rmt:send-receive 'get-key-vals #f (list run-id))))
586
587
588
589
590
591
592
593
594


595
596
597
598
599
600
601
610
611
612
613
614
615
616


617
618
619
620
621
622
623
624
625







-
-
+
+







(define (rmt:update-run-event_time run-id)
  (rmt:send-receive 'update-run-event_time #f (list run-id)))

(define (rmt:get-runs-by-patt  keys runnamepatt targpatt offset limit fields last-runs-update) ;; fields of #f uses default
  (rmt:send-receive 'get-runs-by-patt #f (list keys runnamepatt targpatt offset limit fields last-runs-update)))

(define (rmt:find-and-mark-incomplete run-id ovr-deadtime)
  (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
      (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))))
  ;; (if (rmt:send-receive 'have-incompletes? run-id (list run-id ovr-deadtime))
  (rmt:send-receive 'mark-incomplete run-id (list run-id ovr-deadtime))) ;; )

(define (rmt:get-main-run-stats run-id)
  (rmt:send-receive 'get-main-run-stats #f (list run-id)))

(define (rmt:get-var varname)
  (rmt:send-receive 'get-var #f (list varname)))

Modified rpc-transport.scm from [7aa56cfddc] to [f2b0cd0198].

174
175
176
177
178
179
180
181

182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

197
198
199
200

201
202
203
204
205
206
207
174
175
176
177
178
179
180

181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

196
197
198
199

200
201
202
203
204
205
206
207







-
+














-
+



-
+







		  (port     (cadr host-info))
		  (ping-res ((rpc:procedure 'server:login host port) *toppath*)))
	      (if ping-res
		  (let ((server-dat (list iface port #f #f #f)))
		    (hash-table-set! *runremote* run-id server-dat)
		    server-dat)
		  (begin
		    (server:try-running run-id)
		    (server:try-running *toppath*)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))
 	    (let* ((server-db-info (open-run-close tasks:get-server tasks:open-db run-id)))
 	      (debug:print-info 0 *default-log-port* "client:setup server-dat=" server-dat ", remaining-tries=" remaining-tries)
	      (if server-db-info
 		  (let* ((iface     (tasks:hostinfo-get-interface server-db-info))
 			 (port      (tasks:hostinfo-get-port      server-db-info))
			 (server-dat (list iface port #f #f #f))
 			 (ping-res  ((rpc:procedure 'server:login host port) *toppath*)))
 		    (if start-res
 			(begin
 			  (hash-table-set! *runremote* run-id server-dat)
			  server-dat)
			(begin
			  (server:try-running run-id)
			  (server:try-running *toppath*)
			  (thread-sleep! 2)
			  (rpc-transport:client-setup run-id (- remtries 1)))))
		  (begin
		    (server:try-running run-id)
		    (server:try-running *toppath*)
		    (thread-sleep! 2)
		    (rpc-transport:client-setup run-id (- remtries 1)))))))))
;; 
;; 	     (port     (if (and hostinfo (> (length hostdat) 1))(cadr hostdat) #f)))
;; 	(if (and port
;; 		 (string->number port))
;; 	    (let ((portn (string->number port)))

Modified runs.scm from [ebf1e29df4] to [b5157dd5f4].

1956
1957
1958
1959
1960
1961
1962













1963
1964
1965
1966
1967
1968
1969
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982







+
+
+
+
+
+
+
+
+
+
+
+
+







	      (val (config-lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (print "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))
         (res     '())) ;; list of tests that match one or more tags
    (for-each
     (lambda (tag)
       (if (patt-list-match tag tagpatt)
           (set! res (append (hash-table-ref tagdata tag)))))
     (hash-table-keys tagdata))
    res))
    

;; Update test_meta for all tests
(define (runs:update-all-test_meta db)
  (let ((test-names (tests:get-all))) ;; (tests:get-valid-tests)))
    (for-each 
     (lambda (test-name)
       (let* ((test-conf    (mt:lazy-read-test-config test-name)))

Modified server.scm from [c0f30a061c] to [cd39bae7c7].

45
46
47
48
49
50
51
52













53
54
55
56
57
58
59
45
46
47
48
49
50
51

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71







-
+
+
+
+
+
+
+
+
+
+
+
+
+







;;

;; all routes though here end in exit ...
;;
;; start_server
;;
(define (server:launch run-id transport-type)
  (BB> "server:launch fired for run-id="run-id" transport-type="transport-type)
  ;;(BB> "server:launch fired for run-id="run-id" transport-type="transport-type)

  (let ((attempt-in-progress (server:start-attempted? *toppath*)))
    (when attempt-in-progress
      (debug:print-info 0 *default-log-port* "Server start attempt in progress in other process (=> "attempt-in-progress"<=).  Aborting server launch attempt in this process ("(current-process-id)")")
      (exit)))
      
  (let ((dotserver-url (server:check-if-running *toppath*)))
    (when dotserver-url
      (debug:print-info 0 *default-log-port* "Server already running (=> "dotserver-url"<=).  Aborting server launch attempt in this process ("(current-process-id)")")
      (exit)
      ))
  
  (case transport-type
    ((http)(http-transport:launch run-id))
    ;;((nmsg)(nmsg-transport:launch run-id))
    ((rpc)  (rpc-transport:launch run-id))
    (else (debug:print-error 0 *default-log-port* "unknown server type " transport-type))))
;;       (else   (debug:print-error 0 *default-log-port* "No known transport set, transport=" transport ", using rpc")
;; 	      (rpc-transport:launch run-id)))))
101
102
103
104
105
106
107
108

109


110
111
112
113
114
115

116
117
118
119
120
121
122
123
124
125









126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144


















145
146
147
148
149


150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166










167
168

169
170
171
172
173

174
175
176
177









178
179
180
181
182
183
184
113
114
115
116
117
118
119

120
121
122
123
124
125
126
127
128

129
130
131
132
133
134
135
136



137
138
139
140
141
142
143
144
145
146


















147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167


168
169
170
171
172
173
174
175
176
177
178
179
180






181
182
183
184
185
186
187
188
189
190


191
192
193
194
195
196
197




198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213







-
+

+
+





-
+







-
-
-
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



-
-
+
+











-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
-
+





+
-
-
-
-
+
+
+
+
+
+
+
+
+







     result)))

;; Given a run id start a server process    ### NOTE ### > file 2>&1 
;; if the run-id is zero and the target-host is set 
;; try running on that host
;;   incidental: rotate logs in logs/ dir.
;;
(define  (server:run areapath) ;; areapath is ignored for now.
(define  (server:run areapath) ;; areapath is *toppath* for a given testsuite area
  (let* ((curr-host   (get-host-name))
         (attempt-in-progress (server:start-attempted? areapath))
         (dot-server-url (server:check-if-running areapath))
	 (curr-ip     (server:get-best-guess-address curr-host))
	 (curr-pid    (current-process-id))
	 (homehost    (common:get-homehost)) ;; configf:lookup *configdat* "server" "homehost" ))
	 (target-host (car homehost))
	 (testsuite   (common:get-testsuite-name))
	 (logfile     (conc *toppath* "/logs/server.log"))
	 (logfile     (conc areapath "/logs/server.log"))
	 (cmdln (conc (common:get-megatest-exe)
		      " -server " (or target-host "-") " -run-id " 0 (if (equal? (configf:lookup *configdat* "server" "daemonize") "yes")
									      (conc " -daemonize -log " logfile)
									      "")
		      " -m testsuite:" testsuite)) ;; (conc " >> " logfile " 2>&1 &")))))
	 (log-rotate  (make-thread common:rotate-logs  "server run, rotate logs thread")))
    ;; we want the remote server to start in *toppath* so push there
    (push-directory *toppath*)
    (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
    (thread-start! log-rotate)
    (push-directory areapath)
    (cond
     (attempt-in-progress
      (debug:print 0 *default-log-port* "INFO: Not trying to start server because attempt is in progress: "attempt-in-progress))
     (dot-server-url
            (debug:print 0 *default-log-port* "INFO: Not trying to start server because one is already running : "dot-server-url))
     (else
      (debug:print 0 *default-log-port* "INFO: Trying to start server (" cmdln ") ...")
      (thread-start! log-rotate)

    ;; host.domain.tld match host?
    (if (and target-host 
	     ;; look at target host, is it host.domain.tld or ip address and does it 
	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
    
    (setenv "TARGETHOST_LOGF" logfile)
    (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    (thread-join! log-rotate)
    (pop-directory)))

      ;; host.domain.tld match host?
      (if (and target-host 
               ;; look at target host, is it host.domain.tld or ip address and does it 
               ;; match current ip or hostname
               (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
               (not (equal? curr-ip target-host)))
          (begin
            (debug:print-info 0 *default-log-port* "Starting server on " target-host ", logfile is " logfile)
            (setenv "TARGETHOST" target-host)))
      
      (setenv "TARGETHOST_LOGF" logfile)
      (common:wait-for-normalized-load 4 " delaying server start due to load" remote-host: (get-environment-variable "TARGETHOST")) ;; do not try starting servers on an already overloaded machine, just wait forever
      (system (conc "nbfake " cmdln))
      (unsetenv "TARGETHOST_LOGF")
      (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
      (thread-join! log-rotate)
      (pop-directory)))))
    
(define (server:get-client-signature) ;; BB> why is this proc named "get-"?  it returns nothing -- set! has not return value.
  (if *my-client-signature* *my-client-signature*
      (let ((sig (server:mk-signature)))
	(set! *my-client-signature* sig)
	*my-client-signature*)))
        (set! *my-client-signature* sig)
        *my-client-signature*)))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
(define (server:kind-run areapath)
  (let ((last-run-time (hash-table-ref/default *server-kind-run* areapath #f)))
    (if (or (not last-run-time)
	    (> (- (current-seconds) last-run-time) 30))
	(begin
	  (server:run areapath)
	  (hash-table-set! *server-kind-run* areapath (current-seconds))))))

;; The generic run a server command. Dispatches the call to server 0 if run-id != 0
;; 
;;  (define (server:try-running run-id)
;;    (if (eq? run-id 0)
;;        (server:run run-id)
;;        (rmt:start-server run-id)))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

(define (server:attempting-start areapath)
  (with-output-to-file
      (conc areapath "/.starting-server")
    (lambda ()
      (print (current-process-id) " on " (get-host-name)))))
  
(define (server:complete-attempt areapath)
  (delete-file* (conc areapath "/.starting-server")))
(define server:try-running server:run) ;; there is no more per-run servers ;; REMOVE ME. BUG.

  
(define (server:start-attempted? areapath)
  (let ((flagfile (conc areapath "/.starting-server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (cond
     (and (file-exists? flagfile)
	  (< (- (current-seconds)
		(file-modification-time flagfile))
	     15))))) ;; exists and less than 15 seconds old
      ((and (file-exists? flagfile)
            (< (- (current-seconds)
                  (file-modification-time flagfile))
               15)) ;; exists and less than 15 seconds old
       (with-input-from-file flagfile (lambda () (read-line))))
      ((file-exists? flagfile) ;; it is stale.
       (server:complete-attempt areapath)
       #f)
      (else #f)))))
    
(define (server:read-dotserver areapath)
  (let ((dotfile (conc areapath "/.server")))
    (handle-exceptions
     exn
     #f  ;; if things go wrong pretend we can't see the file
     (if (and (file-exists? dotfile)
229
230
231
232
233
234
235


236

237
238
239
240
241
242
243
258
259
260
261
262
263
264
265
266

267
268
269
270
271
272
273
274







+
+
-
+







    (if dotserver
	(let* ((res (case *transport-type*
		      ((http)(server:ping-server dotserver))
		      ;; ((nmsg)(nmsg-transport:ping (tasks:hostinfo-get-interface server)
		      )))
	  (if res
	      dotserver
	      (begin
                (server:remove-dotserver-file areapath ".*") ;; remove stale dotserver
	      #f))
                #f)))
	#f)))

;; called in megatest.scm, host-port is string hostname:port
;;
;; NOTE: This is NOT called directly from clients as not all transports support a client running
;;       in the same process as the server.
;;

Modified tasks.scm from [b8a3c2af2e] to [a0c6ff1ee2].

321
322
323
324
325
326
327
328

329

330
331
332
333
334
335
336
321
322
323
324
325
326
327

328

329
330
331
332
333
334
335
336







-
+
-
+







   (let* ((header (list "id" "hostname" "pid" "interface" "port" "pubport" "state" "run_id" "priority" "start_time"))
	  (selstr (string-intersperse header ","))
	  (res    '()))
    (sqlite3:for-each-row
     (lambda (a . b)
       (set! res (cons (apply vector a b) res)))
     mdb
     (conc "SELECT " selstr " FROM servers WHERE run_id=? AND state in ('available','running','dbprep') ORDER BY start_time DESC;")
     (conc "SELECT " selstr " FROM servers WHERE state in ('available','running','dbprep') ORDER BY start_time DESC;")
     run-id)
     )
    (vector header res)))

(define (tasks:get-server mdb run-id #!key (retries 10))
  (let ((res  #f)
	(best #f))
    (handle-exceptions
     exn
400
401
402
403
404
405
406
407

408
409
410
411
412
413
414
400
401
402
403
404
405
406

407
408
409
410
411
412
413
414







-
+







	     (delay-time 0))
      (if (and (not server-dat)
	       (< delay-time delay-max-tries))
	  (begin
	    (if (common:low-noise-print 60 "tasks:start-and-wait-for-server" run-id)
		(debug:print 0 *default-log-port* "Try starting server for run-id " run-id))
	    (thread-sleep! (/ (random 2000) 1000))
	    (server:kind-run run-id)
	    (server:kind-run *toppath*)
	    (thread-sleep! (min delay-time 1))
            (if (not (or (server:start-attempted? *toppath*)
                         (server:read-dotserver *toppath*))) ;; no point in trying
                (loop (tasks:get-server (db:delay-if-busy tdbdat) run-id)(+ delay-time 1))
                #f))
          #f)))

Modified tests.scm from [5514a2a23d] to [63786038c0].

918
919
920
921
922
923
924
925
926
927
928




929
930
931
932
933
934
935
936
937
938







939
940
941
942
943
944
945
918
919
920
921
922
923
924




925
926
927
928
929
930
931
932
933
934
935
936
937

938
939
940
941
942
943
944
945
946
947
948
949
950
951







-
-
-
-
+
+
+
+









-
+
+
+
+
+
+
+







    (close-output-port oup)))
	  
	  
;; MUST BE CALLED local!
;;
(define (tests:test-get-paths-matching keynames target fnamepatt #!key (res '()))
  ;; BUG: Move the values derived from args to parameters and push to megatest.scm
  (let* ((testpatt   (if (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))
	 (statepatt  (if (args:get-arg ":state")   (args:get-arg ":state")    "%"))
	 (statuspatt (if (args:get-arg ":status")  (args:get-arg ":status")   "%"))
	 (runname    (if (args:get-arg ":runname") (args:get-arg ":runname")  "%"))
  (let* ((testpatt   (or (args:get-arg "-testpatt")(args:get-arg "-testpatt") "%"))
	 (statepatt  (or (args:get-arg "-state")   (args:get-arg ":state")    "%"))
	 (statuspatt (or (args:get-arg "-status")  (args:get-arg ":status")   "%"))
	 (runname    (or (args:get-arg "-runname") (args:get-arg ":runname")  "%"))
	 (paths-from-db (rmt:test-get-paths-matching-keynames-target-new keynames target res
					testpatt
					statepatt
					statuspatt
					runname)))
    (if fnamepatt
	(apply append 
	       (map (lambda (p)
		      (if (directory-exists? p)
			  (glob (conc p "/" fnamepatt))
			  (let ((glob-query (conc p "/" fnamepatt)))
			    (handle-exceptions
				exn
				(with-input-from-pipe
				    (conc "echo " glob-query)
				  read-lines)  ;; we aren't going to try too hard. If glob breaks it is likely because someone tried to do */*/*.log or similar
			      (glob glob-query)))
			  '()))
		    paths-from-db))
	paths-from-db)))

			      
;;======================================================================
;; Gather data from test/task specifications
1330
1331
1332
1333
1334
1335
1336

1337
1338
1339
1340
1341
1342
1343
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350







+







	   (set! res count))
	 tdb
	 "SELECT count(id) FROM test_rundat;")
	res))
  0)

(define (tests:update-central-meta-info run-id test-id cpuload diskfree minutes uname hostname)
  (rmt:general-call 'update-test-rundat run-id test-id (current-seconds) (or cpuload -1)(or diskfree -1) -1 (or minutes -1))
  (if (and cpuload diskfree)
      (rmt:general-call 'update-cpuload-diskfree run-id cpuload diskfree test-id))
  (if minutes 
      (rmt:general-call 'update-run-duration run-id minutes test-id))
  (if (and uname hostname)
      (rmt:general-call 'update-uname-host run-id uname hostname test-id)))
  

Modified tests/fullrun/tests/all_toplevel/testconfig from [3fb72f4d55] to [5a83007156].

1
2
3

4
5
6
7
8
9
10
1
2

3
4
5
6
7
8
9
10


-
+







[ezsteps]
calcresults megatest -list-runs $MT_RUNNAME -target $MT_TARGET
check_triggers  cat $MT_RUN_AREA_HOME/triggers_$MT_RUN_NAME.dat
check_triggers  cat $MT_RUN_AREA_HOME/triggers_$MT_RUNNAME.dat

[logpro]
check_triggers ;;
  (expect:error in "LogFileBody" = 0 "No errors" #/error/i)

[requirements]
waiton  #{getenv ALL_TOPLEVEL_TESTS}

Added utils/remrun version [836fc55fdd].





























1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
#!/bin/bash
###############################################################################
#
# remrun - same behavior as nbfake but first param is a hosthane
#          (capture command output in a logfile)
#
# remrun behavior can be changed by setting the following env var:
#   NBFAKE_LOG        Logfile for nbfake output
#
###############################################################################

if [[ -z "$@" ]]; then
  cat <<__EOF

remrun usage:

remrun hostname <command to run>

remrun behavior can be changed by setting the following env vars:
   NBFAKE_LOG        Logfile for remrun output

__EOF
  exit
fi

export NBFAKE_HOST=$1
shift
exec nbfake $*