Megatest

Changes On Branch v1.71
Login

Changes In Branch v1.71 Excluding Merge-Ins

This is equivalent to a diff from eb2060809c to e1443922b3

2024-06-24
18:42
Moving to .servinfo files WIP. Creating the files is working. Leaf check-in: e1443922b3 user: mmgraham tags: v1.71
2024-05-30
14:29
Changed version to 1.7105 check-in: 88e277ab72 user: mmgraham tags: v1.71, v1.7105
2023-06-27
18:45
cherry picked 8ff6166610. Fixes the quote problem in PATH check-in: 2438268e0b user: mmgraham tags: v1.70, v1.7014
2023-05-15
18:59
Limit to 10 dbs in num-run-dbs. Fully turned off running find-and-mark in runs.scm check-in: 447b257e2f user: matt tags: v1.71
2023-05-08
16:46
Added with-transaction around a for-each-row check-in: eb2060809c user: mmgraham tags: v1.70
2023-05-07
22:56
Corrected file paths of -shm and -wal files in db:all-db-sync check-in: 8bf6fd860b user: mmgraham tags: v1.70

Modified Makefile from [6526b7c191] to [9cff738d12].

309
310
311
312
313
314
315



316
317
318
319
320
321
322
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
	$(INSTALL) $< $@
	chmod a+x $@





deploytarg/nbfake : utils/nbfake
	$(INSTALL) $< $@
	chmod a+x $@

deploytarg/viewscreen : utils/viewscreen
	$(INSTALL) $< $@







>
>
>







309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/mt-new-to-old.sh : utils/mt-new-to-old.sh
	$(INSTALL) $< $@
	chmod a+x $@

$(PREFIX)/bin/convert-db.sh : utils/convert-db.sh
	$(INSTALL) $< $@
	chmod a+x $@

deploytarg/nbfake : utils/nbfake
	$(INSTALL) $< $@
	chmod a+x $@

deploytarg/viewscreen : utils/viewscreen
	$(INSTALL) $< $@
356
357
358
359
360
361
362

363
364
365
366
367
368
369
        fi

install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
          $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
          $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \

	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0







>







359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
        fi

install : $(PREFIX)/bin/.$(ARCHSTR) $(PREFIX)/bin/.$(ARCHSTR)/mtest $(PREFIX)/bin/megatest \
          $(PREFIX)/bin/.$(ARCHSTR)/dboard $(PREFIX)/bin/dashboard $(HELPERS) $(PREFIX)/bin/nbfake \
          $(PREFIX)/bin/.$(ARCHSTR)/mtexec $(PREFIX)/bin/mtexec $(PREFIX)/bin/serialize-env \
	  $(PREFIX)/bin/nbfind $(PREFIX)/bin/mtrunner $(PREFIX)/bin/viewscreen $(PREFIX)/bin/mt_xterm \
          $(PREFIX)/bin/mt-old-to-new.sh $(PREFIX)/bin/mt-new-to-old.sh \
          $(PREFIX)/bin/convert-db.sh $(PREFIX)/bin/convert-db.sh \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun \
	  $(PREFIX)/share/docs/megatest_manual.html $(PREFIX)/bin/remrun $(PREFIX)/bin/mtutil \
	  $(PREFIX)/bin/tcmt $(PREFIX)/share/db/mt-pg.sql \
          $(PREFIX)/share/js/jquery-3.1.0.slim.min.js \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libpangox-1.0.so.0 \
          $(PREFIX)/bin/.$(ARCHSTR)/lib/libxcb-xlib.so.0

Modified api.scm from [4f8dbc344f] to [2295f8d728].

227
228
229
230
231
232
233



234
235
236
237
238
239
240
241
242
243
244
245


246
247
248
249
250
251
252
                     ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                     ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                     ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                     ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                     ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                     ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))




                     ;; RUNS
                     ((register-run)                 (apply db:register-run dbstruct params))
                     ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                     ((delete-run)                   (apply db:delete-run dbstruct params))
                     ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
                     ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
                     ((update-run-stats)             (apply db:update-run-stats dbstruct params))
                     ((set-var)                      (apply db:set-var dbstruct params))
                     ((inc-var)                      (apply db:inc-var dbstruct params))
		     ((dec-var)                      (apply db:dec-var dbstruct params))
                     ((del-var)                      (apply db:del-var dbstruct params))
		     ((add-var)                      (apply db:add-var dbstruct params))



                     ;; STEPS
                     ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
                     ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
                     
                     ;; TEST DATA
                     ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))







>
>
>












>
>







227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
                     ((test-set-state-status)           (apply db:test-set-state-status dbstruct params))
                     ((test-set-top-process-pid)        (apply db:test-set-top-process-pid dbstruct params))
                     ((set-state-status-and-roll-up-items) (apply db:set-state-status-and-roll-up-items dbstruct params))
                     ((set-state-status-and-roll-up-run) (apply db:set-state-status-and-roll-up-run dbstruct params)) 
                     ((top-test-set-per-pf-counts)      (apply db:top-test-set-per-pf-counts dbstruct params))
                     ((test-set-archive-block-id)       (apply db:test-set-archive-block-id dbstruct params))

                     ((insert-test)                   (let ((run-id (alist-ref "run_id" params equal? #f)))
                                                       (db:insert-test dbstruct run-id params)))

                     ;; RUNS
                     ((register-run)                 (apply db:register-run dbstruct params))
                     ((set-tests-state-status)       (apply db:set-tests-state-status dbstruct params))
                     ((delete-run)                   (apply db:delete-run dbstruct params))
                     ((lock/unlock-run)              (apply db:lock/unlock-run dbstruct params))
                     ((update-run-event_time)        (apply db:update-run-event_time dbstruct params))
                     ((update-run-stats)             (apply db:update-run-stats dbstruct params))
                     ((set-var)                      (apply db:set-var dbstruct params))
                     ((inc-var)                      (apply db:inc-var dbstruct params))
		     ((dec-var)                      (apply db:dec-var dbstruct params))
                     ((del-var)                      (apply db:del-var dbstruct params))
		     ((add-var)                      (apply db:add-var dbstruct params))

                     ((insert-run)                   (apply db:insert-run dbstruct params))

                     ;; STEPS
                     ((teststep-set-status!)         (apply db:teststep-set-status! dbstruct params))
                     ((delete-steps-for-test!)       (apply db:delete-steps-for-test! dbstruct params))
                     
                     ;; TEST DATA
                     ((test-data-rollup)             (apply db:test-data-rollup dbstruct params))

Modified common.scm from [760479d289] to [6bed0cbf4e].

29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53



54
55
56
57
58
59
60

(declare (unit common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")


;; (require-library margs)
;; (include "margs.scm")

;; (define old-exit exit)
;; 
;; (define (exit . code)
;;   (if (null? code)
;;       (old-exit)
;;       (old-exit code)))

(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (begin
					(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")



					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)







|
|
<
|
<
<
<
<
<
<








>
>
>







29
30
31
32
33
34
35
36
37

38






39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

(declare (unit common))
(declare (uses commonmod))
(import commonmod)

(include "common_records.scm")

(define (remove-files filespec)
  (let ((files (glob filespec)))

    (for-each delete-file* files)))







(define (stop-the-train)
  (thread-start! (make-thread (lambda ()
				(let loop ()
				  (if (and *toppath*
					   (file-exists? (conc *toppath*"/stop-the-train")))
				      (begin
					(debug:print 0 *default-log-port* "ERROR: found file "*toppath*"/stop-the-train, exiting immediately")
                                        (remove-files (conc *toppath* "/logs/server*"))
                                        (remove-files (conc *toppath* "/.servinfo/*"))
                                        (remove-files (conc *toppath* "/.mtdb/*lock"))
					(exit 1)))
				  (thread-sleep! 5)
				  (loop))))))

;; execute thunk, return value.  If exception thrown, trap exception, return #f, and emit nonfatal condition note to *default-log-port* .
;; arguments - thunk, message
(define (common:fail-safe thunk warning-message-on-exception)
131
132
133
134
135
136
137

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165

(define *pkts-info*    (make-hash-table)) ;; store stuff like the last parent here
(define *configinfo*   #f)   ;; raw results from setup, includes toppath and table from megatest.config
(define *runconfigdat* #f)   ;; run configs data
(define *configdat*    #f)   ;; megatest.config data
(define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done
(define *toppath*      #f)

(define *already-seen-runconfig-info* #f)

(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log
;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *default-log-port*  (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *default-area-tag* "local")

;; DATABASE
;; (define *dbstruct-dbs*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
;; (define *db-write-access*     #t)
;; db sync
;; (define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
;; (define *db-access-mutex*     (make-mutex)) ;; moved to dbfile
(define *db-transaction-mutex* (make-mutex))







>




















|







127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

(define *pkts-info*    (make-hash-table)) ;; store stuff like the last parent here
(define *configinfo*   #f)   ;; raw results from setup, includes toppath and table from megatest.config
(define *runconfigdat* #f)   ;; run configs data
(define *configdat*    #f)   ;; megatest.config data
(define *configstatus* #f)   ;; status of data; 'fulldata : all processing done, #f : no data yet, 'partialdata : partial read done
(define *toppath*      #f)
(define *dbdir* ".mtdb")
(define *already-seen-runconfig-info* #f)

(define *test-meta-updated* (make-hash-table))
(define *globalexitstatus*  0) ;; attempt to work around possible thread issues
(define *passnum*           0) ;; when running track calls to run-tests or similar
;; (define *alt-log-file* #f)  ;; used by -log
;; (define *common:denoise*    (make-hash-table)) ;; for low noise printing
(define *default-log-port*  (current-error-port))
(define *time-zero* (current-seconds)) ;; for the watchdog
(define *default-area-tag* "local")

;; DATABASE
;; (define *dbstruct-dbs*         #f) ;; used to cache the dbstruct in db:setup. Goal is to remove this.
;; db stats
(define *db-stats*            (make-hash-table)) ;; hash of vectors < count duration-total >
(define *db-stats-mutex*      (make-mutex))
;; db access
(define *db-last-access*      (current-seconds)) ;; last db access, used in server
;; (define *db-write-access*     #t)
;; db sync
;; (define *db-last-sync*        0)                 ;; last time the sync to nfs db happened
(define *db-sync-in-progress* #f)                ;; if there is a sync in progress do not try to start another
;; (define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
;; task db
(define *task-db*             #f) ;; (vector db path-to-db)
(define *db-access-allowed*   #t) ;; flag to allow access
;; (define *db-access-mutex*     (make-mutex)) ;; moved to dbfile
(define *db-transaction-mutex* (make-mutex))
528
529
530
531
532
533
534
535
536
537



538
539
540

541
542
543
544
545
546
547
548
549
550
551
552
553
554
555

556
557
558
559
560
561
562
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn)
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time))
	       (file-old  (> file-age (* 48 60 60)))
	       (file-big  (> (file-size fullname) 200000)))
	  (hash-table-set! all-files file mod-time)



	  (if (or (and (string-match "^.*.log" file)
		       file-old
		       file-big)

		  (and (string-match "^server-.*.log" file)
		       file-old))
	      (let ((gzfile (conc fullname ".gz")))
		(if (common:file-exists? gzfile)
		    (begin
		      (debug:print-info 0 *default-log-port* "removing " gzfile)
		      (delete-file* gzfile)
		      (hash-table-delete!  all-files gzfile) ;; needed?
		      ))
		(debug:print-info 0 *default-log-port* "compressing " file)
		(system (conc "gzip " fullname))
		(inc-stat "gzipped")
		(hash-table-set! all-files (conc file ".gz") file-age)  ;; add the .gz file and remove the base file
		(hash-table-delete! all-files file)
		)

	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
		  (handle-exceptions
		   exn
		   #f
		   (if (directory? fullname)
		       (begin







|


>
>
>
|


>















>







525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
	  (debug:print-info 2 *default-log-port* "unable to rotate log " file ", probably handled by another process, this is safe to ignore. exn=" exn)
	  (debug:print 2 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
	  ;; (print-call-chain (current-error-port)) ;; 
	  )
	(let* ((fullname  (conc "logs/" file))
	       (mod-time  (file-modification-time fullname))
	       (file-age  (- (current-seconds) mod-time))
	       (file-old  (> file-age (* 48 60 60))) ;; over 48 hours
	       (file-big  (> (file-size fullname) 200000)))
	  (hash-table-set! all-files file mod-time)
	  (if (or 
                ;; gzip:
                ;; any old and big log files: (server logs, runlogs, update_ext_specs, etc.
               (and (string-match "^.*.log" file)
		       file-old
		       file-big)
                  ;; old server log files:
		  (and (string-match "^server-.*.log" file)
		       file-old))
	      (let ((gzfile (conc fullname ".gz")))
		(if (common:file-exists? gzfile)
		    (begin
		      (debug:print-info 0 *default-log-port* "removing " gzfile)
		      (delete-file* gzfile)
		      (hash-table-delete!  all-files gzfile) ;; needed?
		      ))
		(debug:print-info 0 *default-log-port* "compressing " file)
		(system (conc "gzip " fullname))
		(inc-stat "gzipped")
		(hash-table-set! all-files (conc file ".gz") file-age)  ;; add the .gz file and remove the base file
		(hash-table-delete! all-files file)
		)
              ;; delete other files over expiration date:
	      (if (and (> file-age (* (string->number (or (configf:lookup *configdat* "setup" "log-expire-days") "30")) 24 3600))
		       (file-exists? fullname)) ;; just in case it was gzipped - will get it next time
		  (handle-exceptions
		   exn
		   #f
		   (if (directory? fullname)
		       (begin
578
579
580
581
582
583
584
585














586
587
588
589
590

591



592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
      (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300
	  (let ((files (take (sort (hash-table-keys all-files)
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file)))














		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)

		      (delete-file* fullname)))))



	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))

;;======================================================================
;; Force a megatest cleanup-db if version is changed and skip-version-check not specified
;; Do NOT check if not on homehost!
;;
(define (common:exit-on-version-changed)
  (if (common:on-homehost?)
      (if (common:api-changed?)
	  (let* ((mtconf (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.config"))
                 (dbfile  (conc (get-environment-variable "MT_RUN_AREA_HOME") "/megatest.db"))
                 (read-only (not (file-write-access? dbfile)))
                 (dbstruct (db:setup #t))) ;; (db:setup-db *dbstruct-dbs* *toppath* #f))) ;;  #t)))
	    (debug:print 0 *default-log-port*
			 "WARNING: Version mismatch!\n"
			 "   expected: " (common:version-signature) "\n"
			 "   got:      " (common:get-last-run-version))
            (cond
             ((get-environment-variable "MT_SKIP_DB_MIGRATE") #t)
             ((and (common:file-exists? mtconf) (common:file-exists? dbfile) (not read-only)
                   (eq? (current-user-id)(file-owner mtconf))) ;; safe to run -cleanup-db
              (debug:print 0 *default-log-port* "   I see you are the owner of megatest.config, attempting to cleanup and reset to new version")
              (handle-exceptions
               exn
               (begin
                 (debug:print 0 *default-log-port* "Failed to switch versions. exn=" exn)
                 (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
                 (print-call-chain (current-error-port))
                 (exit 1))
               (common:cleanup-db dbstruct)))
             ((not (common:file-exists? mtconf))
              (debug:print 0 *default-log-port* "   megatest.config does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (common:file-exists? dbfile))
              (debug:print 0 *default-log-port* "   megatest.db does not exist in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             ((not (eq? (current-user-id)(file-owner mtconf)))
              (debug:print 0 *default-log-port* "   You do not own megatest.db in this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (read-only
              (debug:print 0 *default-log-port* "   You have read-only access to this area.  Cannot proceed with megatest version migration.")
              (exit 1))
             (else
              (debug:print 0 *default-log-port* " to switch versions you can run: \"megatest -cleanup-db\"")
              (exit 1)))))))
;;======================================================================
;;      (begin
;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;;	(exit 1))))

;;======================================================================
;; S P A R S E   A R R A Y S







|
>
>
>
>
>
>
>
>
>
>
>
>
>
>





>
|
>
>
>



<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<







580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614











































615
616
617
618
619
620
621
      (if (> num-logs max-allowed) ;; because NFS => don't let number of logs exceed 300
	  (let ((files (take (sort (hash-table-keys all-files)
				   (lambda (a b)
				     (< (hash-table-ref all-files a)(hash-table-ref all-files b))))
			     (- num-logs max-allowed))))
	    (for-each
	     (lambda (file)
	       (let* ((fullname (conc "logs/" file))
                      (is-alive 0))
                 ;; Don't delete it if it's the log file of a running server.
                 (if (string-match "server-\\d+-[a-zA-Z0-9]+\\.log" file)
                    (let* ((parts (string-split file "-."))
                        (pid (cadr parts)) ; Second part is the PID
                        (server-machine (caddr parts)) ; Third part is the server machine
                        (local (string=? (get-host-name) server-machine))
                        (test-cmd (conc "test -d /proc/" pid)))
                        (if local
                           (set! is-alive (not (system test-cmd))) 
                           (set! is-alive (not (system (conc "ssh " server-machine " test -d /proc/" pid))))
                        )
                    )
                 )
		 (if (directory? fullname)
		     (debug:print-info 0 *default-log-port* fullname " in logs directory is a directory! Cannot rotate it, it is best to not put subdirectories in the logs dir.")
		     (handle-exceptions
		      exn
		      (debug:print-error 0 *default-log-port* "failed to remove " fullname ", exn=" exn)
		      (if (not is-alive) 
                        (delete-file* fullname)
                        (debug:print-info 0 *default-log-port* "Not deleting log file " file " since its server is still alive")
                      )
                      ))))
	     files)
	    (debug:print-info 0 *default-log-port* "Deleted " (length files) " files from logs, keeping " max-allowed " files."))))))












































;;======================================================================
;;      (begin
;;	(debug:print 0 *default-log-port* "ERROR: cannot migrate version unless on homehost. Exiting.")
;;	(exit 1))))

;;======================================================================
;; S P A R S E   A R R A Y S
704
705
706
707
708
709
710






711
712
713
714
715
716
717
    (if dat
	dat
	""))))

(define (common:alist-ref/default key alist default)
  (or (alist-ref key alist) default))







(define (common:low-noise-print waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
	 (lasttime (hash-table-ref/default *common:denoise* key 0))
	 (currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *common:denoise* key currtime)







>
>
>
>
>
>







681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
    (if dat
	dat
	""))))

(define (common:alist-ref/default key alist default)
  (or (alist-ref key alist) default))


;; The `common:low-noise-print` function is a utility that can be used to throttle the
;; frequency of certain operations. It does this by tracking the last time an operation was
;; performed and only allowing it again after a specified interval (`waitval`). This can be useful
;; for reducing noise in logs or limiting the rate of user notifications, among other use cases.

(define (common:low-noise-print waitval . keys)
  (let* ((key      (string-intersperse (map conc keys) "-" ))
	 (lasttime (hash-table-ref/default *common:denoise* key 0))
	 (currtime (current-seconds)))
    (if (> (- currtime lasttime) waitval)
	(begin
	  (hash-table-set! *common:denoise* key currtime)
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
					  (string-translate *toppath* "/" "."))
				    (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
					  "/megatest_localdb/"
					  tsname
					  (string-translate *toppath* "/" "."))
				    ))))
		(set! *db-cache-path* dbpath)
		;; ensure megatest area has .megatest
		(let ((dbarea (conc *toppath* "/.megatest")))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		;; ensure tmp area has .megatest
		(let ((dbarea (conc dbpath "/.megatest")))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))







|
|


|
|







935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
					  (string-translate *toppath* "/" "."))
				    (conc "/tmp/" (current-process-id) ;; just in case we have an issue with the dir by own user name
					  "/megatest_localdb/"
					  tsname
					  (string-translate *toppath* "/" "."))
				    ))))
		(set! *db-cache-path* dbpath)
		;; ensure megatest area has dbdir
		(let ((dbarea (conc *toppath* "/" *dbdir*)))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		;; ensure tmp area has dbdir
		(let ((dbarea (conc dbpath "/" *dbdir*)))
		  (if (not (file-exists? dbarea))
		      (create-directory dbarea)))
		dbpath))
	  #f)))

(define (common:get-area-path-signature)
  (message-digest-string (md5-primitive) *toppath*))

Modified commonmod.scm from [2570fcf4eb] to [cefe879e36].

84
85
86
87
88
89
90







91
92
93
94
95
96
97
    (if valstr
	(val->alist valstr)
	'()))) ;; should it return empty list or #f to indicate not set?


(define (get-section cfgdat section)
  (hash-table-ref/default cfgdat section '()))








;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
  (let ((fmod-time (handle-exceptions







>
>
>
>
>
>
>







84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    (if valstr
	(val->alist valstr)
	'()))) ;; should it return empty list or #f to indicate not set?


(define (get-section cfgdat section)
  (hash-table-ref/default cfgdat section '()))

(define (common:make-tmpdir-name areapath tmpadj)
  (let* ((area (pathname-file areapath))
         (dname (conc "/tmp/"(current-user-name)"/megatest_localdb/" area "/" (string-translate areapath "/" ".") tmpadj "/.mtdb")))
    (unless (directory-exists? dname)
      (create-directory dname #t))
    dname))

;; dot-locking egg seems not to work, using this for now
;; if lock is older than expire-time then remove it and try again
;; to get the lock
;;
(define (common:simple-file-lock fname #!key (expire-time 300))
  (let ((fmod-time (handle-exceptions

Modified dashboard.scm from [0995d5cbb4] to [0d09ba74fa].

662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
                        0
                        (dboard:rundat-last-update run-dat)))
	 (last-db-time (if do-not-use-db-file-timestamps
			   0
			   (dboard:rundat-last-db-time run-dat)))
	 (db-path      (or (dboard:rundat-db-path run-dat)
			   (let* ((db-dir (common:get-db-tmp-area))
				  (db-pth (conc db-dir "/.megatest/main.db")))
			     (dboard:rundat-db-path-set! run-dat db-pth)
			     db-pth)))
	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
	 (db-modified  (>= db-mod-time last-db-time))
	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
	 (tmptests     (if (or do-not-use-db-file-timestamps
			       (dboard:tabdat-filters-changed tabdat)







|







662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
                        0
                        (dboard:rundat-last-update run-dat)))
	 (last-db-time (if do-not-use-db-file-timestamps
			   0
			   (dboard:rundat-last-db-time run-dat)))
	 (db-path      (or (dboard:rundat-db-path run-dat)
			   (let* ((db-dir (common:get-db-tmp-area))
				  (db-pth (conc db-dir "/" *dbdir* "/main.db")))
			     (dboard:rundat-db-path-set! run-dat db-pth)
			     db-pth)))
	 (db-mod-time  (common:lazy-sqlite-db-modification-time db-path))
	 (db-modified  (>= db-mod-time last-db-time))
	 (multi-get    (> (dboard:rundat-run-data-offset run-dat) 0))  ;; multi-get in progress
	 (tmptests     (if (or do-not-use-db-file-timestamps
			       (dboard:tabdat-filters-changed tabdat)
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
;;======================================================================

(stop-the-train)

(define (main)
  ;; (print "Starting dashboard main")
    
  (let* ((mtdb-path (conc *toppath* "/.megatest/main.db"))
         (target (args:get-arg "-target"))
         (commondat       (dboard:commondat-make)))
    (if target
        (begin
          (args:remove-arg-from-ht "-target")
          (dboard:commondat-target-set! commondat target)
        )







|







3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
;;======================================================================

(stop-the-train)

(define (main)
  ;; (print "Starting dashboard main")
    
  (let* ((mtdb-path (conc *toppath* "/" *dbdir* "/main.db"))
         (target (args:get-arg "-target"))
         (commondat       (dboard:commondat-make)))
    (if target
        (begin
          (args:remove-arg-from-ht "-target")
          (dboard:commondat-target-set! commondat target)
        )
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
    (if (not (common:on-homehost?))
    (begin
      (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost))
      (debug:print 0 *default-log-port* "It will be slower.")
      ))


    (if (and (common:file-exists? mtdb-path)
	     (file-write-access? mtdb-path))
	(if (not (args:get-arg "-skip-version-check"))
            (common:exit-on-version-changed)))

    (let* ()
      ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
      (cond 
       ((args:get-arg "-test") ;; run-id,test-id
	(let* ((dat     (let ((d (map string->number (string-split (args:get-arg "-test") ",")))) 
			  (if (> (length d) 1)
			      d







<
<
<
<
<







3814
3815
3816
3817
3818
3819
3820





3821
3822
3823
3824
3825
3826
3827
    (if (not (common:on-homehost?))
    (begin
      (debug:print 0 *default-log-port* "WARNING: You are starting the dashboard on a machine that is not the homehost:" (common:get-homehost))
      (debug:print 0 *default-log-port* "It will be slower.")
      ))







    (let* ()
      ;; Move this stuff to db.scm? I'm not sure that is the right thing to do...
      (cond 
       ((args:get-arg "-test") ;; run-id,test-id
	(let* ((dat     (let ((d (map string->number (string-split (args:get-arg "-test") ",")))) 
			  (if (> (length d) 1)
			      d
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901

(define last-copy-time 0)


;; Sync to tmp only if in read-only mode.

(define (sync-db-to-tmp tabdat)
  (let* ((db-file "./.megatest/main.db"))
    (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
      (begin
        (db:multi-db-sync (db:setup #f) 'old2new)
        (set! last-copy-time (current-seconds))
      )
    )
  )







|







3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896

(define last-copy-time 0)


;; Sync to tmp only if in read-only mode.

(define (sync-db-to-tmp tabdat)
  (let* ((db-file (conc "./" *dbdir* "/main.db")))
    (if (and (not (file-write-access? db-file)) ( > (current-seconds) (+ last-copy-time 5)))
      (begin
        (db:multi-db-sync (db:setup #f) 'old2new)
        (set! last-copy-time (current-seconds))
      )
    )
  )

Modified db.scm from [d4dcfd72a3] to [c536b1d8e4].

409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432



(define (db:all-db-sync dbstruct)
  (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
	 (data-synced       0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
    (sync-durations (make-hash-table))
    (no-sync-db        (db:open-no-sync-db)))
    (for-each
     (lambda (file) ;; tmp db file
       (debug:print-info 3 *default-log-port* "file: " file)
       (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
              (wal-file (conc file "-wal"))
              (shm-file (conc file "-shm"))
	      (fulln       (conc *toppath*"/.megatest/"fname)) ;; fulln is nfs db name
              (wal-time     (if (file-exists? wal-file)             
			       (file-modification-time wal-file)
                               0))
              (shm-time     (if (file-exists? shm-file)             
			       (file-modification-time shm-file)
                               0))








|








|







409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432



(define (db:all-db-sync dbstruct)
  (let* ((dbdat (db:open-db dbstruct #f db:initialize-main-db))
	 (data-synced       0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
    (sync-durations (make-hash-table))
    (no-sync-db        (db:open-no-sync-db)))
    (for-each
     (lambda (file) ;; tmp db file
       (debug:print-info 3 *default-log-port* "file: " file)
       (let* ((fname       (conc (pathname-file file) ".db")) ;; fname is tmp db file
              (wal-file (conc file "-wal"))
              (shm-file (conc file "-shm"))
	      (fulln       (conc *toppath*"/" *dbdir* "/"fname)) ;; fulln is nfs db name
              (wal-time     (if (file-exists? wal-file)             
			       (file-modification-time wal-file)
                               0))
              (shm-time     (if (file-exists? shm-file)             
			       (file-modification-time shm-file)
                               0))

487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
    (tmp-area       (common:get-db-tmp-area))
    (old2new (member 'old2new options))
    (dejunk (member 'dejunk options))
    (killservers (member 'killservers options))
    (servers (server:get-list *toppath*))
    (src-area (if old2new *toppath* tmp-area))
    (dest-area (if old2new tmp-area *toppath*))
    (dbfiles        (if old2new (glob (conc *toppath* "/.megatest/*.db")) (glob (conc tmp-area "/.megatest/*.db"))))
    (keys (db:get-keys dbstruct))
    (sync-durations (make-hash-table)))


    (if killservers
      (begin
       	  (for-each







|







487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
    (tmp-area       (common:get-db-tmp-area))
    (old2new (member 'old2new options))
    (dejunk (member 'dejunk options))
    (killservers (member 'killservers options))
    (servers (server:get-list *toppath*))
    (src-area (if old2new *toppath* tmp-area))
    (dest-area (if old2new tmp-area *toppath*))
    (dbfiles        (if old2new (glob (conc *toppath* "/" *dbdir* "/*.db")) (glob (conc tmp-area "/" *dbdir* "/*.db"))))
    (keys (db:get-keys dbstruct))
    (sync-durations (make-hash-table)))


    (if killservers
      (begin
       	  (for-each
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
    )
    (for-each
     (lambda (srcfile)
       (debug:print-info 3 *default-log-port* "file: " srcfile)
       (let* ((fname (conc (pathname-file srcfile) ".db"))
              (basename (pathname-file srcfile))
              (run-id (if (string= basename "main") #f (string->number basename)))
	      (destfile (conc dest-area "/.megatest/" fname))
              (dest-directory  (conc dest-area "/.megatest/"))
              (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
              (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
              ;; TODO: time1 and time2 need to take into account -wal and -shm files
	      (time1 (file-modification-time srcfile))
              (time2 (if (file-exists? destfile)
                         (begin
                            (debug:print-info 2 *default-log-port* "destfile " destfile " exists")







|
|







514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
    )
    (for-each
     (lambda (srcfile)
       (debug:print-info 3 *default-log-port* "file: " srcfile)
       (let* ((fname (conc (pathname-file srcfile) ".db"))
              (basename (pathname-file srcfile))
              (run-id (if (string= basename "main") #f (string->number basename)))
	      (destfile (conc dest-area "/" *dbdir* "/" fname))
              (dest-directory  (conc dest-area "/" *dbdir* "/"))
              (dummy (debug:print-info 2 *default-log-port* "destfile = " destfile))
              (dummy2 (debug:print-info 2 *default-log-port* "dejunk = " dejunk))
              ;; TODO: time1 and time2 need to take into account -wal and -shm files
	      (time1 (file-modification-time srcfile))
              (time2 (if (file-exists? destfile)
                         (begin
                            (debug:print-info 2 *default-log-port* "destfile " destfile " exists")
1579
1580
1581
1582
1583
1584
1585


























































1586
1587
1588
1589
1590
1591
1592
		      qry)
		    qryvals)
	     (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res)
	     res))) 
	(begin
	  (debug:print-error 0 *default-log-port* "Called without all necessary keys")
	  #f))))



























































;; replace header and keystr with a call to runs:get-std-run-fields
;;
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;; runpatts: patt1,patt2 ...
;;
(define (db:get-runs dbstruct runpatt count offset keypatts)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
		      qry)
		    qryvals)
	     (sqlite3:execute db "UPDATE runs SET state=?,status=?,event_time=strftime('%s','now') WHERE id=? AND state='deleted';" state status res)
	     res))) 
	(begin
	  (debug:print-error 0 *default-log-port* "Called without all necessary keys")
	  #f))))

;; called with run-id=#f so will operate on main.db
;;
(define (db:insert-run dbstruct run-id target runname run-meta)
  (let* ((keys (db:get-keys dbstruct))
     	 (runs (db:simple-get-runs dbstruct runname #f #f target #f))) ;; runpatt count offset target last-update
    ;; need to insert run based on target and runname
    (let* ((targvals (string-split target "/"))
	   (keystr   (string-intersperse keys ","))
	   (key?str  (string-intersperse (make-list (length targvals) "?") ","))
	   (qrystr   (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")"))
	   (get-var  (lambda (db qrystr)
		       (let* ((res #f))
			 (sqlite3:for-each-row
			  (lambda row
			    (set res (car row)))
			  db qrystr run-id runname)
			 res))))
      (if (null? runs)
        (begin
	  (db:create-initial-run-record dbstruct run-id runname target)
        )
      )
      run-id)))

(define (db:create-initial-run-record dbstruct run-id runname target)	  
  (let* ((keys     (db:get-keys dbstruct))
     	 (targvals (string-split target "/"))
	 (keystr   (string-intersperse keys ","))
	 (key?str  (string-intersperse (make-list (length targvals) "?") ",")) ;; a string with the same length as targvals, where each element is "?" and interspersed with commas.
	 (qrystr   (conc "INSERT INTO runs (id,runname,"keystr") VALUES (?,?,"key?str")")))

    (db:with-db
     dbstruct #f #t ;; run-id writable
     (lambda (dbdat db)
       (apply sqlite3:execute db qrystr run-id runname targvals)))))

(define (db:insert-test dbstruct run-id test-rec)
  (let* ((testname  (alist-ref "testname" test-rec equal?))
	 (item-path (alist-ref "item_path" test-rec equal?))
	 (id        (db:get-test-id dbstruct run-id testname item-path))
	 (fieldvals (filter (lambda (x)(not (member (car x) '("id" "last_update")))) test-rec))
	 (setqry    (conc "UPDATE tests SET "(string-intersperse
					      (map (lambda (dat)
						     (conc (car dat)"=?"))
						   fieldvals)
					      ",")" WHERE id=?;"))
	 (insqry   (conc "INSERT INTO tests ("(string-intersperse (map (lambda (x) (car x)) fieldvals) ",")
			 ") VALUES ("(string-intersperse (make-list (length fieldvals) "?") ",")");")))
    ;; (debug:print 0 *default-log-port* "id: "id"\nset: "setqry"\ninsqry: "insqry)
    (db:with-db
     dbstruct
     run-id #t
     (lambda (dbdat db)
      ;; (if id
      ;;   (apply sqlite3:execute db setqry (append (map cdr fieldvals) (list id)))
	   (apply sqlite3:execute db insqry (map cdr fieldvals))
           ))))

;; replace header and keystr with a call to runs:get-std-run-fields
;;
;; keypatts: ( (KEY1 "abc%def")(KEY2 "%") )
;; runpatts: patt1,patt2 ...
;;
(define (db:get-runs dbstruct runpatt count offset keypatts)
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666

1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
			     (if (number? count)
				 (conc " LIMIT " count)
				 "")
			     (if (number? offset)
				 (conc " OFFSET " offset)
				 "")))
	   )
    (debug:print-info 11 *default-log-port* "db:get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    (db:with-db dbstruct #f #f
		(lambda (dbdat db)		
		  (sqlite3:for-each-row
		   (lambda (target id runname state status owner event_time)
		     (set! res (cons (make-simple-run target id runname state status owner event_time) res)))

		   db
		   qrystr
		   )))
    (debug:print-info 11 *default-log-port* "db:get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!






 (define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
        (alldbs     (glob (conc dbdir "/.megatest/[0-9]*.db*")))
        (changed    (filter (lambda (dbfile)
                              (> (file-modification-time dbfile) since-time))
                            alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
           (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile)))
             (if res







|




|
>



|













|







1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
			     (if (number? count)
				 (conc " LIMIT " count)
				 "")
			     (if (number? offset)
				 (conc " OFFSET " offset)
				 "")))
	   )
    (debug:print-info 11 *default-log-port* "db:simple-get-runs START qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    (db:with-db dbstruct #f #f
		(lambda (dbdat db)		
		  (sqlite3:for-each-row
		   (lambda (target id runname state status owner event_time)
		     (set! res (cons (make-simple-run target id runname state status owner event_time) res))
                   )
		   db
		   qrystr
		   )))
    (debug:print-info 11 *default-log-port* "db:simple-get-runs END qrystr: " qrystr " target: " target " offset: " offset " limit: " count)
    res))

;; TODO: Switch this to use max(update_time) from each run db? Then if using a server there is no disk traffic (using inmem db)
;;
;; NOTE: This DOESN'T (necessarily) get the real run ids, but the number of the <number>.db!!






 (define (db:get-changed-run-ids since-time)
  (let* ((dbdir      (db:dbfile-path)) ;; (configf:lookup *configdat* "setup" "dbdir"))
        (alldbs     (glob (conc dbdir "/" *dbdir* "/[0-9]*.db*")))
        (changed    (filter (lambda (dbfile)
                              (> (file-modification-time dbfile) since-time))
                            alldbs)))
    (delete-duplicates
     (map (lambda (dbfile)
           (let* ((res (string-match ".*\\/(\\d\\d)\\.db*" dbfile)))
             (if res
4147
4148
4149
4150
4151
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
         (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers
         (all_run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
          )
         )
         (changed_run_ids (filter (lambda (run) (member (modulo run 100) changed_run_dbs)) all_run_ids))
         ;; TODO: couldn't we just use changed_run_ids for run_ids?
         (run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
          )
         )







|







4206
4207
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
         (changed_run_dbs (db:get-changed-run-ids since-time)) ;; gets the rundb numbers
         (all_run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs"))
          )
         )
         (changed_run_ids (filter (lambda (run) (member (modulo run (num-run-dbs)) changed_run_dbs)) all_run_ids))
         ;; TODO: couldn't we just use changed_run_ids for run_ids?
         (run_ids 
          (db:with-db dbstruct #f #f 
            (lambda (dbdat db)
              (sqlite3:fold-row backcons '() db "SELECT id FROM runs  WHERE last_update>=?" since-time))
          )
         )
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (common:get-db-tmp-area))
	 (dbfiles        (glob (conc tmp-area"/.megatest/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.megatest/"fname))
	      (time1 (if (file-exists? file)
			 (file-modification-time file)
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
			   1)))
	      (time2 (if (file-exists? fulln)
			 (file-modification-time fulln)







|





|







4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
	  #f
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (common:get-db-tmp-area))
	 (dbfiles        (glob (conc tmp-area"/" *dbdir* "/*.db")))
	 (sync-durations (make-hash-table)))
    ;; (debug:print-info 0 *default-log-port* "lock-and-sync, dbfiles: "dbfiles)
    (for-each
     (lambda (file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/" *dbdir* "/"fname))
	      (time1 (if (file-exists? file)
			 (file-modification-time file)
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
			   1)))
	      (time2 (if (file-exists? fulln)
			 (file-modification-time fulln)

Modified dbfile.scm from [a0594e55e5] to [14d0a1b210].

40
41
42
43
44
45
46




47
48
49


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
	ports

	commonmod
	)

;; (import debugprint)





;;======================================================================
;;  R E C O R D S
;;======================================================================



;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; .megatest/1.db
  (mtdbfile    #f) ;; mtrah/.megatest/1.db
  (mtdbdat     #f) ;; only need one of these for syncing
  ;; (dbdats      (make-hash-table))  ;; id => dbdat 
  (tmpdbfile   #f) ;; /tmp/.../.megatest/1.db
  ;; (refndbfile  #f) ;; /tmp/.../.megatest/1.db_ref
  (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  (read-only   #f)
  (last-sync   0)
  (last-write  (current-seconds))
  )                ;; goal is to converge on one struct for an area but for now it is too confusing







>
>
>
>



>
>















|
|


|
|







40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
	ports

	commonmod
	)

;; (import debugprint)

;; Parameters

(define num-run-dbs           (make-parameter 10))

;;======================================================================
;;  R E C O R D S
;;======================================================================

;; (define-record simple-run target id runname state status owner event_time)

;; a single Megatest area with it's multiple dbs is
;; managed in a dbstruct
;;
(defstruct dbr:dbstruct
  (areapath  #f)
  (homehost  #f)
  (tmppath   #f)
  (read-only #f)
  (subdbs (make-hash-table))
  )

;; NOTE: Need one dbr:subdb per main.db, 1.db ...
;;
(defstruct dbr:subdb
  (dbname      #f) ;; " *dbdir* "/1.db
  (mtdbfile    #f) ;; mtrah/" *dbdir* "/1.db
  (mtdbdat     #f) ;; only need one of these for syncing
  ;; (dbdats      (make-hash-table))  ;; id => dbdat 
  (tmpdbfile   #f) ;; /tmp/.../" *dbdir* "/1.db
  ;; (refndbfile  #f) ;; /tmp/.../" *dbdir* "/1.db_ref
  (dbstack     (make-stack)) ;; stack for tmp dbr:dbdat,
  (homehost    #f) ;; not used yet
  (on-homehost #f) ;; not used yet
  (read-only   #f)
  (last-sync   0)
  (last-write  (current-seconds))
  )                ;; goal is to converge on one struct for an area but for now it is too confusing
91
92
93
94
95
96
97

98
99
100
101
102
103
104
(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*


(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)







>







97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
(define *db-sync-in-progress* #f)
(define *db-with-db-mutex*    (make-mutex))
(define *max-api-process-requests* 0)
(define *api-process-request-count* 0)
(define *db-write-access*     #t)
(define *db-last-sync*        0)                 ;; last time the sync to megatest.db happened
(define *db-multi-sync-mutex* (make-mutex))      ;; protect access to *db-sync-in-progress*, *db-last-sync*
(define *dbdir* ".mtdb")

(define (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply dbfile:print-err message)
  (dbfile:print-err
    ", error: "     ((condition-property-accessor 'exn 'message)   exn)
    ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
(define (dbfile:run-id->dbname run-id)
  (cond
   ((number? run-id) (conc ".megatest/" (modulo run-id 100) ".db"))
   ((not run-id)     (conc ".megatest/main.db"))
   (else             run-id)))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (dbfile:setup do-sync areapath tmppath)







|
|







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216

(define (db:dbname->path apath dbname)
  (conc apath"/"dbname))

;; POTENTIAL BUG: this implementation could produce a db file if run-id is neither #f or a number
(define (dbfile:run-id->dbname run-id)
  (cond
   ((number? run-id) (conc  *dbdir* "/" (modulo run-id (num-run-dbs)) ".db"))
   ((not run-id)     (conc  *dbdir* "/main.db"))
   (else             run-id)))

;; Make the dbstruct, setup up auxillary db's and call for main db at least once
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (dbfile:setup do-sync areapath tmppath)
366
367
368
369
370
371
372
373

374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
                                 (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
				 (if sync-mode
				     (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
				 (if journal-mode
				     (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
				 (if (and init-proc (not db-exists))
				     (init-proc db))
				 db)))

                            (begin
			      (if (file-exists? fname )
                                  (let ((db (sqlite3:open-database fname)))
				    ;; pragmas synchronous not needed because this db is used read-only
				    ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
				    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
				    db )
                                  (print "file doesn't exist: " fname))))
			(exn (io-error)
			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
			     (retry))
			(exn (corrupt)
			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
			     (retry))
			(exn (busy)







|
>







|







373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
                                 (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000))
				 (if sync-mode
				     (sqlite3:execute db (conc "PRAGMA synchronous = "sync-mode";")))
				 (if journal-mode
				     (sqlite3:execute db (conc "PRAGMA journal_mode = "journal-mode";")))
				 (if (and init-proc (not db-exists))
				     (init-proc db))
				 db))
			     expire-time: 30)
                            (begin
			      (if (file-exists? fname )
                                  (let ((db (sqlite3:open-database fname)))
				    ;; pragmas synchronous not needed because this db is used read-only
				    ;; (sqlite3:execute db (conc "PRAGMA synchronous = "mode";")
				    (sqlite3:set-busy-handler! db (sqlite3:make-busy-timeout 30000)) ;; read-only but still need timeout
				    db )
                                  (print "cautious-open-database: file doesn't exist: " fname))))
			(exn (io-error)
			     (dbfile:print-err exn "ERROR: i/o error with " fname ". Check permissions, disk space etc. and try again.")
			     (retry))
			(exn (corrupt)
			     (dbfile:print-err exn "ERROR: database " fname " is corrupt. Repair it to proceed.")
			     (retry))
			(exn (busy)
803
804
805
806
807
808
809
810


811
812
813
814
815
816
817
	        (sqlite3:for-each-row
	          (lambda (a . b)
	            (set! fromdat (cons (apply vector a b) fromdat))
	            (if (> (length fromdat) batch-len)
		      (begin
		        (set! fromdats (cons fromdat fromdats))
		        (set! fromdat  '())
		        (set! totrecords (+ totrecords 1)))


                    )
                 )
	         (dbr:dbdat-dbh fromdb)
	         full-sel)
              )
            )








|
>
>







811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
	        (sqlite3:for-each-row
	          (lambda (a . b)
	            (set! fromdat (cons (apply vector a b) fromdat))
	            (if (> (length fromdat) batch-len)
		      (begin
		        (set! fromdats (cons fromdat fromdats))
		        (set! fromdat  '())
		        (set! totrecords (+ totrecords 1))
                        (thread-sleep! 2)
                      )
                    )
                 )
	         (dbr:dbdat-dbh fromdb)
	         full-sel)
              )
            )

1150
1151
1152
1153
1154
1155
1156


1157


1158
1159
1160
1161
1162
1163
1164
	  (thread-sleep! 0.25)
	  (if (file-exists? fname)
	      (handle-exceptions exn
                #f 
                (with-input-from-file fname
	  	  (lambda ()
		    (equal? key-string (read-line)))))


	      #f)


       )
    )
  )
)

(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
  (let ((end-time (+ expire-time (current-seconds))))







>
>
|
>
>







1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
	  (thread-sleep! 0.25)
	  (if (file-exists? fname)
	      (handle-exceptions exn
                #f 
                (with-input-from-file fname
	  	  (lambda ()
		    (equal? key-string (read-line)))))
              (begin
                (dbfile:print-err "dbfile:simple-file-lock created " fname " but it was gone 0.25 seconds later")
	      #f
              )
          )
       )
    )
  )
)

(define (dbfile:simple-file-lock-and-wait fname #!key (expire-time 300))
  (let ((end-time (+ expire-time (current-seconds))))

Modified http-transport.scm from [d2af089e7c] to [3385ca6732].

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area))
	 (start-file      (conc tmp-area "/.server-start")))
    (debug:print-info 0 *default-log-port* "portlogger recommended port: " start-port)
    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition







<







75
76
77
78
79
80
81

82
83
84
85
86
87
88
					   (server:get-best-guess-address hostname)
					   #f)))
			    (if ipstr ipstr hostn))) ;; hostname))) 
	 (start-port      (portlogger:open-run-close portlogger:find-port))
	 (link-tree-path  (common:get-linktree))
	 (tmp-area        (common:get-db-tmp-area))
	 (start-file      (conc tmp-area "/.server-start")))

    ;; set some parameters for the server
    (root-path     (if link-tree-path 
		       link-tree-path
		       (current-directory))) ;; WARNING: SECURITY HOLE. FIX ASAP!
    (handle-directory spiffy-directory-listing)
    (handle-exception (lambda (exn chain)
			(signal (make-composite-condition
462
463
464
465
466
467
468



469
470
471
472
473
474
475
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (handle-exceptions
	exn
      (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
      (with-output-to-file started-file (lambda ()(print (current-process-id)))))




    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))

      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-dbs* 







>
>
>







461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
	 (server-log-file (args:get-arg "-log"))) ;; always set when we are a server

    (handle-exceptions
	exn
      (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
      (with-output-to-file started-file (lambda ()(print (current-process-id)))))

    (debug:print 0 *default-log-port* "Creating servinfo file for " (get-host-name) ":" (cadr *server-info*)) 
    (http:create-server-registration-file *toppath* (get-host-name) (cadr *server-info*))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))

      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-dbs* 
533
534
535
536
537
538
539
540
541
542

543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
	  (begin
             (if (not *server-id*)
		 (set! *server-id* (server:mk-signature)))
             (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
	     (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	     (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	  (begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))

      (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	(cond
	 #;((and *server-run*
	       (> (- (current-seconds) server-start-time) 420)) ;; let's try server replacement
	  ;; ((adj-proc-load . 0.056875) (adj-core-load . 0.11375) (1m-load . 0.91) (5m-load . 0.77) (15m-load . 1.0) (proc . 16) (core . 8) (phys . 1))
	  (let* ((loaddat       (common:get-normalized-cpu-load #f))
		 (adj-proc-load (alist-ref 'adj-proc-load loaddat))
		 (adj-core-load (alist-ref 'adj-core-load loaddat))
		 (adj-load      (max adj-proc-load adj-core-load)))
	    (if (< adj-load 2) ;; reduce chance of runaway
		(server:run *toppath*))
	    (db:all-db-sync *dbstruct-dbs*)
	    (thread-sleep! 30)
	    (http-transport:server-shutdown port)))
         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions







|


>
|
|
<
<
<
<
<
<
<
<
<
<
<
<







535
536
537
538
539
540
541
542
543
544
545
546
547












548
549
550
551
552
553
554
	  (begin
             (if (not *server-id*)
		 (set! *server-id* (server:mk-signature)))
             (debug:print 0 *default-log-port* (current-seconds) (current-directory) (current-process-id) (argv))   
	     (debug:print 0 *default-log-port* "SERVER STARTED: " iface ":" port " AT " (current-seconds) " server-id: " *server-id*)
	     (flush-output *default-log-port*)))
      (if (common:low-noise-print 60 "dbstats")
	(begin
	    (debug:print 0 *default-log-port* "Server stats:")
	    (db:print-current-query-stats)))

        (let* ((hrs-since-start  (/ (- (current-seconds) server-start-time) 3600)))
	 (cond












         ((and *server-run*
	       (> (+ last-access server-timeout)
		  (current-seconds)))
          (if (common:low-noise-print 120 "server continuing")
              (debug:print-info 0 *default-log-port* "Server continuing, seconds since last db access: " (- (current-seconds) last-access))
	      (let ((curr-time (current-seconds)))
		(handle-exceptions
653
654
655
656
657
658
659

660
661
662
663
664
665


















666
667
668
669
670
671
672
                                    (args:get-arg "-server")
                                    "-")
                                )) "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running)
                               "Keep running"))))

      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor.
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))



















;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()







>

|




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
                                    (args:get-arg "-server")
                                    "-")
                                )) "Server run"))
           (th3 (make-thread (lambda ()
                               (debug:print-info 0 *default-log-port* "Server monitor thread started")
                               (http-transport:keep-running)
                               "Keep running"))))

      (thread-start! th2)
      (thread-sleep! 0.25) ;; give the server time to settle before starting the keep-running monitor. 
      (thread-start! th3)
      (set! *didsomething* #t)
      (thread-join! th2)
      (exit))))

;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (http:create-server-registration-file areapath host port)
  (let* (
         (servdir  (server:get-servinfo-dir areapath))
         (servinf (conc servdir"/"host":"port"-"(current-process-id)))
         (serv-id (server:mk-signature))
         (clean-proc (lambda ()
                       (delete-file* servinf)
                       )))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn")
    (with-output-to-file servinf
      (lambda ()
        (print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))))
      serv-id))


;; (define (http-transport:server-signal-handler signum)
;;   (signal-mask! signum)
;;   (handle-exceptions
;;    exn
;;    (debug:print 0 *default-log-port* " ... exiting ...")
;;    (let ((th1 (make-thread (lambda ()

Modified launch.scm from [60d380c61b] to [c0f3b7f442].

324
325
326
327
328
329
330













































331
332
333
334
335
336
337
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional















































(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
	      (if (hash-table-ref/default misc-flags 'keep-going #f)  ;; keep originals for cpu-load and disk-free unless they change more than the allowed delta
		  (loop (calc-minutes)
                        (or new-cpu-load cpu-load)
                        (or new-disk-free disk-free)
                        (if do-sync (current-seconds) last-sync)))))))
    (tests:update-central-meta-info run-id test-id (get-cpu-load) (get-df (current-directory))(calc-minutes) #f #f))) ;; NOTE: Checking twice for keep-going is intentional


;; read testconfig and create .logpro and script files
;;    - use #f for tconfigreg to re-read the testconfigs from disk
;;
(define (launch:extract-scripts-logpro  test-dir test-name item-path tconfigreg-in)
  (let* ((tconfigreg      (or tconfigreg-in
			      (tests:get-all)))
	 (tconfig-fname   (conc test-dir "/.testconfig"))
	 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
	 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
	 (scripts         (configf:get-section tconfig "scripts"))
	 (logpros         (configf:get-section tconfig "logpro")))
    ;; create .testconfig file
    (configf:write-alist tconfig tconfig-tmpfile)
    (file-move tconfig-tmpfile tconfig-fname #t)
    (delete-file* ".final-status")
    
    ;; extract scripts from testconfig and write them to files in test run dir
    (for-each
     (lambda (scriptdat)
       (match scriptdat
	      ((name content)
	       (debug:print-info 2 *default-log-port* "Creating script "(current-directory)"/"name)
	       (with-output-to-file name
		 (lambda ()
		   (print content)))
	       (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))
	      (else
	       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
     scripts)

    ;; extract logpro from testconfig and write them to files in test run dir
    (for-each
     (lambda (logprodat)
       (match logprodat
	      ((name content)
	       (debug:print-info 2 *default-log-port* "Creating logpro file "(current-directory)"/"name)
	       (with-output-to-file name
		 (lambda ()
		   (print content)
		   ;; (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu))
		   )))
	      (else
	       (debug:print-info 0 "Invalid logpro definiton found in [logpro] section of testconfig. \"" logprodat "\""))))
     logpros)))

(define (launch:execute encoded-cmd)
  (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	 (tconfigreg #f))
    (setenv "MT_CMDINFO" encoded-cmd)
    ;;(bb-check-path msg: "launch:execute incoming")
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
591
592
593
594
595
596
597



598
599
600
601
602
603
604
605
606
607
608
609
610
611
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
          ;;(bb-check-path msg: "launch:execute post block 3")




	  (if mt-bindir-path (setenv "PATH" (conc "\""(getenv "PATH")":"mt-bindir-path"\"")))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)

	  
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
          ;;(bb-check-path msg: "launch:execute post block 41")
	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
          ;;(bb-check-path msg: "launch:execute post block 42")
	  (set-item-env-vars itemdat)
          ;;(bb-check-path msg: "launch:execute post block 43")







>
>
>
|




|
<







636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651

652
653
654
655
656
657
658
	      (list  "MT_RUNNAME"   runname)
	      (list  "MT_MEGATEST"  megatest)
	      (list  "MT_TARGET"    target)
	      (list  "MT_LINKTREE"  (common:get-linktree)) ;; (configf:lookup *configdat* "setup" "linktree"))
	      (list  "MT_TESTSUITENAME" (common:get-testsuite-name))))
          ;;(bb-check-path msg: "launch:execute post block 3")

	  (let ((tmppath (getenv "PATH")))
	    (if (string-search tmppath " ")
		(debug:print 0 *default-log-port* "WARNING: spaces in PATH are not supported."))
	    (if mt-bindir-path (setenv "PATH" (conc tmppath":"mt-bindir-path))))
          ;;(bb-check-path msg: "launch:execute post block 4")
	  ;; (change-directory top-path)
	  ;; Can setup as client for server mode now
	  ;; (client:setup)
  

	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
          ;;(bb-check-path msg: "launch:execute post block 41")
	  (runs:set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
          ;;(bb-check-path msg: "launch:execute post block 42")
	  (set-item-env-vars itemdat)
          ;;(bb-check-path msg: "launch:execute post block 43")
627
628
629
630
631
632
633

634
635
636
637
638
639
640
641
642
643

644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681








682
683
684
685
686
687
688

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript 
		       (common:file-exists? fullrunscript)
		       (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))


	  ;; We are about to actually kick off the test
	  ;; so this is a good place to remove the records for 
	  ;; any previous runs
	  ;; (db:test-remove-steps db run-id testname itemdat)
	  ;; now is also a good time to write the .testconfig file
	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
		 (scripts (configf:get-section tconfig "scripts")))

	    ;; create .testconfig file
	    (configf:write-alist tconfig tconfig-tmpfile)
	    (file-move tconfig-tmpfile tconfig-fname #t)
	    (delete-file* ".final-status")

	    ;; extract scripts from testconfig and write them to files in test run dir
	    (for-each
	     (lambda (scriptdat)
	       (match scriptdat
		      ((name content)
		       (with-output-to-file name
			 (lambda ()
			   (print content)
			   (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))))
		      (else
		       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
	     scripts))
	  ;;

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 ;; (keep-going   #t)
		 (misc-flags   (let ((ht (make-hash-table)))
				 (hash-table-set! ht 'keep-going #t)
				 ht))
		 (runit        (lambda ()
				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
		 (monitorjob   (lambda ()
				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job"))
                 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t))
                 (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code"))
                 (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED"))
                 (test-status "not set")
                 )








	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...")
            (debug:print-info 2 *default-log-port* "exit-info = " exit-info)
	    (hash-table-set! misc-flags 'keep-going #f)







>

|
|
|
|
|
|
|
|
|
>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|



















|
<
>
>
>
>
>
>
>
>







674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729

730
731
732
733
734
735
736
737
738
739
740
741
742
743
744

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
	      (if (and fullrunscript 
		       (common:file-exists? fullrunscript)
		       (not (file-execute-access? fullrunscript)))
		  (system (conc "chmod ug+x " fullrunscript))))
	  (launch:extract-scripts-logpro work-area test-name item-path tconfigreg)

;;;;;	  ;; We are about to actually kick off the test
;;;;;	  ;; so this is a good place to remove the records for 
;;;;;	  ;; any previous runs
;;;;;	  ;; (db:test-remove-steps db run-id testname itemdat)
;;;;;	  ;; now is also a good time to write the .testconfig file
;;;;;	  (let* ((tconfig-fname   (conc work-area "/.testconfig"))
;;;;;		 (tconfig-tmpfile (conc tconfig-fname ".tmp"))
;;;;;		 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t force-create: #t)) ;; 'return-procs)))
;;;;;		 (scripts         (configf:get-section tconfig "scripts"))
;;;;;		 (precmd          (configf:lookup tconfig )
;;;;;	    ;; create .testconfig file
;;;;;	    (configf:write-alist tconfig tconfig-tmpfile)
;;;;;	    (file-move tconfig-tmpfile tconfig-fname #t)
;;;;;	    (delete-file* ".final-status")
;;;;;
;;;;;	    ;; extract scripts from testconfig and write them to files in test run dir
;;;;;	    (for-each
;;;;;	     (lambda (scriptdat)
;;;;;	       (match scriptdat
;;;;;		      ((name content)
;;;;;		       (with-output-to-file name
;;;;;			 (lambda ()
;;;;;			   (print content)
;;;;;			   (change-file-mode name (bitwise-ior perm/irwxg perm/irwxu)))))
;;;;;		      (else
;;;;;		       (debug:print-info 0 "Invalid script definiton found in [scripts] section of testconfig. \"" scriptdat "\""))))
;;;;;	     scripts))
	  ;;

	  (let* ((m            (make-mutex))
		 (kill-job?    #f)
		 (exit-info    (make-launch:einf pid: #t exit-status: #t exit-code: #t rollup-status: 0)) ;; pid exit-status exit-code (i.e. process was successfully run) rollup-status
		 (job-thread   #f)
		 ;; (keep-going   #t)
		 (misc-flags   (let ((ht (make-hash-table)))
				 (hash-table-set! ht 'keep-going #t)
				 ht))
		 (runit        (lambda ()
				 (launch:manage-steps run-id test-id item-path fullrunscript ezsteps subrun test-name tconfigreg exit-info m)))
		 (monitorjob   (lambda ()
				 (launch:monitor-job  run-id test-id item-path fullrunscript ezsteps test-name tconfigreg exit-info m work-area runtlim misc-flags)))
		 (th1          (make-thread monitorjob "monitor job"))
		 (th2          (make-thread runit "run job"))
                 (tconfig         (tests:get-testconfig test-name item-path tconfigreg #t))
                 (propagate-exit-code (configf:lookup *configdat* "setup" "propagate-exit-code"))
                 (propagate-status-list '("FAIL" "KILLED" "ABORT" "DEAD" "CHECK" "SKIP" "WAIVED"))
                 (test-status     "not set")

		 (test-state      "not set")
		 (precmd          (configf:lookup tconfig "setup" "precmd"))
		 (postcmd         (configf:lookup tconfig "setup" "postcmd")))
	    ;; first, if set, run the precmd
	    (if precmd ;; (file-exists? precmd)(file-execute-access? precmd))
		(begin
		  ;; (save-environment-as-files "precmd-envt")
		  (system precmd))) ;; up to test author to put nbfake if desired.
	    (set! job-thread th2)
	    (thread-start! th1)
	    (thread-start! th2)
	    (thread-join! th2)
	    (debug:print-info 0 *default-log-port* "Megatest execute of test " test-name ", item path " item-path " complete. Notifying the db ...")
            (debug:print-info 2 *default-log-port* "exit-info = " exit-info)
	    (hash-table-set! misc-flags 'keep-going #f)
735
736
737
738
739
740
741
742
743

744
745

746
747
748
749
750
751
752
753






754
755
756
757
758
759
760
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")


            (set! test-status (db:test-get-status (rmt:get-testinfo-state-status run-id test-id)))


            ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1.


            (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list))
               (begin
                (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) 
                (set! *globalexitstatus* 1)
               )
            )







	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks







|
|
>


>








>
>
>
>
>
>







791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
	      (rmt:update-run-stats run-id (rmt:get-raw-run-stats run-id))) ;; end of let*

	    (mutex-unlock! m)
            (launch:end-of-run-check run-id )
	    (debug:print 2 *default-log-port* "Output from running " fullrunscript ", pid " (launch:einf-pid exit-info) " in work area " 
			 work-area ":\n====\n exit code " (launch:einf-exit-code exit-info) "\n" "====\n")

	    (let* ((testrec  (rmt:get-testinfo-state-status run-id test-id)))
              (set! test-status (db:test-get-status testrec))
	      (set! test-state  (db:test-get-state  testrec)))

            ;; If the propagate-exit-code option has been set in the megatest config, and the test status matches the list, set the exit code to 1.


            (if (and propagate-exit-code (string=? propagate-exit-code "yes") (member test-status propagate-status-list))
               (begin
                (debug:print 1 *default-log-port* "Setting exit status to 1 because of test status of " test-status) 
                (set! *globalexitstatus* 1)
               )
            )

	    (if postcmd
		(begin
		  (setenv "MT_TEST_STATE" test-state)
		  (setenv "MT_TEST_STATUS" test-status)
		  ;; (save-environment-as-files "postcmd-envt")
		  (system postcmd)))
	    (if (not (launch:einf-exit-status exit-info))
		(exit 4))))
        )))

;; Spec for End of test
;; At end of each test call, after marking self as COMPLETED do run-state-status-rollup
;; At transition to run COMPLETED/X do hooks
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 0 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))









|







953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
		       runname
		       (common:file-exists? fulldir))
		  (let ((tmpfile  (conc fulldir "/.megatest.cfg." (current-seconds)))
			(targfile (conc fulldir "/.megatest.cfg-"  megatest-version "-" megatest-fossil-hash))
			(rconfig  (conc fulldir "/.runconfig." megatest-version "-" megatest-fossil-hash)))
		    (if (common:file-exists? rconfig) ;; only cache megatest.config AFTER runconfigs has been cached
			(begin
			  (debug:print-info 2 *default-log-port* "Caching megatest.config in " tmpfile)
                          (if (not (common:in-running-test?))
                              (configf:write-alist *configdat* tmpfile))
			  (system (conc "ln -sf " tmpfile " " targfile))))
		    )))
	    (debug:print-info 1 *default-log-port* "No linktree yet, no caching configs.")))))


Modified megatest-version.scm from [9b7afeda4b] to [cd3d6b0408].

16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.7014)







|
16
17
18
19
20
21
22
23
;;     along with Megatest.  If not, see <http://www.gnu.org/licenses/>.

;; Always use two or four digit decimal
;; 1.01, 1.02...1.10,1.11,1.1101 ... 1.99,2.00..

;; (declare (unit megatest-version))

(define megatest-version 1.7105)

Modified megatest.scm from [301539c25d] to [a23040756c].

84
85
86
87
88
89
90





91



92
93
94
95
96
97
98

(dbfile:db-init-proc db:initialize-main-db)

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)





      (load debugcontrolf)))




;; usage logging, careful with this, it is not designed to deal with all real world challenges!
;;
(if (and *usage-log-file*
         (file-write-access? *usage-log-file*))
    (with-output-to-file
        *usage-log-file*







>
>
>
>
>
|
>
>
>







84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

(dbfile:db-init-proc db:initialize-main-db)

;; load the ~/.megatestrc file, put (use trace)(trace-call-sites #t)(trace function-you-want-to-trace) in this file
;;
(let ((debugcontrolf (conc (get-environment-variable "HOME") "/.megatestrc")))
  (if (common:file-exists? debugcontrolf)
    (begin
      ;; for some reason, debug:print does not work here. Had to use print.
      (with-output-to-port (current-error-port)
	(lambda ()
	  (print (conc "WARNING: loading " debugcontrolf))))
      (load debugcontrolf)
    )
  )
)

;; usage logging, careful with this, it is not designed to deal with all real world challenges!
;;
(if (and *usage-log-file*
         (file-write-access? *usage-log-file*))
    (with-output-to-file
        *usage-log-file*
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
  -test-paths             : get the test paths matching target, runname, item and test
                            patterns.
  -list-disks             : list the disks available for storing runs
  -list-targets           : list the targets in runconfigs.config
  -list-db-targets        : list the target combinations used in the db
  -show-config            : dump the internal representation of the megatest.config file
  -show-runconfig         : dump the internal representation of the runconfigs.config file
  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexp etc. (add -debug 0,9 to see which file contributes each line)
  -show-cmdinfo           : dump the command info for a test (run in test environment)
  -section sectionName
  -var varName            : for config and runconfig lookup value for sectionName varName
  -since N                : get list of runs changed since time N (Unix seconds)
  -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
  -sort fieldname         : in -list-runs sort tests by this field
  -testdata-csv [categorypatt/]varpatt  : dump testdata for given category







|







199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
  -test-paths             : get the test paths matching target, runname, item and test
                            patterns.
  -list-disks             : list the disks available for storing runs
  -list-targets           : list the targets in runconfigs.config
  -list-db-targets        : list the target combinations used in the db
  -show-config            : dump the internal representation of the megatest.config file
  -show-runconfig         : dump the internal representation of the runconfigs.config file
  -dumpmode MODE          : dump in MODE format instead of sexpr, MODE=json,ini,sexpr etc. (add -debug 0,9 to see which file contributes each line)
  -show-cmdinfo           : dump the command info for a test (run in test environment)
  -section sectionName
  -var varName            : for config and runconfig lookup value for sectionName varName
  -since N                : get list of runs changed since time N (Unix seconds)
  -fields fieldspec       : fields to include in json dump; runs:id,runame+tests:testname+steps
  -sort fieldname         : in -list-runs sort tests by this field
  -testdata-csv [categorypatt/]varpatt  : dump testdata for given category
229
230
231
232
233
234
235


236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
  -load file.scm          : load and run file.scm
  -mark-incompletes       : find and mark incomplete tests
  -ping run-id|host:port  : ping server, exit with 0 if found
  -debug N|N,M,O...       : enable debug 0-N or N and M and O ...
  -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG
  -config fname           : override the megatest.config file with fname
  -append-config fname    : append fname to the megatest.config file



Utilities
  -env2file fname         : write the environment to fname.csh and fname.sh
  -envcap a               : save current variables labeled as context 'a' in file envdat.db
  -envdelta a-b           : output enviroment delta from context a to context b to -o fname
                            set the output mode with -dumpmode csh, bash or ini
                            note: ini format will use calls to use curr and minimize path
  -refdb2dat refdb        : convert refdb to sexp or to format specified by s-dumpmode
                            formats: perl, ruby, sqlite3, csv (for csv the -o param
                            will substitute %s for the sheet name in generating 
                            multiple sheets)
  -o                      : output file for refdb2dat (defaults to stdout)
  -archive cmd            : archive runs specified by selectors to one of disks specified
                            in the [archive-disks] section.
                            cmd: keep-html, restore, save, save-remove, get, replicate-db (use 







>
>
|






|







237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
  -load file.scm          : load and run file.scm
  -mark-incompletes       : find and mark incomplete tests
  -ping run-id|host:port  : ping server, exit with 0 if found
  -debug N|N,M,O...       : enable debug 0-N or N and M and O ...
  -debug-noprop N|M,M,O...: enable debug but do not propagate to subprocesses via MT_DEBUG
  -config fname           : override the megatest.config file with fname
  -append-config fname    : append fname to the megatest.config file
  -import-sexpr fname     : import a sexpr file (use -list-runs % -dumpmode sexpr to create)
  -regen-testfiles        : regenerate scripts and logpro files from testconfig, run in test context
  
Utilities
  -env2file fname         : write the environment to fname.csh and fname.sh
  -envcap a               : save current variables labeled as context 'a' in file envdat.db
  -envdelta a-b           : output enviroment delta from context a to context b to -o fname
                            set the output mode with -dumpmode csh, bash or ini
                            note: ini format will use calls to use curr and minimize path
  -refdb2dat refdb        : convert refdb to sexpr or to format specified by -dumpmode
                            formats: perl, ruby, sqlite3, csv (for csv the -o param
                            will substitute %s for the sheet name in generating 
                            multiple sheets)
  -o                      : output file for refdb2dat (defaults to stdout)
  -archive cmd            : archive runs specified by selectors to one of disks specified
                            in the [archive-disks] section.
                            cmd: keep-html, restore, save, save-remove, get, replicate-db (use 
347
348
349
350
351
352
353

354
355
356
357
358
359
360
			"-extract-ods"
			"-pathmod"
			"-env2file"
			"-envcap"
			"-envdelta"
			"-setvars"
			"-set-state-status"


                        ;; move runs stuff here
                        "-remove-keep"           
			"-set-run-status"
			"-age"

			;; archive 







>







357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
			"-extract-ods"
			"-pathmod"
			"-env2file"
			"-envcap"
			"-envdelta"
			"-setvars"
			"-set-state-status"
			"-import-sexpr"

                        ;; move runs stuff here
                        "-remove-keep"           
			"-set-run-status"
			"-age"

			;; archive 
432
433
434
435
436
437
438

439
440
441
442
443
444
445
                        "-run-wait"      ;; wait on a run to complete (i.e. no RUNNING)
			"-one-pass"      ;;
			"-local"         ;; run some commands using local db access
			"-generate-html"
			"-generate-html-structure" 
			"-list-run-time"
                        "-list-test-time"

			
			;; misc queries
			"-list-disks"
			"-list-targets"
			"-list-db-targets"
			"-show-runconfig"
			"-show-config"







>







443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
                        "-run-wait"      ;; wait on a run to complete (i.e. no RUNNING)
			"-one-pass"      ;;
			"-local"         ;; run some commands using local db access
			"-generate-html"
			"-generate-html-structure" 
			"-list-run-time"
                        "-list-test-time"
			"-regen-testfiles"
			
			;; misc queries
			"-list-disks"
			"-list-targets"
			"-list-db-targets"
			"-show-runconfig"
			"-show-config"
945
946
947
948
949
950
951
952
953
954
955
956

957


958
959







960
961
962
963
964
965
966

























967
968





969
970
971

972
973
974
975
976
977
978

979


980
981
982


983





984
























985
986


987
988
989
990
991
992
993
994
995
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
    (begin
      (adjutant-run)
      (set! *didsomething* #t)))

(if (or (args:get-arg "-list-servers")
        (args:get-arg "-kill-servers"))
    (let ((tl (launch:setup)))
      (if tl ;; all roads from here exit
	  (let* ((servers (server:get-list *toppath*))

		 (fmtstr  "~33a~22a~20a~20a~8a\n"))


	    (format #t fmtstr "ID" "host:port" "age (hms)" "Last mod" "State")
	    (format #t fmtstr "==" "=========" "=========" "========" "=====")







	    (for-each ;;  ( mod-time host port start-time pid )
	     (lambda (server)
	       (let* ((mtm (any->number (car server)))
		      (mod (if mtm (- (current-seconds) mtm) "unk"))
		      (age (- (current-seconds)(or (any->number (list-ref server 3)) (current-seconds))))
		      (url (conc (cadr server) ":" (caddr server)))
		      (pid (list-ref server 4))

























		      (alv (if (number? mod)(< mod 10) #f)))
		 (format #t





			 fmtstr
			 pid
			 url

			 (seconds->hr-min-sec age)
			 (seconds->hr-min-sec mod)
			 (if alv "alive" "dead"))
		 (if (and alv
			  (args:get-arg "-kill-servers"))
		     (begin
		       (debug:print-info 0 *default-log-port* "Attempting to kill server with pid " pid)

		       (server:kill server)))))


	     (sort servers (lambda (a b)
			     (let ((ma (or (any->number (car a)) 9e9))
				   (mb (or (any->number (car b)) 9e9)))


			       (> ma mb)))))





	    ;; (debug:print-info 1 *default-log-port* "Done with listservers")
























	    (set! *didsomething* #t)
	    (exit))


	  (exit))))
      ;; must do, would have to add checks to many/all calls below

;;======================================================================
;; Weird special calls that need to run *after* the server has started?
;;======================================================================

(if (args:get-arg "-list-targets")
    (if (launch:setup)







|
<
|
<
|
>
|
>
>
|
|
>
>
>
>
>
>
>
|
|
|
<
|
<
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
>
>
>
|
|
<
>
|
|
<
|
<
|
<
>
|
>
>
|
|
|
>
>
|
>
>
>
>
>
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
>
>
|
|







957
958
959
960
961
962
963
964

965

966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982

983

984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018

1019
1020
1021

1022

1023

1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
;; a specific Megatest area. Detail are being hashed out and this may change.
;;
(if (args:get-arg "-adjutant")
    (begin
      (adjutant-run)
      (set! *didsomething* #t)))

(if (args:get-arg "-list-servers")

  (let* ((tl (launch:setup)) ;; need this to initialize *toppath*

        (servdir (server:get-servinfo-dir *toppath*))
        (servfiles (glob (conc servdir "/*:*.db")))
        (fmtstr  "~10a~22a~10a~25a~25a~8a\n")
        (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db"))))
     )
     (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
     (for-each
        (lambda (dbfile)
          (let* (
            (dbfname (conc (pathname-file dbfile) ".db"))
            )
                (let (
                  (sinfos (server:get-server-info-sorted *toppath* dbfname))
                  )
                  (for-each 
                     (lambda (sinfo)
                       (let* (

                         (db (list-ref sinfo 5))

                         (pid (list-ref sinfo 4))
                         (host (list-ref sinfo 0))
                         (port (list-ref sinfo 1))
                         (server-id (list-ref sinfo 3))
                         (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2))))
                         (last-mod (seconds->string (list-ref sinfo 2)))
                         (status (system (conc "ssh " host " ps " pid " > /dev/null")))
                         (state (if (> status 0)
                                  "dead"
                                  (tt:ping host port server-id 0)
                                ))
                         )
                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
                       )
                     )
                     sinfos
                  )
                ) 
          )
       )
       dbfiles
     )
     (set! *didsomething* #t)
     (exit)  
  )
)


(if (args:get-arg "-kill-servers")

  (let* ((tl (launch:setup)) ;; need this to initialize *toppath*
        (servdir (server:get-servinfo-dir *toppath*))
        (servfiles (glob (conc servdir "/*:*.db")))
        (fmtstr  "~10a~22a~10a~25a~25a~8a\n")
        (dbfiles (append (glob (conc *toppath* "/.mtdb/main.db")) (glob (conc *toppath* "/.mtdb/?.db"))(glob (conc *toppath* "/.mtdb/??.db"))))

     )
     (format #t fmtstr "DB" "host:port" "PID" "age" "last mod" "state")
     (for-each

        (lambda (dbfile)

          (let* (

            (dbfname (conc (pathname-file dbfile) ".db"))
            (sfiles   (server:find-server *toppath* dbfname))
            )
            (for-each 
              (lambda (sfile)
                (let (
                  (sinfos (servert:get-server-info-sorted *toppath* dbfname))
                  )
                  (for-each 
                     (lambda (sinfo)
                       (let* (
                         (db (list-ref sinfo 5))
                         (pid (list-ref sinfo 4))
                         (host (list-ref sinfo 0))
                         (port (list-ref sinfo 1))
                         (server-id (list-ref sinfo 3))
                         (age (seconds->hr-min-sec (- (current-seconds) (list-ref sinfo 2))))
                         (last-mod (seconds->string (list-ref sinfo 2)))
                         (killed (system (conc "ssh " host " kill " pid " > /dev/null")))
                         (dummy2 (sleep 1))
                         (state (if (> (system (conc "ssh " host " ps " pid " > /dev/null")) 0) "dead" "alive"))
                            )
                         (format #t fmtstr db (conc host ":" port) pid age last-mod state)
                         (delete-file* sfile)
                       )
                     )
                     sinfos
                  )
                ) 
              )
              sfiles
            )
          )
       )
       dbfiles
     )
     ;; remove this db, because otherwise metadata contains records for old servers, and this causes a problem with db:no-sync-get-lock-with-id.
     (if (file-exists? (conc *toppath* "/.mtdb/no-sync.db"))
       (delete-file (conc *toppath* "/.mtdb/no-sync.db"))
     )
     (set! *didsomething* #t)
     (exit)  
  )
)



;;======================================================================
;; Weird special calls that need to run *after* the server has started?
;;======================================================================

(if (args:get-arg "-list-targets")
    (if (launch:setup)
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
	       (args:get-arg "-var"))
	  (let ((val (or (configf:lookup data (args:get-arg "-section")(args:get-arg "-var"))
			 (configf:lookup data "default" (args:get-arg "-var")))))
	    (if val (print val))))
	 ((or (not (args:get-arg "-dumpmode"))
              (string=? (args:get-arg "-dumpmode") "ini"))
	  (configf:config->ini data))
	 ((string=? (args:get-arg "-dumpmode") "sexp")
	  (pp (hash-table->alist data)))
	 ((string=? (args:get-arg "-dumpmode") "json")
	  (json-write data))
	 (else
	  (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
	(set! *didsomething* #t))
      (pop-directory)))







|







1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
	       (args:get-arg "-var"))
	  (let ((val (or (configf:lookup data (args:get-arg "-section")(args:get-arg "-var"))
			 (configf:lookup data "default" (args:get-arg "-var")))))
	    (if val (print val))))
	 ((or (not (args:get-arg "-dumpmode"))
              (string=? (args:get-arg "-dumpmode") "ini"))
	  (configf:config->ini data))
	 ((string=? (args:get-arg "-dumpmode") "sexpr")
	  (pp (hash-table->alist data)))
	 ((string=? (args:get-arg "-dumpmode") "json")
	  (json-write data))
	 (else
	  (debug:print-error 0 *default-log-port* "-dumpmode of " (args:get-arg "-dumpmode") " not recognised")))
	(set! *didsomething* #t))
      (pop-directory)))
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
       ((and (args:get-arg "-section")
	     (args:get-arg "-var"))
	(let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var"))))
	  (if val (print val))))

       ;; print just a section if only -section

       ((equal? (args:get-arg "-dumpmode") "sexp")
	(pp (hash-table->alist data)))
       ((equal? (args:get-arg "-dumpmode") "json")
	(json-write data))
       ((or (not (args:get-arg "-dumpmode"))
	    (string=? (args:get-arg "-dumpmode") "ini"))
	(configf:config->ini data))
       (else







|







1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
       ((and (args:get-arg "-section")
	     (args:get-arg "-var"))
	(let ((val (configf:lookup data (args:get-arg "-section")(args:get-arg "-var"))))
	  (if val (print val))))

       ;; print just a section if only -section

       ((equal? (args:get-arg "-dumpmode") "sexpr")
	(pp (hash-table->alist data)))
       ((equal? (args:get-arg "-dumpmode") "json")
	(json-write data))
       ((or (not (args:get-arg "-dumpmode"))
	    (string=? (args:get-arg "-dumpmode") "ini"))
	(configf:config->ini data))
       (else
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
     (else
      (if (not (car *configinfo*))
	  (begin
	    (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found")
	    (exit 1))
	  ;; put test parameters into convenient variables
	  (begin
	    ;; check for correct version, exit with message if not correct
	    (common:exit-on-version-changed)
	    (runs:operate-on  action
			      target
			      runname
			      testpatt
			      state:  (common:args-get-state)
			      status: (common:args-get-status)
			      new-state-status: (args:get-arg "-set-state-status")







<
<







1223
1224
1225
1226
1227
1228
1229


1230
1231
1232
1233
1234
1235
1236
     (else
      (if (not (car *configinfo*))
	  (begin
	    (debug:print-error 0 *default-log-port* "Attempted " action "on test(s) but run area config file not found")
	    (exit 1))
	  ;; put test parameters into convenient variables
	  (begin


	    (runs:operate-on  action
			      target
			      runname
			      testpatt
			      state:  (common:args-get-state)
			      status: (common:args-get-status)
			      new-state-status: (args:get-arg "-set-state-status")
1431
1432
1433
1434
1435
1436
1437





1438
1439
1440
1441
1442
1443
1444
	       (tests-spec  (let ((t (alist-ref "tests" fields-spec equal?)))
			      (if (and t (null? t)) ;; all fields
				  db:test-record-fields
				  t)))
	       (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id"))))
	       (steps-spec  (alist-ref "steps" fields-spec equal?))
	       (test-field-index (make-hash-table)))





	  (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
	      (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
		(if (null? invalid-tests-spec)
		    ;; generate the lookup map test-field-name => index-number
		    (let loop ((hed (car adj-tests-spec))
			       (tal (cdr adj-tests-spec))
			       (idx 0))







>
>
>
>
>







1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
	       (tests-spec  (let ((t (alist-ref "tests" fields-spec equal?)))
			      (if (and t (null? t)) ;; all fields
				  db:test-record-fields
				  t)))
	       (adj-tests-spec (delete-duplicates (if tests-spec (cons "id" tests-spec) db:test-record-fields))) ;; '("id"))))
	       (steps-spec  (alist-ref "steps" fields-spec equal?))
	       (test-field-index (make-hash-table)))
	  (if (and (args:get-arg "-dumpmode")
		   (not (member (args:get-arg "-dumpmode") '("sexpr" "json" "ods" "list"))))
	      (begin
		(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
		(exit)))
	  (if (and tests-spec (not (null? tests-spec))) ;; do some validation and processing of the test-spec
	      (let ((invalid-tests-spec (filter (lambda (x)(not (member x db:test-record-fields))) tests-spec)))
		(if (null? invalid-tests-spec)
		    ;; generate the lookup map test-field-name => index-number
		    (let loop ((hed (car adj-tests-spec))
			       (tal (cdr adj-tests-spec))
			       (idx 0))
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509



1510
1511
1512
1513
1514
1515
1516
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "status")     targetstr runname "meta" "status"     )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "state")      targetstr runname "meta" "state"      )
			;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id"))  targetstr runname "meta" "id"         )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
			;; ;; add last entry twice - seems to be a bug in hierhash?
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
		       (else
			(if (null? runs-spec)
			    (print "Run: " targetstr "/" runname 
				   " status: " (db:get-value-by-header run header "state")
				   " run-id: " run-id ", number tests: " (length tests)
				   " event_time: " (db:get-value-by-header run header "event_time"))
			    (begin
			      (if (not (member "target" runs-spec))
			          ;; (display (conc "Target: " targetstr))
			          (display (conc "Run: " targetstr "/" runname " ")))
			      (for-each
			       (lambda (field-name)
				 (if (equal? field-name "target")
				     (display (conc "target: " targetstr " "))
				     (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
			       runs-spec)
			      (newline)))))



		       
		     (for-each 
		      (lambda (test)
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)







|
|














|
>
>
>







1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "status")     targetstr runname "meta" "status"     )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "state")      targetstr runname "meta" "state"      )
			;; (mutils:hierhash-set! data (conc (db:get-value-by-header run header "id"))  targetstr runname "meta" "id"         )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "event_time") targetstr runname "meta" "event_time" )
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
			;; ;; add last entry twice - seems to be a bug in hierhash?
			;; (mutils:hierhash-set! data (db:get-value-by-header run header "comment")    targetstr runname "meta" "comment"    )
                        ((#f list)
			 (if (null? runs-spec)
			    (print "Run: " targetstr "/" runname 
				   " status: " (db:get-value-by-header run header "state")
				   " run-id: " run-id ", number tests: " (length tests)
				   " event_time: " (db:get-value-by-header run header "event_time"))
			    (begin
			      (if (not (member "target" runs-spec))
			          ;; (display (conc "Target: " targetstr))
			          (display (conc "Run: " targetstr "/" runname " ")))
			      (for-each
			       (lambda (field-name)
				 (if (equal? field-name "target")
				     (display (conc "target: " targetstr " "))
				     (display (conc field-name ": " (db:get-value-by-header run header (conc field-name)) " "))))
			       runs-spec)
			      (newline))))
		       (else
			(debug:print 0 *default-log-port* "ERROR: dumpmode "(args:get-arg "-dumpmode")" not recognised. Use sexpr, json, ods or list")
			))
		       
		     (for-each 
		      (lambda (test)
		      	(common:debug-handle-exceptions #f
			 exn
			 (begin
			   (debug:print-error 0 *default-log-port* "Bad data in test record? " test)
1994
1995
1996
1997
1998
1999
2000















2001
2002
2003
2004
2005
2006
2007
	   (let* ((db       #f)
		  ;; DO NOT run remote
		  (paths    (tests:test-get-paths-matching keys target (args:get-arg "-test-files"))))
	     (for-each (lambda (path)
			 (print path))
		       paths))))))
















;;======================================================================
;; Archive tests
;;======================================================================
;; Archive tests matching target, runname, and testpatt
(if (equal? (args:get-arg "-archive") "replicate-db")
    (begin
          ;; check if source







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
	   (let* ((db       #f)
		  ;; DO NOT run remote
		  (paths    (tests:test-get-paths-matching keys target (args:get-arg "-test-files"))))
	     (for-each (lambda (path)
			 (print path))
		       paths))))))

;;======================================================================
;; Utils for test areas
;;======================================================================

(if (args:get-arg "-regen-testfiles")
    (if (getenv "MT_TEST_RUN_DIR")
	(begin
	  (launch:setup)
	  (change-directory (getenv "MT_TEST_RUN_DIR"))
	  (let* ((testname (getenv "MT_TEST_NAME"))
		 (itempath (getenv "MT_ITEMPATH")))
	    (launch:extract-scripts-logpro (getenv "MT_TEST_RUN_DIR") testname itempath #f))
	  (set! *didsomething* #t))
	(debug:print 0 *default-log-port* "ERROR: Must run -regen-testfiles in a test environment (i.e. test xterm from dashboard)")))
		 	  
;;======================================================================
;; Archive tests
;;======================================================================
;; Archive tests matching target, runname, and testpatt
(if (equal? (args:get-arg "-archive") "replicate-db")
    (begin
          ;; check if source
2476
2477
2478
2479
2480
2481
2482






2483
2484
2485
2486
2487
2488
2489
       (db:setup #f)
       'killservers
       'dejunk
       'adj-testids
       'old2new
       )
      (set! *didsomething* #t)))







(when (args:get-arg "-sync-brute-force")
  (launch:setup)
  ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
  (set! *didsomething* #t))

(if (args:get-arg "-sync-to-megatest.db")







>
>
>
>
>
>







2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
       (db:setup #f)
       'killservers
       'dejunk
       'adj-testids
       'old2new
       )
      (set! *didsomething* #t)))

(if (args:get-arg "-import-sexpr")
   (begin
   (launch:setup)
   (rmt:import-sexpr (args:get-arg "-import-sexpr"))
   (set! *didsomething* #t)))

(when (args:get-arg "-sync-brute-force")
  (launch:setup)
  ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
  (set! *didsomething* #t))

(if (args:get-arg "-sync-to-megatest.db")

Modified portlogger.scm from [36a4964f50] to [6fdf0de81e].

24
25
26
27
28
29
30
31

32
33
34
35
36
37
38

(declare (unit portlogger))
(declare (uses db))

;; lsof -i

(define (portlogger:open-db fname)
  (let* ((avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away

	 (exists   (common:file-exists? fname))
	 (db       (if avail 
		       (sqlite3:open-database fname)
		       (begin
			 (system (conc "rm -f " fname))
			 (sqlite3:open-database fname))))
	 (handler  (sqlite3:make-busy-timeout 136000))







|
>







24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

(declare (unit portlogger))
(declare (uses db))

;; lsof -i

(define (portlogger:open-db fname)
  (let* (;; (avail    (tasks:wait-on-journal fname 5 remove: #t)) ;; wait up to about 10 seconds for the journal to go away
         (avail #t)
	 (exists   (common:file-exists? fname))
	 (db       (if avail 
		       (sqlite3:open-database fname)
		       (begin
			 (system (conc "rm -f " fname))
			 (sqlite3:open-database fname))))
	 (handler  (sqlite3:make-busy-timeout 136000))
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db"))
	 (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))







|
|







55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
            port INTEGER PRIMARY KEY,
            state TEXT DEFAULT 'not-used',
            fail_count INTEGER DEFAULT 0,
            update_time TIMESTAMP DEFAULT (strftime('%s','now')) );")
    db))

(define (portlogger:open-run-close proc . params)
  (let* ((fname  (conc "/tmp/." (current-user-name) "-portlogger.db")))
	 ;; (avail  (tasks:wait-on-journal fname 10))) ;; wait up to about 10 seconds for the journal to go away
    (handle-exceptions
     exn
     (begin
       ;; (release-dot-lock fname)
       (debug:print-error 0 *default-log-port* "portlogger:open-run-close failed. " proc " " params)
       (debug:print 0 *default-log-port* " message: " ((condition-property-accessor 'exn 'message) exn))
       (debug:print 5 *default-log-port* "exn=" (condition->list exn))

Modified rmt.scm from [8bfadce4ad] to [f953bf98e7].

333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-error 0 *default-log-port* " dat=" dat) 
           (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))







|







333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
    (debug:print-info 13 *default-log-port* "rmt:send-receive, case  9. conninfo=" conninfo " dat=" dat " runremote = " runremote)
    (mutex-unlock! *rmt-mutex*)
    (if success ;; success only tells us that the transport was
	;; successful, have to examine the data to see if
	;; there was a detected issue at the other end
	(extras-transport-succeded *default-log-port* *rmt-mutex* attemptnum runremote res params rid cmd)
	(begin
           (debug:print-info 0 *default-log-port* "Bad return data from Megatest server: dat=" dat) 
           (extras-transport-failed *default-log-port* *rmt-mutex* attemptnum runremote cmd rid params))
	)))

(define (rmt:print-db-stats)
  (let ((fmtstr "~40a~7-d~9-d~20,2-f")) ;; "~20,2-f"
    (debug:print 18 *default-log-port* "DB Stats\n========")
    (debug:print 18 *default-log-port* (format #f "~40a~8a~10a~10a" "Cmd" "Count" "TotTime" "Avg"))
1093
1094
1095
1096
1097
1098
1099
















































































      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)























































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
      res)) ;; All good, return res

#;(set-functions rmt:send-receive                       remote-server-url-set!
	       http-transport:close-connections	      remote-conndat-set!
	       debug:print                            debug:print-info
	       remote-ro-mode                         remote-ro-mode-set!
	       remote-ro-mode-checked-set!            remote-ro-mode-checked)


;;======================================================================
;; import an sexpr file into the db
;;======================================================================

(define (rmt:import-sexpr sexpr-file)
  (if (file-exists? sexpr-file)
      (let* ((data (with-input-from-file sexpr-file read)))
	(for-each
	 (lambda (targ-dat)
	   (rmt:import-target targ-dat)) ;; ("target" ("run1" ("data" (1 ("field" . "value") ...
	 data))
      (let* ((msg (conc "ERROR: file "sexpr-file" not found")))
	(debug:print 0 *default-log-port* msg)
	(cons #f msg))))

(define (rmt:import-target targ-dat)
  (let* ((target (car targ-dat))
	 (data   (cdr targ-dat)))
    (for-each
     (lambda (run-dat)
       (rmt:import-run target run-dat)) ;; ("runname" ("data" ("testid" ("field" . "value") ...
     data)))

(define (rmt:import-run target run-dat)
  (let* ((runname    (car run-dat))
	 (all-dat    (cdr run-dat))
	 (tests-data (alist-ref "data" all-dat equal?))
	 (run-meta   (alist-ref "meta" all-dat equal?))
         (run-id     (string->number (alist-ref "id"   run-meta equal?))))
    (rmt:insert-run run-id target runname run-meta)
    (if (list? tests-data)
      (begin
        (debug:print 0 *default-log-port* "Inserting " (length tests-data) " tests in run " runname)
        (for-each
          (lambda (test-dat)
            (let* ((test-id  (car test-dat))
	      (test-rec (cdr test-dat)))
	      (rmt:insert-test run-id test-rec)))
         tests-data)
      )
      (debug:print 0 *default-log-port* "rmt:import-run: tests-data is empty")
    )
  )
)

;; insert run if not there, return id either way
(define (rmt:insert-run run-id target runname run-meta)
  ;; look for id, return if found
  (let* ((runs (rmt:send-receive 'simple-get-runs #f
				    ;;    runpatt count offset target last-update)
				    (list runname #f    #f     target #f))))
    (if (null? runs)
       (begin
        (debug:print 0 *default-log-port* "inserting run for runname " runname " target " target)
	(rmt:send-receive 'insert-run #f (list run-id target runname run-meta))
       )
       (begin
	(simple-run-id (car runs))
       ))))


(define (rmt:insert-test run-id test-rec)
  (let* ((testname  (alist-ref "testname" test-rec equal?))
	 (item-path (alist-ref "item_path" test-rec equal?))
         (test-id (rmt:get-test-id run-id testname item-path))
         )

    (if test-id
       (debug:print 0 *default-log-port* "test "testname"/"item-path " already exists in run-id " run-id)
       (begin
         (rmt:send-receive 'insert-test run-id test-rec)
       )
    )
  )
)



Modified runs.scm from [6164fa6d2a] to [84c9e1cd98].

797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ((keep-going        #t)
		 (run-queue-retries 5)
		;; (th1        (make-thread (lambda ()
		;; 			    (handle-exceptions
		;; 				exn
		;; 				(begin
		;; 				  (print-call-chain)
		;; 				  (print " message: " ((condition-property-accessor 'exn 'message) exn)))
		;; 			      (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
		;; 						    (any->number reglen) all-tests-registry)))
		;; 			  "runs:run-tests-queue"))
		 (th2        (make-thread (lambda ()			 ;; BBQ: why are we visiting ALL runs here?	    
					    ;; (rmt:find-and-mark-incomplete-all-runs))))) CAN'T INTERRUPT IT ...
					    (let ((run-ids (rmt:get-all-run-ids)))
					      (for-each (lambda (run-id)
							  (if keep-going
							      (handle-exceptions
							       exn
							       (debug:print 0 *default-log-port* "error in calling find-and-mark-incomplete for run-id " run-id ", exn=" exn)
							       (rmt:find-and-mark-incomplete run-id #f)))) ;; ovr-deadtime))) ;; could be root of https://hsdes.intel.com/appstore/article/#/220546828/main -- Title: Megatest jobs show DEAD even though they are still running (1.64/27)
							run-ids)))
					  "runs: mark-incompletes")))
	    ;; (thread-start! th1)
	    (thread-start! th2)
	    ;; (thread-join! th1)
	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)
	    (set! keep-going #f)
	    (thread-join! th2)
	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
      (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                  (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   







|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<



<
<








|
|







797
798
799
800
801
802
803
804
























805
806
807


808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824

    (if (not (null? required-tests))
	(debug:print-info 1 *default-log-port* "Adding \"" (string-intersperse required-tests " ") "\" to the run queue"))
    ;; NOTE: these are all parent tests, items are not expanded yet.
    (debug:print-info 4 *default-log-port* "test-records=" (hash-table->alist test-records))
    (let ((reglen (configf:lookup *configdat* "setup" "runqueue")))
      (if (> (length (hash-table-keys test-records)) 0)
	  (let* ()
























	    ;; just do the main stuff in the main thread
	    (runs:run-tests-queue run-id runname test-records keyvals flags test-patts required-tests
								    (any->number reglen) all-tests-registry)


	    ;; if run-count > 0 call, set -preclean and -rerun STUCK/DEAD
	    (if (> run-count 0) ;; handle reruns
		(begin
		  (if (not (hash-table-ref/default flags "-preclean" #f))
		      (hash-table-set! flags "-preclean" #t))
		  (if (not (hash-table-ref/default flags "-rerun" #f))
		      (hash-table-set! flags "-rerun" "ABORT,STUCK/DEAD,n/a,ZERO_ITEMS"))
		  ;; recursive call to self
                  (runs:run-tests target runname test-patts user flags run-count: (- run-count 1)))
                 (launch:end-of-run-check run-id)))
	  (debug:print-info 0 *default-log-port* "No tests to run")))
    (debug:print-info 4 *default-log-port* "All done by here")
    ;; TODO: try putting post hook call here
      
    ;  (debug:print-info 2 *default-log-port* " run-count " run-count)
    ;  (runs:run-post-hook run-id))
    ;  (debug:print-info 2 *default-log-port* "Not calling post hook runcount = " run-count ))   
1378
1379
1380
1381
1382
1383
1384



1385
1386

1387
1388
1389
1390
1391
1392
1393
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))



			    ;; was: (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)
                            (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)

			    (hash-table-set! test-registry hed 'removed) ;; was 0
                            (if (not (and (null? reg) (null? tal)))
                                (runs:loop-values tal reg reglen regfull reruns)
                                #f))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))







>
>
>
|
|
>







1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
		      (if (eq? nth-try 'removed) ;; removed is removed - drop it NOW
			  (if (null? tal)
			      #f ;; yes, really
			      (list (car tal)(cdr tal) reg reruns))
			  (begin
			    (if (runs:lownoise (conc "FAILED prerequisites or other issue" hed) 60)
				(debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequisites or other issue. Internal state >" nth-try "< will be overridden and we'll retry."))
			    (let* ((test-id      (rmt:get-test-id run-id testname item-path))
				   (test-info    (rmt:get-testinfo-state-status run-id test-id)) ;; we need *current* info
				   (status       (db:test-status test-info)))
			      (if (equal? status "KEEP_TRYING")
				  (mt:test-set-state-status-by-testname-unless-completed run-id test-name item-path "COMPLETED" "PREQ_FAIL" #f)
				  (mt:test-set-state-status-by-testname run-id test-name item-path "NOT_STARTED" "KEEP_TRYING" #f)))
			    (hash-table-set! test-registry hed 'removed) ;; was 0
                            (if (not (and (null? reg) (null? tal)))
                                (runs:loop-values tal reg reglen regfull reruns)
                                #f))))
		     (else
		      (if (runs:lownoise (conc "FAILED prerequitests and we tried" hed) 60)
			  (debug:print 0 *default-log-port* "WARNING: test " hed " has FAILED prerequitests and we've tried at least 10 times to run it. Giving up now."))
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-ref/default *test-meta-updated* test-name #f))
        (begin
          (hash-table-set! *test-meta-updated* test-name #t)
          (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))







|







2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
    ;; Here is where the test_meta table is best updated
    ;; Yes, another use of a global for caching. Need a better way?
    ;;
    ;; There is now a single call to runs:update-all-test_meta and this 
    ;; per-test call is not needed. Given the delicacy of the move to 
    ;; v1.55 this code is being left in place for the time being.
    ;;
    (if (not (hash-table-exists? *test-meta-updated* test-name))
        (begin
          (hash-table-set! *test-meta-updated* test-name #t)
          (runs:update-test_meta test-name test-conf)))
    
    ;; itemdat => ((ripeness "overripe") (temperature "cool") (season "summer"))
    (let* ((new-test-path (string-intersperse (cons test-path (map cadr itemdat)) "/"))
	   (test-id       (rmt:get-test-id run-id test-name item-path))
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
	 (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/.megatest/main.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* dbfile " is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)







|







2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
	 (state-status (if (string? new-state-status) (string-split new-state-status ",") '(#f #f)))
	 (rp-mutex     (make-mutex))
	 (bup-mutex    (make-mutex))
         (keep-records (args:get-arg "-keep-records")) ;; used in conjunction with -remove-runs to keep the records, TODO: consolidate this with "mode".
	 (test-records '())) ;; for tasks that we wish to operate on all tests in one fell swoop

    (let* ((write-access-actions '(remove-runs set-state-status archive run-wait kill-runs))
           (dbfile             (conc  *toppath* "/" *dbdir* "/main.db"))
           (readonly-mode      (not (file-write-access? dbfile))))
      (when (and readonly-mode
                 (member action write-access-actions))
        (debug:print-error 0 *default-log-port* dbfile " is readonly.  Cannot proceed with action ["action"] in which write-access isrequired .")
        (exit 1)))
    
    (debug:print-info 4 *default-log-port* "runs:operate-on => Header: " header " action: " action " new-state-status: " new-state-status)
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (configf:lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (debug:print 0 *default-log-port* "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))







|







2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
     (lambda (key)
       (let* ((idx (cadr key))
	      (fld (car  key))
	      (val (configf:lookup test-conf "test_meta" fld)))
	 ;; (debug:print 5 *default-log-port* "idx: " idx " fld: " fld " val: " val)
	 (if (and val (not (equal? (vector-ref currrecord idx) val)))
	     (begin
	       (debug:print 2 *default-log-port* "Updating " test-name " " fld " to " val)
	       (rmt:testmeta-update-field test-name fld val)))))
     '(("author" 2)("owner" 3)("description" 4)("reviewed" 5)("tags" 9)("jobgroup" 10)))))

;; find tests with matching tags, tagpatt is a string "tagpatt1,tagpatt2%, ..."
;;
(define (runs:get-tests-matching-tags tagpatt)
  (let* ((tagdata (rmt:get-tests-tags))

Modified server.scm from [89a8625f42] to [8914b0f2d7].

410
411
412
413
414
415
416
417
418
419
420
421


















































































































































































































































422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441





















442
443
444
445
446
447
448
		      (with-output-to-file start-flag (lambda () (print server-key)))
		      (thread-sleep! 0.25)
		      (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
		      (equal? server-key new-server-key)))
	       #t
               ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
	       (begin
		 (debug:print-info 0 *default-log-port* "Gating server start, last start: "
				   (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
		 
		 (thread-sleep! ( + 1 idletime))
		 (server:wait-for-server-start-last-flag areapath)))))))




















































































































































































































































        
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
	  (common:simple-file-release-lock lock-file)))
      (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))






















;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))







|




>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>




















>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
		      (with-output-to-file start-flag (lambda () (print server-key)))
		      (thread-sleep! 0.25)
		      (set! new-server-key (with-input-from-file start-flag (lambda () (read-line))))
		      (equal? server-key new-server-key)))
	       #t
               ;; If either of the above conditions is not true, print a "Gating server start" message, wait <idle-time> + 1, then call this function recursively. 
	       (begin
		 (debug:print-info 2 *default-log-port* "Gating server start, last start: "
				   (seconds->time-string fmodtime) ", time since last start: " delta ", required idletime: " idletime ", gating reason:" (if old-enough "another job started a server" "too soon to start another server"))
		 
		 (thread-sleep! ( + 1 idletime))
		 (server:wait-for-server-start-last-flag areapath)))))))

(define (server:get-servinfo-dir areapath)
  (let* ((spath (conc areapath"/.servinfo")))
    (if (not (file-exists? spath))
	(create-directory spath #t))
    spath))

;; gets server info and appends path to server file
;; sorts by age, oldest first
;;
;; returns list of (host port startseconds server-id servinfofile)
;;
(define (server:get-server-info-sorted areapath dbfname)
  (let* (
	 (sfiles   (server:find-server areapath dbfname))
	 (sdats    (filter car (map server:server-get-info sfiles))) ;; first element is #f if the file disappeared while being read
	 (sorted   (sort sdats (lambda (a b)
				 (let* ((starta (list-ref a 2))
					(startb (list-ref b 2)))
				   (if (eq? starta startb)
				       (string>? (list-ref a 3)(list-ref b 3)) ;; if servers started at same time look at server-id
				       (< starta startb))))))
	 (count    0))
    (for-each
     (lambda (rec)
       (if (or (> (length sorted) 1)
	       (common:low-noise-print 120 "server info sorted"))
	   (debug:print 2 *default-log-port* "SERVER #"count": "(string-intersperse (map conc sorted) ", ")))
       (set! count (+ count 1)))
     sorted)
    sorted))

(define (server:clean-up-old areapath)
  ;; any server file that has not been touched in ten minutes is effectively dead
  (let* ((sfiles (glob (conc (server:get-servinfo-dir areapath)"/*"))))
    (for-each
     (lambda (sfile)
       (let* ((modtime (handle-exceptions
			   exn
			 (begin
			   (debug:print 0 *default-log-port* "WARNING: failed to get modification file for "sfile)
			   (current-seconds))
			 (file-modification-time sfile))))
	 (if (and (number? modtime)
		  (> (- (current-seconds) modtime)
		     600))
	     (begin
	       (debug:print 0 *default-log-port* "WARNING: found old server info file "sfile", removing it.")
	       (handle-exceptions
		   exn
		 (debug:print 0 *default-log-port* "WARNING: failed to delete old server info file "sfile)
		 (delete-file sfile))))))
     sfiles)))



(define server-last-start 0)

;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
;; mode:
;;   best - get best server (random of newest five)
;;   home - get home host based on oldest server
;;   info - print info
(define (server:choose-server areapath #!optional (mode 'best))
  ;; age is current-starttime
  ;; find oldest alive
  ;;   1. sort by age ascending and ping until good
  ;; find alive rand from youngest
  ;;   1. sort by age descending
  ;;   2. take five
  ;;   3. check alive, discard if not and repeat
  ;; first we clean up old server files
  (assert (eq? (rmt:transport-mode) 'http) "FATAL: server:run called with rmt:transport-mode="(rmt:transport-mode))
  (server:clean-up-old areapath)
  (let* ((since-last (- (current-seconds) server-last-start))
        (server-start-delay 10))     
    (if ( < (- (current-seconds) server-last-start) 10 )
      (begin
        (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
        (debug:print 2 *default-log-port* "server:choose-server: last server start less than " server-start-delay " seconds ago. Sleeping " server-start-delay " seconds")
        (thread-sleep! server-start-delay)
      )
      (debug:print 2 *default-log-port* "server:choose-server: seconds since last server start: " (- (current-seconds) server-last-start))
    )
  )
  (let* ((serversdat  (server:get-servers-info areapath))
	 (servkeys    (hash-table-keys serversdat))
	 (by-time-asc (if (not (null? servkeys)) ;; NOTE: Oldest is last
			  (sort servkeys ;; list of "host:port"
				(lambda (a b)
				  (>= (list-ref (hash-table-ref serversdat a) 2)
				      (list-ref (hash-table-ref serversdat b) 2))))
			  '())))
    (debug:print 2 *default-log-port* "server:choose-server: serversdat: " serversdat)
    (debug:print 2 *default-log-port* "server:choose-server: servkeys: " servkeys)
    (if (not (null? by-time-asc))
	(let* ((oldest     (last by-time-asc))
	       (oldest-dat (hash-table-ref serversdat oldest))
	       (host       (list-ref oldest-dat 0))
	       (all-valid  (filter (lambda (x)
				     (equal? host (list-ref (hash-table-ref serversdat x) 0)))
				   by-time-asc))
	       (best-ten  (lambda ()
			     (if (> (length all-valid) 11)
				 (take (drop-right all-valid 1) 10) ;; remove the oldest from consideration so it can age out
				 (if (> (length all-valid) 8)
				     (drop-right all-valid 1)
				     all-valid))))
	       (names->dats (lambda (names)
			      (map (lambda (x)
				     (hash-table-ref serversdat x))
				   names)))
	       (am-home?    (lambda ()
			      (let* ((currhost (get-host-name))
				     (bestadrs (server:get-best-guess-address currhost)))
				(or (equal? host currhost)
				    (equal? host bestadrs))))))
	  (case mode
	    ((info)
	     (debug:print 0 *default-log-port* "oldest: "oldest-dat", selected host: "host", all-valid: "all-valid)
	     (debug:print 0 *default-log-port* "youngest: "(hash-table-ref serversdat (car all-valid))))
	    ((home)     host)
	    ((homehost) (cons host (am-home?))) ;; shut up old code
	    ((home?)    (am-home?))
	    ((best-ten)(names->dats (best-ten)))
	    ((all-valid)(names->dats all-valid))
	    ((best)     (let* ((best-ten (best-ten))
			       (len       (length best-ten)))
			  (hash-table-ref serversdat (list-ref best-ten (random len)))))
	    ((count)(length all-valid))
	    (else
	     (debug:print 0 *default-log-port* "ERROR: invalid command "mode)
	     #f)))
	(begin
	  (server:run areapath)
          (set! server-last-start (current-seconds))
	  ;; (thread-sleep! 3)
	  (case mode
	    ((homehost) (cons #f #f))
	    (else	#f))))))




;; oldest server alive determines host then choose random of youngest
;; five servers on that host
;;
(define (server:get-servers-info areapath)
  ;; (assert *toppath* "FATAL: server:get-servers-info called before *toppath* has been set.")
  (let* ((servinfodir (server:get-servinfo-dir areapath))) ;; (conc *toppath*"/.servinfo")))
    (if (not (file-exists? servinfodir))
	(create-directory servinfodir))
    (let* ((allfiles    (glob (conc servinfodir"/*")))
	   (res         (make-hash-table)))
      (for-each
       (lambda (f)
	 (let* ((hostport  (pathname-strip-directory f))
		(serverdat (server:logf-get-start-info f)))
	   (match serverdat
	     ((host port start server-id pid)
	      (if (and host port start server-id pid)
		  (hash-table-set! res hostport serverdat)
		  (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))
	     (else
	      (debug:print-info 2 *default-log-port* "bad server info for "f": "serverdat)))))
       allfiles)
      res)))



;; given a path to a server info file return: host port startseconds server-id pid dbfname logf
;; example of what it's looking for in the log file:
;;     SERVER STARTED: 10.38.175.67:50216 AT 1616502350.0 server-id: 4907e90fc55c7a09694e3f658c639cf4 
;;
(define (server:server-get-info logf)
  (let ((server-rx    (regexp "^SERVER STARTED: (\\S+):(\\d+) AT ([\\d\\.]+) server-id: (\\S+) pid: (\\d+) dbfname: (\\S+)")) ;; SERVER STARTED: host:port AT timesecs server id
	(bad-dat      (list #f #f #f #f #f #f logf)))
     (let ((fdat     (handle-exceptions
			 exn
		       (begin
			 ;; WARNING: this is potentially dangerous to blanket ignore the errors
			 (debug:print-info 0 *default-log-port* "Unable to get server info from "logf", exn="(condition->list exn))
			 '()) ;; no idea what went wrong, call it a bad server, return empty list
		       (with-input-from-file logf read-lines))))
       (if (null? fdat) ;; bad data, return bad-dat
	   bad-dat
	   (let loop ((inl  (car fdat))
		      (tail (cdr fdat))
		      (lnum 0))
	     (let ((mlst (string-match server-rx inl)))
	       (if (not mlst)
		   (if (> lnum 500) ;; give up if more than 500 lines of server log read
		       bad-dat
		       (if (null? tail)
			   bad-dat
			   (loop (car tail)(cdr tail)(+ lnum 1))))
		   (match mlst ;; have a not null list
		     ((_ host port start server-id pid dbfname)
		      (list host
			    (string->number port)
			    (string->number start)
			    server-id
			    (string->number pid)
			    dbfname
			    logf))
		     (else
		      (debug:print 0 *default-log-port* "ERROR: did not recognise SERVER line info "mlst)
		      bad-dat)))))))))



;; find valid server
;; get servers listed, last part of name must match :<dbfname>
;; if more than one, wait one second and look again
;; future: ping oldest, if alive remove other :<dbfname> files
;;
(define (server:find-server areapath dbfname)
  (let* ((servdir  (server:get-servinfo-dir areapath))
	 (sfiles   (glob (conc servdir"/*:"dbfname)))
         (good-files '()))
         (for-each 
           (lambda (sfile)
             (let* ((sinfo (tt:server-get-info sfile))
                 (host (list-ref sinfo 0))
                 (port (list-ref sinfo 1))
                 (server-id (list-ref sinfo 3))
                 (pid (list-ref sinfo 4))
                 (status (system (conc "ssh " host " ps " pid " > /dev/null")))
                 )
                (if (= status 0)
                  (set! good-files (cons sfile good-files))
                  (delete-file* sfile)
                )
             )
           )
           sfiles
         )
    (debug:print-info 2 *default-log-port* "server:find-server: good-files: " good-files " sfiles: " sfiles)
    good-files))



        
;; kind start up of server, wait before allowing another server for a given
;; area to be launched
;;
(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least <server idletime> seconds old
  (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((lock-file    (conc areapath "/logs/server-start.lock")))
	(let* ((start-flag (conc areapath "/logs/server-start-last")))
	  (common:simple-file-lock-and-wait lock-file expire-time: 25)
	  (debug:print-info  2 *default-log-port* "server:kind-run: touching " start-flag)
	  (system (conc "touch " start-flag)) ;; lazy but safe
	  (server:run areapath)
	  (thread-sleep! 20) ;; don't release the lock for at least a few seconds. And allow time for the server startup to get to "SERVER STARTED".
	  (common:simple-file-release-lock lock-file)))
      (debug:print-info 0 *default-log-port* "Found server already running. NOT trying to start another.")))

;; return servid
;; side-effects:
;;   ttdat-cleanup-proc is populated with function to remove the serverinfo file
(define (server:create-server-registration-file areapath host port)
  (let* (
	 (servdir  (server:get-servinfo-dir areapath))
	 (servinf (conc servdir"/"host":"port"-"(current-process-id)))
	 (serv-id (server:mk-signature areapath))
	 (clean-proc (lambda ()
		       (delete-file* servinf)
		       )))
    (assert (and host port) "FATAL: tt:create-server-registration-file called with no conn")
    (tt-cleanup-proc-set! ttdat clean-proc)
    (tt-servinf-file-set! ttdat servinf)
    (with-output-to-file servinf
      (lambda ()
	(print "SERVER STARTED: "host":"port" AT "(current-seconds)" server-id: "serv-id" pid: "(current-process-id))))
      serv-id))



;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))

Modified tests.scm from [5c2006972a] to [fd3f543001].

1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
		(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
		(if (and testexists
			 cache-file
			 (file-write-access? cache-path)
			 allow-write-cache)
		    (let ((tpath (conc cache-path "/.testconfig")))
		      (debug:print-info 1 *default-log-port* "Caching testconfig for " test-name " in " tpath)
                      (if (and tcfg (not (common:in-running-test?)))
                          (configf:write-alist tcfg tpath))))
		tcfg))))))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)







|







1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
		(if (and tcfg cache-file) (hash-table-set! tcfg "have fulldata" #t)) ;; mark this as fully read data
		(if tcfg (hash-table-set! *testconfigs* test-full-name tcfg))
		(if (and testexists
			 cache-file
			 (file-write-access? cache-path)
			 allow-write-cache)
		    (let ((tpath (conc cache-path "/.testconfig")))
		      (debug:print-info 2 *default-log-port* "Caching testconfig for " test-name " in " tpath)
                      (if (and tcfg (not (common:in-running-test?)))
                          (configf:write-alist tcfg tpath))))
		tcfg))))))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)

Added utils/convert-db.sh version [8205e222a7].



















































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/bin/bash

if [ -z "megatest.config" ]; then
    echo "The file 'megatest.config' does not exist. This must be run in a megatest area."
    exit 1
fi
if [ -d ".mtdb" ]; then
    echo "The .mtdb directory already exists. Will not do the conversion"
    exit 1
fi
if [ -d ".megatest" ]; then
    echo "Found a .megatest directory. Will convert from megatest 1.70 to 1.71/1.80 format"
    /p/foundry/env/pkgs/megatest/1.70/16/bin/megatest -list-runs % -dumpmode sexpr > data.sexpr
else 
    if [ -f "megatest.db" ]; then
        echo "Found megatest.db. Will convert from megatest 1.65 to 1.71/1.80 format"
        /p/foundry/env/pkgs/megatest/1.65/92/bin/megatest -list-runs % -dumpmode sexpr > data.sexpr
    else
        echo "Did not find .megatest or megatest.db. Cannot do the conversion"
        exit 1
    fi
fi
which megatest
megatest -import-sexpr data.sexpr