Megatest

Check-in [c597d4e8cf]
Login
Overview
Comment:WIP Fixed db:sync-touched and db:sync-tables. Disabled watchdog threads. Will use only keep-running thread.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.7001-multi-db-rb01
Files: files | file ages | folders
SHA1: c597d4e8cf0073eb864328d347d425c124467c6c
User & Date: mmgraham on 2022-05-02 11:29:49
Other Links: branch diff | manifest | tags
Context
2022-05-04
18:54
WIP Fixed delta syncing with locking check-in: 7011056de4 user: mmgraham tags: v1.7001-multi-db-rb01
17:30
Set timeout to small number check-in: 29e828ea6d user: mrwellan tags: v1.7001-multi-db-rb01
2022-05-02
11:29
WIP Fixed db:sync-touched and db:sync-tables. Disabled watchdog threads. Will use only keep-running thread. check-in: c597d4e8cf user: mmgraham tags: v1.7001-multi-db-rb01
2022-04-18
12:48
avoided locking db when read-only, changed db:get-keys to use keys:config-get-fields, corrected formatting in list-servers check-in: b34c691e5f user: mmgraham tags: v1.7001-multi-db-rb01
Changes

Modified dashboard.scm from [2bab2530f0] to [e90491091e].

157
158
159
160
161
162
163
164

165
166
167
168
169
170
171
157
158
159
160
161
162
163

164
165
166
167
168
169
170
171







-
+







    
;; RA => Might require revert for filters 
;; create a watch dog to move changes from lt/.db/*.db to megatest.db
;;
;;;(if (file-write-access? (conc *toppath* "/megatest.db"))
;;(debug:print-info 13 *default-log-port* "Before common:watchdog spawn")

(thread-start! (make-thread common:watchdog "Watchdog thread"))
;; (thread-start! (make-thread common:watchdog "Watchdog thread"))
;;(debug:print-info 13 *default-log-port* "After common:watchdog spawn")
;; (if (not (args:get-arg "-use-db-cache"))
;;     (begin
;;       (debug:print-info 0 *default-log-port* "Forcing db-cache mode due to read-only access to megatest.db")
;;       (hash-table-set! args:arg-hash "-use-db-cache" #t)));;;)
;;)

Modified db.scm from [33f582feca] to [3fa116b68d].

172
173
174
175
176
177
178











179
180
181
182
183
184
185
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196







+
+
+
+
+
+
+
+
+
+
+







;; 						name: dbname)))
;; 	      ;; NOTE: pushing on the stack only happens AFTER the handle has been used
;; 	      ;; (stack-push! (dbr:dbstruct-dbstack dbstruct) newdb)
;; 	      newdb)
;;           (stack-pop! (dbr:subdb-dbstack subdb)))
;; 	(db:open-db subdb run-id))) ;; )


(define (db:get-db dbstruct run-id) 
   (let* ((subdb (dbfile:get-subdb dbstruct run-id))
        (dbdat (dbfile:get-dbdat dbstruct run-id)))
        (if (dbr:dbdat? dbdat)
          dbdat
          (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db)
        )
   )
)

(define-inline (db:generic-error-printout exn . message)
  (print-call-chain (current-error-port))
  (apply debug:print-error 0 *default-log-port* message)
  (debug:print-error 0 *default-log-port* "   params: " params
		     ", error: "     ((condition-property-accessor 'exn 'message)   exn)
		     ", arguments: " ((condition-property-accessor 'exn 'arguments) exn)
		     ", location: "  ((condition-property-accessor 'exn 'location)  exn)
420
421
422
423
424
425
426
427
428
429






430
431
432
433

434
435

436
437
438
439
440




441
442
443
444
445
446
447
431
432
433
434
435
436
437



438
439
440
441
442
443
444
445
446

447
448

449
450
451
452
453

454
455
456
457
458
459
460
461
462
463
464







-
-
-
+
+
+
+
+
+



-
+

-
+




-
+
+
+
+







    ;; (cons db dbpath)))
    (make-dbr:dbdat dbfile: dbpath dbh: db read-only: (not write-access))))

;; sync run to disk if touched
;;
(define (db:sync-touched dbstruct run-id #!key (force-sync #f))
  (let* ((subdb   (dbfile:get-subdb dbstruct run-id))
	 (tmpdb   (db:get-subdb dbstruct run-id))
	 (mtdb    (dbr:subdb-mtdb subdb))
	 (refndb  (dbr:subdb-refndb subdb))
	 (tmpsubdb   (dbfile:get-subdb dbstruct run-id))
         (tmpdbfile (dbr:subdb-tmpdbfile tmpsubdb))
	 (mtdb    (dbr:subdb-mtdbdat subdb))
         (tmpdb (dbfile:open-sqlite3-db tmpdbfile #f))

	 ;; (refndb  (dbr:subdb-refndb subdb))
	 (start-t (current-seconds)))
    (debug:print-info 4 *default-log-port* "Syncing for run-id: " run-id)
    (mutex-lock! *db-multi-sync-mutex*)
    (let ((update_info (cons (if force-sync 0 *db-last-sync*) "last_update")))
    (let ((update_info (cons "last_update" (if force-sync 0 *db-last-sync*) )))
      (mutex-unlock! *db-multi-sync-mutex*)
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb refndb mtdb))
      (db:sync-tables (db:sync-all-tables-list dbstruct) update_info tmpdb mtdb))
    (mutex-lock! *db-multi-sync-mutex*)
    (set! *db-last-sync* start-t)
    (set! *db-last-access* start-t)
    (mutex-unlock! *db-multi-sync-mutex*)
    (stack-push! (dbr:subdb-dbstack subdb) tmpdb)))
    (stack-push! (dbr:subdb-dbstack subdb) tmpdb))
  #t
)


;; db:safely-close-sqlite3-db and db:close-all were here, moved to dbfile

;;   (let ((locdbs (dbr:dbstruct-locdbs dbstruct)))
;;     (if (hash-table? locdbs)
;; 	(for-each (lambda (run-id)
;; 		    (db:close-run-db dbstruct run-id))
654
655
656
657
658
659
660

661

662
663
664
665
666
667
668
669

670
671
672

673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688


689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708

709
710
711
712
713
714
715
671
672
673
674
675
676
677
678

679
680
681
682
683
684
685
686

687
688
689

690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727

728
729
730
731
732
733
734
735







+
-
+







-
+


-
+
















+
+



















-
+







		   (if (not (db:repair-db dbdat))
		       (begin
			 (debug:print-error 0 *default-log-port* "Failed to rebuild " dbpath ", exiting now.")
			 (exit)))))
	       (cons todb slave-dbs))
     
     0)

   ;; this is the work to be done
   ;; this is the work to be done")
   (cond
    ((not fromdb) (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with fromdb missing")
     -1)
    ((not todb)   (debug:print 3 *default-log-port* "WARNING: db:sync-tables called with todb missing")
     -2)
    ((not (sqlite3:database? (dbr:dbdat-dbh fromdb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with fromdb not a database " fromdb)
     -3)
   -3)
    ((not (sqlite3:database? (dbr:dbdat-dbh todb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a database " todb)
     -4)
   -4)

    ((not (file-write-access? (dbr:dbdat-dbfile todb)))
     (debug:print-error 0 *default-log-port* "db:sync-tables called with todb not a read-only database " todb)
     -5)
    ((not (null? (let ((readonly-slave-dbs
                        (filter
                         (lambda (dbdat)
                           (not (file-write-access? (dbr:dbdat-dbfile todb))))
                         slave-dbs)))
                   (for-each
                    (lambda (bad-dbdat)
                      (debug:print-error
                       0 *default-log-port* "db:sync-tables called with todb not a read-only database " bad-dbdat))
                    readonly-slave-dbs)
                   readonly-slave-dbs))) -6)
    (else
     (debug:print 3 *default-log-port* "db:sync-tables: args are good")

     (let ((stmts       (make-hash-table)) ;; table-field => stmt
	   (all-stmts   '())              ;; ( ( stmt1 value1 ) ( stml2 value2 ))
	   (numrecs     (make-hash-table))
	   (start-time  (current-milliseconds))
	   (tot-count   0))
       (for-each ;; table
	(lambda (tabledat)
	  (let* ((tablename        (car tabledat))
		 (fields           (cdr tabledat))
		 (has-last-update  (member "last_update" fields))
		 (use-last-update  (cond
				    ((and has-last-update
					  (member "last_update" fields))
				     #t) ;; if given a number, just use it for all fields
				    ((number? last-update) #f) ;; if not matched first entry then ignore last-update for this table
				    ((and (pair? last-update)
					  (member (car last-update)    ;; last-update field name
						  (map car fields)))
                                        #t)
				    (last-update
				    ((and last-update (not (pair? last-update)) (not (number? last-update)))
				     (debug:print 0 *default-log-port* "ERROR: parameter last-update for db:sync-tables must be a pair or a number, received: " last-update);; found in fields
				     #f)
				    (else
				     #f)))
		 (last-update-value (if use-last-update ;; no need to check for has-last-update - it is already accounted for
					(if (number? last-update)
					    last-update
743
744
745
746
747
748
749


750


751
752
753
754
755
756
757
758



759
760
761





762
763
764
765
766
767

768
769
770
771
772
773
774
775
776
777
778
779
780
781
782

783
784
785
786
787
788
789
790
791
792


793
794
795

796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816





817
818





819
820
821
822




823
824
825
826
827
828
829
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781

782
783
784
785
786

787
788
789
790
791
792
793
794
795
796

797
798

799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821

822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847

848
849
850
851
852
853
854
855
856
857
858
859
860
861
862

863
864
865
866
867
868
869
870
871
872
873







+
+

+
+







-
+
+
+


-
+
+
+
+
+





-
+

-













+









-
+
+



+




















-
+
+
+
+
+


+
+
+
+
+



-
+
+
+
+







	    ;; set up the field->num table
	    (for-each
	     (lambda (field)
	       (hash-table-set! field->num field count)
	       (set! count (+ count 1)))
	     fields)

            (debug:print 3 *default-log-port* "fromdat: " fromdat)

	    ;; read the source table
            ;; store a list of all rows in the table in fromdat, up to batch-len.
            ;; Then add fromdat to the fromdats list, clear fromdat and repeat.
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (set! fromdat (cons (apply vector a b) fromdat))
	       (if (> (length fromdat) batch-len)
		   (begin
		     (set! fromdats (cons fromdat fromdats))
		     (set! fromdat  '())
		     (set! totrecords (+ totrecords 1)))))
		     (set! totrecords (+ totrecords 1)))
               )
             )
	     (dbr:dbdat-dbh fromdb)
	     full-sel)
	    

             ;; Count less than batch-len as a record
             (if (> (length fromdat) 0)
                 (set! totrecords (+ totrecords 1)))

	    ;; tack on remaining records in fromdat
	    (if (not (null? fromdat))
		(set! fromdats (cons fromdat fromdats)))

	    (if (common:low-noise-print 120 "sync-records")
		(debug:print-info 4 *default-log-port* "found " totrecords " records to sync"))
		(debug:print 4 *default-log-port* "found " totrecords " records to sync"))

	    ;; read the target table; BBHERE
	    (sqlite3:for-each-row
	     (lambda (a . b)
	       (hash-table-set! todat a (apply vector a b)))
	     (dbr:dbdat-dbh todb)
	     full-sel)

            (when (and delay-handicap (> delay-handicap 0))
              (debug:print-info 0 *default-log-port* "imposing synthetic sync delay of "delay-handicap" seconds since sync/delay-handicap is configured")
              (thread-sleep! delay-handicap)
              (debug:print-info 0 *default-log-port* "synthetic sync delay of "delay-handicap" seconds completed")
              )
            
	    ;; first pass implementation, just insert all changed rows

	    (for-each 
	     (lambda (targdb)
	       (let* ((db                 (dbr:dbdat-dbh targdb))
                      (drp-trigger        (if (member "last_update" field-names)
					      (db:drop-trigger db tablename) 
					      #f))
                      (is-trigger-dropped (if (member "last_update" field-names)
                                              (db:is-trigger-dropped db tablename)
					      #f)) 
		      (stmth  (sqlite3:prepare db full-ins)))
		      (stmth  (sqlite3:prepare db full-ins))
                      (changed-rows 0))
		 ;; (db:delay-if-busy targdb) ;; NO WAITING
                 (if (member "last_update" field-names)
                     (debug:print-info 3 *default-log-port* "is-trigger-dropped: " is-trigger-dropped)) 

		 (for-each
		  (lambda (fromdat-lst)
		    (sqlite3:with-transaction
		     db
		     (lambda ()
		       (for-each ;; 
			(lambda (fromrow)
			  (let* ((a    (vector-ref fromrow 0))
				 (curr (hash-table-ref/default todat a #f))
				 (same #t))
			    (let loop ((i 0))
			      (if (or (not curr)
				      (not (equal? (vector-ref fromrow i)(vector-ref curr i))))
				  (set! same #f))
			      (if (and same
				       (< i (- num-fields 1)))
				  (loop (+ i 1))))
			    (if (not same)
				(begin
				  (apply sqlite3:execute stmth (vector->list fromrow))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))))))
				  (hash-table-set! numrecs tablename (+ 1 (hash-table-ref/default numrecs tablename 0)))
                                  (set! changed-rows (+ changed-rows 1))
                                )
                            )
                            ))
			fromdat-lst))))
		  fromdats)


	         (debug:print 3 *default-log-port* "changed rows: " changed-rows)


		 (sqlite3:finalize! stmth)
                 (if (member "last_update" field-names)
                    (db:create-trigger db tablename))))
	     (append (list todb) slave-dbs))))
	     (append (list todb) slave-dbs)
           )
          )
        )
	tbls)
       (let* ((runtime      (- (current-milliseconds) start-time))
	      (should-print (or (debug:debug-mode 12)
				(common:low-noise-print 120 "db sync" (> runtime 500))))) ;; low and high sync times treated as separate.
	 (if should-print (debug:print 3 *default-log-port* "INFO: db sync, total run time " runtime " ms"))
	 (for-each 
	  (lambda (dat)
1003
1004
1005
1006
1007
1008
1009

























1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022

1023
1024
1025
1026




















































1027
1028
1029
1030


1031

1032
1033
1034

1035
1036
1037
1038
1039
1040
1041
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092




1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146


1147
1148

1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160







+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+













+
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+


-
-
+
+
-
+



+







;; 	       (cache-db (db:cache-for-read-only
;; 			  megatest-db
;; 			  (conc cache-dir "/" fname)
;; 			  use-last-update: #t)))
;; 	  (thread-start! th1)
;; 	  (apply proc cache-db params)
;; 	  ))))




;; Get a lock from the no-sync-db for the from-db, then delta sync the from-db to the to-db, otherwise return #f

(define (db:lock-and-delta-sync no-sync-db from-db-file to-db-file)
  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
  (let* ((lockdat  (db:no-sync-get-lock no-sync-db from-db))
	 (gotlock  (car lockdat))
	 (locktime (cdr lockdat)))
    (if gotlock
	(begin
          (debug:print 0 *default-log-port* "db:lock-and-sync copying db")
	  ;; (file-copy from-db to-db #t)
	  (db:no-sync-del! no-sync-db from-db)
	  #t)
        (begin
          (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
	  #f
        ))))





;; options:
;;
;;  'killservers  - kills all servers
;;  'dejunk       - removes junk records
;;  'adj-testids  - move test-ids into correct ranges
;;  'old2new      - sync megatest.db to /tmp/.../megatest.db and /tmp/.../megatest_ref.db
;;  'new2old      - sync /tmp/.../megatest.db to megatest.db and /tmp/.../megatest_ref.db (and update data_synced)
;;  'closeall     - close all opened dbs
;;  'schema       - attempt to apply schema changes
;;  run-ids: '(1 2 3 ...) or #f (for all)
;;
(define (db:multi-db-sync dbstruct . options)
  (db:open-db dbstruct #f)
  ;; (if (not (launch:setup))
  ;;    (debug:print 0 *default-log-port* "ERROR: not able to setup up for megatest.")
  (assert #f "FATAL: Call to db:multi-db-sync which is not completed yet.")
  (let* ((data-synced 0)) ;; count of changed records (I hope)

  (let* ((data-synced 0) ;; count of changed records
    (tmp-area       (common:get-db-tmp-area))
    (dbfiles        (glob (conc tmp-area"/.db/*.db")))
    (sync-durations (make-hash-table))
    )

    (for-each
     (lambda (file)
       (debug:print-info 0 *default-log-port* "file: " file)
       (let* ((fname (conc (pathname-file file) ".db"))
	      (fulln (conc *toppath*"/.db/"fname))
	      (time1 (if (file-exists? file)
			 (file-modification-time file)
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "file)
			   1)))
	      (time2 (if (file-exists? fulln)
			 (file-modification-time fulln)
			 (begin
			   (debug:print-info 0 *default-log-port* "Sync - I do not see file "fulln)
			   0)))
	      (changed (> time1 time2))
	      (do-cp (cond
		      ((not (file-exists? fulln)) ;; shouldn't happen, but this might recover
		       (debug:print-info 0 *default-log-port* "File "fulln" not found! Copying "fname" to "fulln)
		       #t)
		      (changed ;; (and changed
		       ;; (> (- (current-seconds) time1) 3)) ;; if file is changed and three seconds have passed.
		       #t)
		      ((and changed *time-to-exit*) ;; last sync
		       #t)
		      (else
		       #f))))
	 (if do-cp
	     (let* ((start-time (current-milliseconds)))
	       (debug:print-info 0 *default-log-port* "delta sync delta file: " fname", delta: " (- time1 time2) " seconds")
	       (db:lock-and-delta-sync *no-sync-db* file fulln)
	       (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time)))
	     (debug:print-info 0 *default-log-port* "skipping delta sync. " file " is up to date")
         )
       )
     )
     dbfiles
    )


    (hash-table->alist sync-durations)



    (debug:print 0 *default-log-port* "db:multi-db-sync subdbs: " (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
    (for-each
     (lambda (subdb)
       (let* ((mtdb     (dbr:subdb-mtdb subdb))
	      (tmpdb    (dbr:subdb-tmpdb subdb))
       (let* ((mtdb     (dbr:subdb-mtdbdat subdb))
	      (tmpdbfile    (dbr:subdb-tmpdbfile subdb))
	      (refndb   (dbr:subdb-refndb subdb))
              (main-tmpdb (dbfile:open-db dbstruct #f db:initialize-main-db))
	      (allow-cleanup #t) ;; (if run-ids #f #t))
	      (servers  (server:get-list *toppath*)) ;; (tasks:get-all-servers (db:delay-if-busy tdbdat)))
	      )    
         (debug:print 0 *default-log-port* "db:multi-db-sync mtdb: " mtdb " tmpdbfile:" tmpdbfile )
	 (for-each
	  (lambda (option)
	    
	    (case option
	      ;; kill servers
	      ((killservers)
	       (for-each
1054
1055
1056
1057
1058
1059
1060
1061

1062
1063
1064


1065
1066
1067
1068

1069
1070
1071

1072
1073
1074
1075
1076

1077
1078
1079
1080
1081

1082

1083
1084
1085
1086

1087
1088
1089

1090
1091
1092



1093
1094
1095


1096
1097
1098
1099
1100
1101
1102
1173
1174
1175
1176
1177
1178
1179

1180



1181
1182
1183
1184
1185

1186
1187
1188

1189

1190
1191
1192

1193
1194
1195
1196
1197

1198

1199
1200
1201
1202

1203

1204

1205



1206
1207
1208
1209
1210

1211
1212
1213
1214
1215
1216
1217
1218
1219







-
+
-
-
-
+
+



-
+


-
+
-



-
+




-
+
-
+



-
+
-

-
+
-
-
-
+
+
+


-
+
+







	       (delete-file* (common:get-sync-lock-filepath)))
	      
	      ;; clear out junk records
	      ;;
	      ((dejunk)
	       ;; (db:delay-if-busy mtdb) ;; ok to delay on mtdb
	       (when (file-write-access? (dbr:dbdat-dbfile mtdb)) (db:clean-up mtdb))
	       (db:clean-up tmpdb)
	       (db:clean-up main-tmpdb)
	       (db:clean-up refndb))

	      ;; sync runs, test_meta etc.
              )
	      ;; sync from main dbs to /tmp ones.
	      ;;
	      ((old2new)
	       (set! data-synced
		 (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb tmpdb refndb)
		 (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f mtdb main-tmpdb)
		    data-synced)))
	      
	      ;; now ensure all newdb data are synced to megatest.db
	      ;; sync from /tmp dbs to main ones.
	      ;; do not use the run-ids list passed in to the function
	      ;;
	      ((new2old)
	       (set! data-synced
		 (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f tmpdb refndb mtdb)
		 (+ (db:sync-tables (db:sync-all-tables-list dbstruct) #f main-tmpdb mtdb)
		    data-synced)))

	      ((adj-target)
	       (db:adj-target (dbr:dbdat-dbh mtdb))
	       (db:adj-target (dbr:dbdat-dbh tmpdb))
	       (db:adj-target (dbr:dbdat-dbh main-tmpdb))
	       (db:adj-target (dbr:dbdat-dbh refndb)))
              )
	      
	      ((schema)
	       (db:patch-schema-maindb (dbr:dbdat-dbh mtdb))
	       (db:patch-schema-maindb (dbr:dbdat-dbh tmpdb))
	       (db:patch-schema-maindb (dbr:dbdat-dbh main-tmpdb))
	       (db:patch-schema-maindb (dbr:dbdat-dbh refndb))
	       (db:patch-schema-rundb  (dbr:dbdat-dbh mtdb))
	       (db:patch-schema-rundb  (dbr:dbdat-dbh tmpdb))
	       (db:patch-schema-rundb  (dbr:dbdat-dbh main-tmpdb))
	       (db:patch-schema-rundb  (dbr:dbdat-dbh refndb))))
	    
	    (stack-push! (dbr:subdb-dbstack subdb) tmpdb))
               )
              )
	    (stack-push! (dbr:subdb-dbstack subdb) main-tmpdb))
	  options)))
     (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
    data-synced))
    data-synced)
)

;; Sync all changed db's
;;
(define (db:tmp->megatest.db-sync dbstruct run-id last-update)
  (let* ((subdbs (hash-table-values (dbr:dbstruct-subdbs dbstruct)))
	 (res    '()))
    (for-each
4968
4969
4970
4971
4972
4973
4974
4975

4976
4977
4978
4979
4980
4981
4982
4983
4984

4985
4986
4987
4988
4989










































4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
5001
5002
5003
5004
5005
5006
5007
5008





5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019



5020

5021
5022
5023
5024
5025
5026
5027
5085
5086
5087
5088
5089
5090
5091

5092
5093
5094
5095
5096
5097
5098
5099
5100

5101
5102
5103
5104
5105
5106
5107
5108
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132
5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165


5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184

5185
5186
5187
5188
5189
5190
5191
5192







-
+








-
+





+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

















-
-
+
+
+
+
+











+
+
+
-
+







	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "copy-sync"))) ;; "delta-sync"))) ;; "brute-force-sync")))
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))) ;; "delta-sync"))) ;; "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 ((equal? syncer "copy-sync")
                  (server:writable-watchdog-copysync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are copy-sync, brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))


(define (db:do-sync)
  (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "delta-sync"))
    (dbstruct (db:setup #t)))

    (debug:print 0 *default-log-port* "db:do-sync: sync-method: " syncer)
    (cond
      ((equal? syncer "brute-force-sync")
       (db:run-lock-and-sync *no-sync-db*))
      ((equal? syncer "delta-sync")
       (debug:print 0 *default-log-port* "db:do-sync: db:multi-db-sync" )
       (let* (
	   (tmpdbpth (dbr:dbstruct-tmppath dbstruct))
	   (lockfile (conc tmpdbpth ".lock"))
	   (locked   (common:simple-file-lock lockfile)) 
	   (res      (if locked
			 (db:multi-db-sync 
			  dbstruct
			  'new2old)
			 #f)))
           (if res
	     (begin
	       (common:simple-file-release-lock lockfile)
	       (print "db:do-sync: Synced " res " records to megatest.db")
             )
	     (print "db:do-sync: Skipping sync, there is a sync in progress.")
           )
       )
      )
      ((equal? syncer "copy-sync")
       (db:run-lock-and-sync *no-sync-db*))
      (else
        (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are copy-sync, brute-force-sync and delta-sync.")
        (exit 1)
      )
    )
  )
)




(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  #;(let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not *time-to-exit*) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id)
			    )))))

			    ))))
 )


;; Get a lock from the no-sync-db for the from-db, then copy the from-db to the to-db, otherwise return #f

(define (db:lock-and-sync no-sync-db from-db to-db)
  (assert (not *db-sync-in-progress*) "FATAL: db:lock-and-sync called while a sync is in progress.")
  (let* ((lockdat  (db:no-sync-get-lock no-sync-db from-db))
	 (gotlock  (car lockdat))
	 (locktime (cdr lockdat)))
    (if gotlock
	(begin
	  (file-copy from-db to-db #t)
	  (db:no-sync-del! no-sync-db from-db)
	  #t)
        (begin
          (debug:print 0 *default-log-port* "could not get lock for " from-db " from no-sync-db")
	  #f
	#f)))
        ))))

;; sync for filesystem local db writes
;;
(define (db:run-lock-and-sync no-sync-db)
  (let* ((tmp-area       (common:get-db-tmp-area))
	 (dbfiles        (glob (conc tmp-area"/.db/*.db")))
	 (sync-durations (make-hash-table)))
5050
5051
5052
5053
5054
5055
5056
5057

5058
5059
5060
5061
5062
5063
5064
5215
5216
5217
5218
5219
5220
5221

5222
5223
5224
5225
5226
5227
5228
5229







-
+







		       #t)
		      ((and changed *time-to-exit*) ;; last copy
		       #t)
		      (else
		       #f))))
	 (if do-cp
	     (let* ((start-time (current-milliseconds)))
	       (debug:print-info 0 *default-log-port* "sync file: "file", fname: "fname", time1: "time1", time2: "time2)
	       (debug:print-info 0 *default-log-port* "sync copy file: " fname", delta: " (- time1 time2) " seconds")
	       (db:lock-and-sync no-sync-db file fulln)
	       (hash-table-set! sync-durations (conc fname".db") (- (current-milliseconds) start-time)))
	     #;(debug:print-info 0 *default-log-port* "skipping sync..."))))
     dbfiles)
    (hash-table->alist sync-durations)))

;; straight forward copy based sync
5077
5078
5079
5080
5081
5082
5083
5084
5085


5086
5087
5088
5089
5090
5091
5092
5242
5243
5244
5245
5246
5247
5248


5249
5250
5251
5252
5253
5254
5255
5256
5257







-
-
+
+







 	(debug-mode         (debug:debug-mode 1))
 	(last-time          (current-seconds))     ;; last time through the sync loop
 	(no-sync-db         (db:open-no-sync-db))
 	(sync-duration      0)  ;; run time of the sync in milliseconds
	(tmp-area           (common:get-db-tmp-area)))
    ;; Sync moved to http-transport keep-running loop
    (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. sync is "legacy-sync", tmp-area is "tmp-area)
    (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is "legacy-sync" pid="(current-process-id));;  " this-wd-num="this-wd-num)
    (debug:print-info 2 *default-log-port* "Periodic copy-based sync thread started. syncer is copy-sync, tmp-area is " tmp-area)
    (debug:print-info 3 *default-log-port* "watchdog starting. syncer is copy-sync pid="(current-process-id));;  " this-wd-num="this-wd-num)
    
    (if (and legacy-sync (not *time-to-exit*))
 	(begin
 	  (debug:print-info 0 *default-log-port* "Server running, periodic copy-based sync started.")
	  (let loop ()

	    ;; run the sync and print out durations
5109
5110
5111
5112
5113
5114
5115
5116
5117
5118
5119
5120
5121
5122
5123
5124
5125
5126
5127
5128
5129
5130
5131
5132

















5133
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
5164
5165
5166
5167
5168
5169
5170
5171
5172
5173
5174
5175
5176
5177
5178
5179
5180
5181
5182
5183
5184
5185
5186
5187
5188
5189
5190
5191
5192
5193
5194
5195
5196
5197
5198
5199
5200



































































5201
5202


5203
5204
5205
5206
5207
5208
5209
5210
5211
5212
5213
5214
5215

5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
5241
















5242
5243
5244
5245
5246
5247
5248
5249
5250
5251
5252
5253
5254
5255
5256


5257
5258


5259
5260
5261
5262
5263
5264
5265
5274
5275
5276
5277
5278
5279
5280

















5281
5282
5283
5284
5285
5286
5287
5288
5289
5290
5291
5292
5293
5294
5295
5296
5297
5298



































































5299
5300
5301
5302
5303
5304
5305
5306
5307
5308
5309
5310
5311
5312
5313
5314
5315
5316
5317
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327
5328
5329
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353
5354
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365


5366
5367













5368


























5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382
5383
5384
5385
5386
5387
5388
5389
5390
5391
5392
5393
5394
5395
5396
5397


5398
5399
5400

5401
5402
5403
5404
5405
5406
5407
5408
5409







-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+













-
-
+
+

-
+
+







	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = "
				  *time-to-exit*" pid="(current-process-id) )))))))

(define (server:writable-watchdog-deltasync dbstruct)
  ;; This is awful complex and convoluted. Plan to redo?
  ;; for now ... skip it.
;; ==> 
;; ==> (thread-sleep! 0.05) ;; delay for startup
;; ==> (let ((legacy-sync  (common:run-sync?))
;; ==>       (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
;; ==> 	(debug-mode   (debug:debug-mode 1))
;; ==> 	(last-time    (current-seconds))
;; ==> 	(no-sync-db   (db:open-no-sync-db))
;; ==> 	(stmt-cache   #f) ;; (dbr:dbstruct-stmt-cache dbstruct))
;; ==> 	(sync-duration 0) ;; run time of the sync in milliseconds
;; ==>       (subdbs       (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
;; ==>   (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
;; ==>   (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
;; ==>   (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
;; ==>   
;; ==>   (if (and legacy-sync (not *time-to-exit*))
;; ==> 	(begin
;; ==> 	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
 
  (thread-sleep! 0.05) ;; delay for startup
  (let ((legacy-sync  (common:run-sync?)))
       (sync-stale-seconds (configf:lookup-number *configdat* "server" "sync-stale-seconds" default: 300))
 	(debug-mode   (debug:debug-mode 1))
 	(last-time    (current-seconds))
 	(no-sync-db   (db:open-no-sync-db))
 	(stmt-cache   #f) ;; (dbr:dbstruct-stmt-cache dbstruct))
 	(sync-duration 0) ;; run time of the sync in milliseconds
       (subdbs       (hash-table-values (dbr:dbstruct-subdbs dbstruct))))
   (set! *no-sync-db* no-sync-db) ;; make the no sync db available to api calls
   (debug:print-info 2 *default-log-port* "Periodic sync thread started.")
   (debug:print-info 3 *default-log-port* "watchdog starting. legacy-sync is " legacy-sync" pid="(current-process-id)  );;  " this-wd-num="this-wd-num)
   
   (if (and legacy-sync (not *time-to-exit*))
 	(begin
 	  (debug:print-info 0 *default-log-port* "Server running, periodic sync started.")
	  (let loop ()
;; ==> 	    ;; sync for filesystem local db writes
;; ==> 	    ;;
;; ==> 	    (mutex-lock! *db-multi-sync-mutex*)
;; ==> 	       (let* ((start-file (conc tmp-area "/.start-sync"))
;; ==> 		      (end-file   (conc tmp-area "/.end-sync"))
;; ==> 			      
;; ==> 		      (need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
;; ==> 		      (sync-in-progress *db-sync-in-progress*)
;; ==> 		      (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
;; ==> 		      (should-sync      (and (not *time-to-exit*)
;; ==> 					     (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
;; ==> 		      (start-time       (current-seconds))
;; ==> 		      (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
;; ==> 		      (mt-mod-time      (file-modification-time mtpath))
;; ==> 		      (last-sync-start  (if (common:file-exists? start-file)
;; ==> 					    (file-modification-time start-file)
;; ==> 					    0))
;; ==> 		      (last-sync-end    (if (common:file-exists? end-file)
;; ==> 					    (file-modification-time end-file)
;; ==> 					    10))
;; ==> 		      (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
;; ==> 		      (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
;; ==> 					     (< mt-mod-time last-sync-start)))
;; ==> 		      (sync-done        (<= last-sync-start last-sync-end))
;; ==> 		      (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
;; ==> 		      (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
;; ==> 					     (or need-sync should-sync)
;; ==> 					     (or sync-done sync-stale)
;; ==> 					     (not sync-in-progress)
;; ==> 					     (not recently-synced))))
;; ==> 		 (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
;; ==> 				   " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
;; ==> 				   " sync-done=" sync-done " sync-period=" sync-period)
;; ==> 		 (if (and (> sync-period 5)
;; ==> 			  (common:low-noise-print 30 "sync-period"))
;; ==> 		     (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
;; ==> 		 ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
;; ==> 		 ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
;; ==> 		 (if will-sync (set! *db-sync-in-progress* #t))
;; ==> 		 (mutex-unlock! *db-multi-sync-mutex*)
;; ==> 		 (if will-sync
;; ==> 		     (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
;; ==> 			   (sync-start         (current-milliseconds)))
;; ==> 		       (with-output-to-file start-file (lambda ()(print (current-process-id))))
;; ==> 		       
;; ==> 		       ;; put lock here
;; ==> 		       
;; ==> 		       ;; (if (or (not max-sync-duration)
;; ==> 		       ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
;; ==> 
;; ==> 		      ;;
;; ==> 		     
;; ==> 		       (for-each
;; ==> 			(lambda (subdb)
;; ==> 			  (let* (;;(dbstruct (db:setup))
;; ==> 				 (mtdb       (dbr:subdb-mtdb subdb))
;; ==> 				 (mtpath     (db:dbdat-get-path mtdb))
;; ==> 				 (tmp-area   (common:get-db-tmp-area))
;; ==> 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
;; ==> 			    (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> 			    (if (> res 0) ;; some records were transferred, keep the db alive
;; ==> 				(begin
;; ==> 				  (mutex-lock! *heartbeat-mutex*)
;; ==> 				  (set! *db-last-access* (current-seconds))
;; ==> 				  (mutex-unlock! *heartbeat-mutex*)
;; ==> 				  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
;; ==> 				(debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
 	    ;; sync for filesystem local db writes
 	    ;;
 	    (mutex-lock! *db-multi-sync-mutex*)
 	       (let* ((start-file (conc tmp-area "/.start-sync"))
 		      (end-file   (conc tmp-area "/.end-sync"))
 			      
 		      (need-sync        (>= *db-last-access* *db-last-sync*)) ;; no sync since last write
 		      (sync-in-progress *db-sync-in-progress*)
 		      (min-intersync-delay (configf:lookup-number *configdat* "server" "minimum-intersync-delay" default: 5))
 		      (should-sync      (and (not *time-to-exit*)
 					     (> (- (current-seconds) *db-last-sync*) min-intersync-delay))) ;; sync every five seconds minimum, deprecated logic, can probably be removed
 		      (start-time       (current-seconds))
 		      (cpu-load-adj     (alist-ref 'adj-proc-load (common:get-normalized-cpu-load #f)))
 		      (mt-mod-time      (file-modification-time mtpath))
 		      (last-sync-start  (if (common:file-exists? start-file)
 					    (file-modification-time start-file)
 					    0))
 		      (last-sync-end    (if (common:file-exists? end-file)
 					    (file-modification-time end-file)
 					    10))
 		      (sync-period      (+ 3 (* cpu-load-adj 30))) ;; as adjusted load increases increase the sync period
 		      (recently-synced  (and (< (- start-time mt-mod-time) sync-period) ;; not useful if sync didn't modify megatest.db!
 					     (< mt-mod-time last-sync-start)))
 		      (sync-done        (<= last-sync-start last-sync-end))
 		      (sync-stale       (> start-time (+ last-sync-start sync-stale-seconds)))
 		      (will-sync        (and (not *time-to-exit*)       ;; do not start a sync if we are in the process of exiting
 					     (or need-sync should-sync)
 					     (or sync-done sync-stale)
 					     (not sync-in-progress)
 					     (not recently-synced))))
 		 (debug:print-info 13 *default-log-port* "WD writable-watchdog top of loop.  need-sync="need-sync" sync-in-progress=" sync-in-progress
 				   " should-sync="should-sync" start-time="start-time" mt-mod-time="mt-mod-time" recently-synced="recently-synced" will-sync="will-sync
 				   " sync-done=" sync-done " sync-period=" sync-period)
 		 (if (and (> sync-period 5)
 			  (common:low-noise-print 30 "sync-period"))
 		     (debug:print-info 0 *default-log-port* "Increased sync period due to long sync times, sync took: " sync-period " seconds."))
 		 ;; (if recently-synced (debug:print-info 0 *default-log-port* "Skipping sync due to recently-synced flag=" recently-synced))
 		 ;; (debug:print-info 0 *default-log-port* "need-sync: " need-sync " sync-in-progress: " sync-in-progress " should-sync: " should-sync " will-sync: " will-sync)
 		 (if will-sync (set! *db-sync-in-progress* #t))
 		 (mutex-unlock! *db-multi-sync-mutex*)
 		 (if will-sync
 		     (let (;; (max-sync-duration  (configf:lookup-number *configdat* "server" "max-sync-duration")) ;; KEEPING THIS AVAILABLE BUT SHOULD NOT USE, I'M PRETTY SURE IT DOES NOT WORK!
 			   (sync-start         (current-milliseconds)))
 		       (with-output-to-file start-file (lambda ()(print (current-process-id))))
 		       
 		       ;; put lock here
 		       
 		       ;; (if (or (not max-sync-duration)
 		       ;;        (< sync-duration max-sync-duration)) ;; NOTE: db:sync-to-megatest.db keeps track of time of last sync and syncs incrementally
 
 		      ;;
 		     
 		       (for-each
 			(lambda (subdb)
 			  (let* (;;(dbstruct (db:setup))
 				 (mtdb       (dbr:subdb-mtdb subdb))
 				 (mtpath     (db:dbdat-get-path mtdb))
 				 (tmp-area   (common:get-db-tmp-area))
 				 (res        (db:sync-to-megatest.db dbstruct no-sync-db: no-sync-db))) ;; did we sync any data? If so need to set the db touched flag to keep the server alive
 			    (set! sync-duration (- (current-milliseconds) sync-start))
 			    (if (> res 0) ;; some records were transferred, keep the db alive
 				(begin
 				  (mutex-lock! *heartbeat-mutex*)
 				  (set! *db-last-access* (current-seconds))
 				  (mutex-unlock! *heartbeat-mutex*)
 				  (debug:print-info 0 *default-log-port* "sync called, " res " records transferred."))
 				(debug:print-info 2 *default-log-port* "sync called but zero records transferred")))
;; ==> 			  )
;; ==> 			subdbs)))
 			  )
 			subdbs)))
;; ==> 		 ;;                         ;; TODO: factor this next routine out into a function
;; ==> 		 ;;                         (with-input-from-pipe ;; this should not block other threads but need to verify this
;; ==> 		 ;;                          (conc "megatest -sync-to-megatest.db -m testsuite:" (common:get-area-name) ":" *toppath*)
;; ==> 		 ;;                          (lambda ()
;; ==> 		 ;;                            (let loop ((inl (read-line))
;; ==> 		 ;;                                       (res #f))
;; ==> 		 ;;                              (if (eof-object? inl)
;; ==> 		 ;;                                  (begin
;; ==> 		 ;;                                    (set! sync-duration (- (current-milliseconds) sync-start))
;; ==> 		 ;;                                    (cond
;; ==> 		 ;;                                     ((not res)
;; ==> 		 ;;                                      (debug:print 0 *default-log-port* "ERROR: sync from /tmp db to megatest.db appears to have failed. Recommended that you stop your runs and run \"megatest -cleanup-db\""))
;; ==> 		 ;;                                     ((> res 0)
 		 
;; ==> 		 ;;                                      (mutex-lock! *heartbeat-mutex*)
;; ==> 		 ;;                                      (set! *db-last-access* (current-seconds))
;; ==> 		 ;;                                      (mutex-unlock! *heartbeat-mutex*))))
;; ==> 		 ;;                                  (let ((num-synced (let ((matches (string-match "^Synced (\\d+).*$" inl)))
;; ==> 		 ;;                                                      (if matches
;; ==> 		 ;;                                                          (string->number (cadr matches))
;; ==> 		 ;;                                                          #f))))
;; ==> 		 ;;                                    (loop (read-line)
;; ==> 		 ;;                                          (or num-synced res))))))))))
;; ==> 		 
;; ==> 		 (if will-sync
;; ==> 		     (begin
;; ==> 		       (mutex-lock! *db-multi-sync-mutex*)
;; ==> 		       (set! *db-sync-in-progress* #f)
;; ==> 		       (set! *db-last-sync* start-time)
;; ==> 		       (with-output-to-file end-file (lambda ()(print (current-process-id))))
;; ==> 		       
;; ==> 		       ;; release lock here
;; ==> 		       
;; ==> 		       (mutex-unlock! *db-multi-sync-mutex*)))
;; ==> 		 (if (and debug-mode
;; ==> 			  (> (- start-time last-time) 60))
;; ==> 		     (begin
;; ==> 		       (set! last-time start-time)
;; ==> 		       (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
;; ==> 	       
 		 (if will-sync
 		     (begin
 		       (mutex-lock! *db-multi-sync-mutex*)
 		       (set! *db-sync-in-progress* #f)
 		       (set! *db-last-sync* start-time)
 		       (with-output-to-file end-file (lambda ()(print (current-process-id))))
 		       
 		       ;; release lock here
 		       
 		       (mutex-unlock! *db-multi-sync-mutex*)))
 		 (if (and debug-mode
 			  (> (- start-time last-time) 60))
 		     (begin
 		       (set! last-time start-time)
 		       (debug:print-info 4 *default-log-port* "timestamp -> " (seconds->time-string (current-seconds)) ", time since start -> " (seconds->hr-min-sec (- (current-seconds) *time-zero*))))))
 	       
	       ;; keep going unless time to exit
	       ;;
	       (if (not *time-to-exit*)
		   (let delay-loop ((count 0))
		     ;;(debug:print-info 13 *default-log-port* "delay-loop top; count="count" pid="(current-process-id)" this-wd-num="this-wd-num" *time-to-exit*="*time-to-exit*)
		     
		     (if (and (not *time-to-exit*)
			      (< count 6)) ;; was 11, changing to 4. 
			 (begin
			   (thread-sleep! 1)
			   (delay-loop (+ count 1))))
		     (if (not *time-to-exit*) (loop))))
	       
;; ==> 	       ;; time to exit, close the no-sync db here
;; ==> 	       (db:no-sync-close-db no-sync-db stmt-cache)
;; 	       ;; time to exit, close the no-sync db here
;; 	       (db:no-sync-close-db no-sync-db stmt-cache)
	       (if (common:low-noise-print 30)
		   (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) ;; ))) ;;" this-wd-num="this-wd-num)))))))
		   (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " *time-to-exit*" pid="(current-process-id) )))) 
))


(define (std-exit-procedure)
  ;;(common:telemetry-log-close)
  (on-exit (lambda () 0))
  ;;(debug:print-info 13 *default-log-port* "std-exit-procedure called; *time-to-exit*="*time-to-exit*)
  (let ((no-hurry  (if *time-to-exit* ;; hurry up

Modified http-transport.scm from [0925a1edaf] to [5185faa44b].

459
460
461
462
463
464
465

466
467
468
469
470
471
472

473


474



475




476
477
478
479
480
481
482
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474

475
476
477
478
479
480

481
482
483
484
485
486
487
488
489
490
491







+







+
-
+
+

+
+
+
-
+
+
+
+







      (debug:print 0 *default-log-port* "Failed to create " started-file ", exn=" exn)
      (with-output-to-file started-file (lambda ()(print (current-process-id)))))

    (let loop ((count         0)
	       (server-state 'available)
	       (bad-sync-count 0)
	       (start-time     (current-milliseconds)))

      ;; Use this opportunity to sync the tmp db to megatest.db
      (if (not server-going) ;; *dbstruct-dbs* 
	  (begin
	    (debug:print 0 *default-log-port* "SERVER: dbprep")
	    (set! *dbstruct-dbs*  (db:setup #t)) ;;  run-id)) FIXME!!!
	    (set! server-going #t)
	    (debug:print 0 *default-log-port* "SERVER: running, megatest version: " (common:get-full-version)) ;; NOTE: the server is NOT yet marked as running in the log. We do that in the keep-running routine.

	    (thread-start! *watchdog*))
	    ;; (thread-start! *watchdog*)
          ) 
	  (if *no-sync-db*
              (begin
                (debug:print 0 *default-log-port* "keep-running calling db:do-sync at " (time->string (seconds->local-time) "%H:%M:%S"))
                (db:do-sync)
	      (db:run-lock-and-sync *no-sync-db*)))
	        ;; (db:run-lock-and-sync *no-sync-db*)
              )
          )
      )
      
      ;; when things go wrong we don't want to be doing the various queries too often
      ;; so we strive to run this stuff only every four seconds or so.
      (let* ((sync-time (- (current-milliseconds) start-time))
	    (rem-time  (quotient (- 4000 sync-time) 1000)))
	(if (and (<= rem-time 4)
		 (>  rem-time 0))

Modified megatest.scm from [fd40d13cfe] to [c7ae0fc5df].

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520









521
522
523
524
525
526
527
505
506
507
508
509
510
511









512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527







-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+







(let ((targ (or (args:get-arg "-reqtarg")(args:get-arg "-target"))))
  (if targ (setenv "MT_TARGET" targ)))

;; The watchdog is to keep an eye on things like db sync etc.
;;

;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define *watchdog* (make-thread
		    (lambda ()
		      (handle-exceptions
			  exn
			  (begin
			    (print-call-chain)
			    (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
			(common:watchdog)))
		    "Watchdog thread"))
;;(define *watchdog* (make-thread
;;		    (lambda ()
;;		      (handle-exceptions
;;			  exn
;;			  (begin
;;			    (print-call-chain)
;;			    (print " message: " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
;;			(common:watchdog)))
;;		    "Watchdog thread"))

;;(if (not (args:get-arg "-server"))
;;    (thread-start! *watchdog*)) ;; if starting a server; wait till we get to running state before kicking off watchdog
(let* ((no-watchdog-args
       '("-list-runs"
         "-testdata-csv"
         "-list-servers"
545
546
547
548
549
550
551
552
553




554
555
556
557
558
559
560
545
546
547
548
549
550
551


552
553
554
555
556
557
558
559
560
561
562







-
-
+
+
+
+







                                               (if (null? tail)
                                                 #t
                                                 (loop (car tail) (cdr tail))))))      
       (no-watchdog-args-vals (filter (lambda (x) x)
                                      (map args:get-arg no-watchdog-args)))
       (start-watchdog (and (null? no-watchdog-args-vals) start-watchdog-specail-arg-val)))
       ;(print  "no-watchdog-args="no-watchdog-args "no-watchdog-args-vals="no-watchdog-args-vals " start-watchdog-specail-arg-val:" start-watchdog-specail-arg-val " start-watchdog:" start-watchdog) 
  (if start-watchdog
      (thread-start! *watchdog*)))
;;  (if start-watchdog
;;      (thread-start! *watchdog*))
    #t
)


;; bracket open-output-file with code to make leading directory if it does not exist and handle exceptions
(define (open-logfile logpath-in)
  (condition-case
   (let* ((log-dir (or (pathname-directory logpath-in) "."))
	  (fname   (pathname-strip-directory logpath-in))
2477
2478
2479
2480
2481
2482
2483
2484
2485


2486
2487
2488
2489
2490
2491
2492
2479
2480
2481
2482
2483
2484
2485


2486
2487
2488
2489
2490
2491
2492
2493
2494







-
-
+
+







(when (args:get-arg "-sync-brute-force")
  (launch:setup)
  ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
  (set! *didsomething* #t))

(if (args:get-arg "-sync-to-megatest.db")
    (let* ((duh      (launch:setup))
	   (dbstruct (db:setup #f))
	   (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct)))
	   (dbstruct (db:setup #t))
	   (tmpdbpth (dbr:dbstruct-tmppath dbstruct))
	   (lockfile (conc tmpdbpth ".lock"))
	   (locked   (common:simple-file-lock lockfile)) 
	   (res      (if locked
			 (db:multi-db-sync 
			  dbstruct
			  'new2old)
			 #f)))
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558




2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2550
2551
2552
2553
2554
2555
2556




2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573







-
-
-
-
+
+
+
+













    (set! *time-to-exit* #t)
    )
;;(debug:print-info 13 *default-log-port* "thread-join! watchdog")

;; join the watchdog thread if it has been thread-start!ed  (it may not have been started in the case of a server that never enters running state)
;;   (symbols returned by thread-state: created ready running blocked suspended sleeping terminated dead)
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(if (thread? *watchdog*)
    (case (thread-state *watchdog*)
      ((ready running blocked sleeping terminated dead)
       (thread-join! *watchdog*))))
;;(if (thread? *watchdog*)
;;    (case (thread-state *watchdog*)
;;      ((ready running blocked sleeping terminated dead)
;;       (thread-join! *watchdog*))))

(set! *time-to-exit* #t)

(if (not (eq? *globalexitstatus* 0))
    (if (or (args:get-arg "-run")(args:get-arg "-runtests")(args:get-arg "-runall"))
        (begin
           (debug:print 0 *default-log-port* "NOTE: Subprocesses with non-zero exit code detected: " *globalexitstatus*)
           (exit 0))
        (case *globalexitstatus*
         ((0)(exit 0))
         ((1)(exit 1))
         ((2)(exit 2))
         (else (exit 3)))))

Modified tests/simplerun/thebeginning.scm from [50a75d414c] to [4f9085105b].

11
12
13
14
15
16
17



18
19
20
21


22
23
24
25
26
27







28
29
30
31



32
33
34
35
36
37
38










39
40

41
42
43
44
45



46
47
48


49
50
51
52
53
54
55
56


57
58
59









































11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26






27
28
29
30
31
32
33
34



35
36
37
38
39





40
41
42
43
44
45
46
47
48
49
50

51
52


53

54
55
56
57


58
59
60
61
62
63
64
65


66
67
68


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109







+
+
+




+
+
-
-
-
-
-
-
+
+
+
+
+
+
+

-
-
-
+
+
+


-
-
-
-
-
+
+
+
+
+
+
+
+
+
+

-
+

-
-

-
+
+
+

-
-
+
+






-
-
+
+

-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 ;; dbfile:set-subdb
 ;; db:with-db
 ;; dbfile:get-subdb
 )

;; *************** dbfile.scm tests ****************


 (debug:print 0 *default-log-port* " tmp area: " (common:get-db-tmp-area))

(define tmpdir (common:get-db-tmp-area))
(test #f #t (dbr:dbstruct? (dbfile:setup #t *toppath* tmpdir)))
(test #f #t (dbr:dbstruct? (db:setup #t)))
(define dbstruct *dbstruct-dbs*)
(test #f #t (dbr:subdb? (dbfile:init-subdb dbstruct #f db:initialize-main-db))) ;; this opens the nfs main db

(test #f #t (dbr:dbdat? (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db)))
(define maindbdat (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db))
(dbfile:add-dbdat dbstruct #f maindbdat)
(test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct #f)))
(test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct #f)))
(test #f #f (dbr:dbdat? (dbfile:get-dbdat dbstruct #f))) ;; stack empty so should fail.
;; (test #f #t (dbr:dbdat? (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db))) ;; this opens the tmp db.
;; (define maindbdat (dbfile:open-db *dbstruct-dbs* #f db:initialize-main-db)) ;; this opens the tmp db.
;; (dbfile:add-dbdat dbstruct #f maindbdat)

;;(test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct #f)))
;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct #f)))
;; (test #f #f (dbr:dbdat? (dbfile:get-dbdat dbstruct #f))) ;; stack empty so should fail.

(test #f #t (hash-table? (dbr:dbstruct-subdbs dbstruct)))
(test #f #t (stack? (dbr:subdb-dbstack (dbfile:get-subdb dbstruct #f))))
(test #f '("SYSTEM" "RELEASE") (db:get-keys *dbstruct-dbs*))
;; (test #f #t (hash-table? (dbr:dbstruct-subdbs dbstruct)))
;; (test #f #t (stack? (dbr:subdb-dbstack (dbfile:get-subdb dbstruct #f))))
;; (test #f '("SYSTEM" "RELEASE") (db:get-keys *dbstruct-dbs*))


(test #f #t (dbr:dbdat? (dbfile:open-db dbstruct 1 db:initialize-main-db)))
(define rundbdat (dbfile:open-db dbstruct 1 db:initialize-main-db))
(dbfile:add-dbdat dbstruct 1 rundbdat)
(test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 1)))
(test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct 1)))
 (test #f #t (dbr:dbdat? (dbfile:open-db dbstruct 1 db:initialize-main-db)))
 (test #f #t (dbr:dbdat? (dbfile:open-db dbstruct 2 db:initialize-main-db)))
 (define rundbdat (dbfile:open-db dbstruct 1 db:initialize-main-db))
 (define rundbdat2 (dbfile:open-db dbstruct 1 db:initialize-main-db))
 (dbfile:add-dbdat dbstruct 1 rundbdat)
 (dbfile:add-dbdat dbstruct 2 rundbdat2)
;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 1)))
;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct 1)))
;; (test #f #t (dbr:subdb? (dbfile:get-subdb dbstruct 2)))
;; (test #f #t (dbr:dbdat? (dbfile:get-dbdat dbstruct 2)))

(test #f #t (db:close-all dbstruct))
;; (test #f #t (db:close-all dbstruct))

(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh rundbdat) (dbr:dbdat-stmt-cache rundbdat)))
(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh maindbdat) (dbr:dbdat-stmt-cache maindbdat)))

(test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/main.db") 0))
;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/main.db") 0))
;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/1.db") 0))
;; (test #f #t (> (dbfile:lazy-sqlite-db-modification-time ".db/2.db") 0))

(test #f #t (common:simple-file-lock "./db.lock"))
(test #f "./db.lock" (common:simple-file-release-lock "./db.lock"))
;; (test #f #t (common:simple-file-lock "./db.lock"))
;; (test #f "./db.lock" (common:simple-file-release-lock "./db.lock"))



;; *************** db.scm tests ****************


(define thisdbdat (db:open-db dbstruct #f))
(test #f #t (dbr:dbdat? thisdbdat))
;; (define thisdbdat (db:open-db dbstruct #f))
;; (test #f #t (dbr:dbdat? thisdbdat))

(test #f #t (dbr:dbdat? (db:get-db dbstruct #f)))
(test #f #t (dbr:dbdat? (db:get-db dbstruct 1)))
;; (test #f #t (dbr:dbdat? (db:get-db dbstruct #f)))
;; (test #f #t (dbr:dbdat? (db:get-db dbstruct 1)))
;; (test #f #t (dbr:dbdat? (db:get-db dbstruct 2)))

;; (dbfile:add-dbdat dbstruct #f maindbdat)
;; (define maindbdat (dbfile:get-dbdat dbstruct #f))
;; (dbfile:add-dbdat dbstruct #f maindbdat)

(define mtdbdat2 (dbr:subdb-mtdbdat (dbfile:get-subdb dbstruct #f)))

(define areapath  (dbr:dbstruct-areapath dbstruct))
(define mtdbpath  (dbfile:run-id->path areapath #f))
(define init-proc db:initialize-main-db)

(define mtdbdat   (dbfile:open-sqlite3-db mtdbpath init-proc))

(define maindb-handle (dbr:dbdat-dbh mtdbdat))
(define maindb-handle2 (dbr:dbdat-dbh mtdbdat2))
(debug:print 0 *default-log-port* "maindb handle: " maindb-handle)
(debug:print 0 *default-log-port* "maindb handle2: " maindb-handle2)

(sqlite3:execute maindb-handle "vacuum")
(sqlite3:execute maindb-handle2 "vacuum")

(define full-sel   (conc "SELECT * from runs"))

(sqlite3:for-each-row
	     (lambda (a . b)
               (debug:print 0 *default-log-port* "a: " a " b: " b)
             )
	     maindb-handle
	     full-sel)

(test #f #t (db:sync-touched dbstruct #f))
(test #f #t (db:sync-touched dbstruct 1))
(test #f #t (db:sync-touched dbstruct 2))

(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh rundbdat) (dbr:dbdat-stmt-cache rundbdat)))
(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh rundbdat2) (dbr:dbdat-stmt-cache rundbdat2)))
(test #f #t (db:safely-close-sqlite3-db (dbr:dbdat-dbh mtdbdat) (dbr:dbdat-stmt-cache mtdbdat)))