Megatest

Check-in [58cf8acf44]
Login
Overview
Comment:wip
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.6584-ck5
Files: files | file ages | folders
SHA1: 58cf8acf440c8ff545936813718305d6cb26c882
User & Date: matt on 2021-05-16 23:22:01
Other Links: branch diff | manifest | tags
Context
2021-05-16
23:26
wip check-in: 84869b5b12 user: matt tags: v1.6584-ck5
23:22
wip check-in: 58cf8acf44 user: matt tags: v1.6584-ck5
2021-05-15
21:57
wip check-in: db4714b500 user: matt tags: v1.6584-ck5
Changes

Modified dbmod.scm from [df51662aee] to [43fca71bc1].

226
227
228
229
230
231
232
233

234
235
236

237
238
239
240
241
242
243
226
227
228
229
230
231
232

233
234
235

236
237
238
239
240
241
242
243







-
+


-
+







;; Make the dbstruct, call for main db at least once
;; sync disk db to inmem
;;
;; called in http-transport and replicated in rmt.scm for *local* access. 
;;
(define (db:setup run-id)
  (assert *toppath* "FATAL: db:setup called before toppath is available.")
  (let* ((dbstruct (make-dbr:dbstruct))
  (let* ((dbstruct (or *dbstruct-db* (make-dbr:dbstruct)))
	 (db-file  (db:run-id->path *toppath* run-id)))
    (db:get-dbdat dbstruct *toppath* db-file)
    (set! *dbstruct-db* dbstruct)
    (if (not *dbstruct-db*)(set! *dbstruct-db* dbstruct))
    dbstruct))

;;======================================================================
;; setting/getting a lock on the db for only one server per db
;;
;;  NOTE:
;;       These operate directly on the disk file, NOT on the inmemory db
411
412
413
414
415
416
417
418
419
420
421




422
423
424
425
426
427
428
429
430

431
432
433
434
435
436
437


438
439
440
441
442
443
444
411
412
413
414
415
416
417




418
419
420
421
422
423
424
425
426
427
428


429
430
431

432
433
434

435
436
437
438
439
440
441
442
443







-
-
-
-
+
+
+
+







-
-
+


-



-
+
+







;; ;;     (set! *db-last-access* start-t)
;; ;;     (mutex-unlock! *db-multi-sync-mutex*)
;; ;;     (stack-push! (dbr:dbstruct-dbstack dbstruct) tmpdb)))

;; NOTE: touched logic is disabled/not done
;; sync run to disk if touched
;;
(define (db:sync-inmem->disk dbstruct dbfile #!key (force-sync #f))
  (let* ((dbdat       (db:get-dbdat dbstruct dbfile))
	 (db          (dbr:dbdat-db dbstruct))
	 (inmem       (dbr:dbdat-inmem dbstruct))
(define (db:sync-inmem->disk dbstruct apath dbfile #!key (force-sync #f))
  (let* ((dbdat       (db:get-dbdat dbstruct apath dbfile))
	 (db          (dbr:dbdat-db dbdat))
	 (inmem       (dbr:dbdat-inmem dbdat))
	 (start-t     (current-seconds))
	 (last-update (dbr:dbdat-last-write dbdat))
	 (last-sync   (dbr:dbdat-last-sync dbdat)))
    (debug:print-info 4 *default-log-port* "Syncing for dbfile: " dbfile)
    (mutex-lock! *db-multi-sync-mutex*)
    (let* ((update_info (cons (if force-sync 0 last-update) "last_update"))
    	   (need-sync   (or force-sync (>= last-update last-sync))))
      (mutex-unlock! *db-multi-sync-mutex*)
      (if need-sync
       (if need-sync
	  (db:sync-tables (db:sync-all-tables-list) update_info inmem db)
	  (debug:print 0 *default-log-port* "Skipping sync as nothing touched.")))
    (mutex-lock! *db-multi-sync-mutex*)
    (dbr:dbdat-last-sync-set! dbdat start-t)
    (mutex-unlock! *db-multi-sync-mutex*)))


;; TODO: Add final sync to this
;;
(define (db:safely-close-sqlite3-db db stmt-cache #!key (try-num 3))
  (if (<= try-num 0)
      #f
      (handle-exceptions
	  exn
	(begin
	  (print "Attempt to safely close sqlite3 db failed. Trying again. exn=" exn)

Modified launchmod.scm from [c24b724e9b] to [c2309e8637].

2279
2280
2281
2282
2283
2284
2285







2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308



























2309
2310
2311
2312
2313
2314
2315
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292























2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326







+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+







		)) ;; we can safely cache megatest.config since we have a valid runconfig
	  data))))

;;======================================================================
;; TODO: for multiple areas, we will have multiple watchdogs; and multiple threads to manage
(define (common:watchdog)
  (debug:print-info 13 *default-log-port* "common:watchdog entered.")
  (assert *toppath* "common:watchdog started before *toppath* is set")
  (let* ((start-time (current-seconds))
	 (am-server  (args:get-arg "-server"))
	 (dbfile     (args:get-arg "-db"))
	 (apath      *toppath*))
    (let loop ()
      (thread-sleep! 5) ;; add control / setting for this
  #;(if (launch:setup)
      (if (common:on-homehost?)
	  (let ((dbstruct (db:setup #t)))
	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
	    (cond
	     ((dbr:dbstruct-read-only dbstruct)
	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
	      (common:readonly-watchdog dbstruct))
	     (else
	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
              (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
                (cond
                 ((equal? syncer "brute-force-sync")
                  (server:writable-watchdog-bruteforce dbstruct))
                 ((equal? syncer "delta-sync")
                  (server:writable-watchdog-deltasync dbstruct))
                 (else
                  (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
                  (exit 1)))
                ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
                )))
	    (debug:print-info 13 *default-log-port* "watchdog done."))
	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))
      (if am-server
	  (if (not  *dbstruct-db*)
	      (loop)
	      (db:sync-inmem->disk *dbstruct-db* *toppath* dbfile))))))
  
;; 
;; (let ((dbstruct 
;; 	    (debug:print-info 13 *default-log-port* "after db:setup with dbstruct=" dbstruct)
;; 	    (cond
;; 	     ((dbr:dbstruct-read-only dbstruct)
;; 	      (debug:print-info 13 *default-log-port* "loading read-only watchdog")
;; 	      (common:readonly-watchdog dbstruct))
;; 	     (else
;; 	      (debug:print-info 13 *default-log-port* "loading writable-watchdog.")
;;             (let* ((syncer (or (configf:lookup *configdat* "server" "sync-method") "brute-force-sync")))
;;               (cond
;;                ((equal? syncer "brute-force-sync")
;;                 (server:writable-watchdog-bruteforce dbstruct))
;;                ((equal? syncer "delta-sync")
;;                 (server:writable-watchdog-deltasync dbstruct))
;;                (else
;;                 (debug:print-error 0 *default-log-port* "Unknown server/sync-method specified ("syncer") - valid values are brute-force-sync and delta-sync.")
;;                 (exit 1)))
;;               ;;(debug:print 1 *default-log-port* "INFO: ["(common:human-time)"] Syncer started (method="syncer")")
;;               )))
;; 	    (debug:print-info 13 *default-log-port* "watchdog done."))
;; 	  (debug:print-info 13 *default-log-port* "no need for watchdog on non-homehost"))))

;;======================================================================
;; currently the primary job of the watchdog is to run the sync back to megatest.db from the db in /tmp
;; if we are on the homehost and we are a server (by definition we are on the homehost if we are a server)
;;
(define (common:readonly-watchdog dbstruct)
  #f)
2522
2523
2524
2525
2526
2527
2528
2529

2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550

2551
2552
2553
2554
2555
2556
2557
2533
2534
2535
2536
2537
2538
2539

2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560

2561
2562
2563
2564
2565
2566
2567
2568







-
+




















-
+







			(delay-loop (+ count 1))))
		  (if (not (bdat-time-to-exit *bdat*)) (loop))))
	    ;; time to exit, close the no-sync db here
	    (db:no-sync-close-db no-sync-db stmt-cache)
	    (if (common:low-noise-print 30)
		(debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = " (bdat-time-to-exit *bdat*)" pid="(current-process-id) ))))))) ;;" this-wd-num="this-wd-num)))))))

(define (server:writable-watchdog-bruteforce dbstruct)
#;(define (server:writable-watchdog-bruteforce dbstruct)
  (thread-sleep! 1) ;; delay for startup
  (let* ((do-a-sync  (server:get-bruteforce-syncer dbstruct))
         (final-sync (server:get-bruteforce-syncer dbstruct fork-to-background: #t persist-until-sync: #t)))
    (when (and (not (args:get-arg "-sync-to-megatest.db")) ;; conditions under which we do not run the sync
	       (args:get-arg "-server"))
      
      (let loop ()
	(do-a-sync)
        (if (not (bdat-time-to-exit *bdat*)) (loop))) ;; keep going unless time to exit

      ;; time to exit, close the no-sync db here
      (final-sync)

      (if (common:low-noise-print 30)
	  (debug:print-info 0 *default-log-port* "Exiting watchdog timer, *time-to-exit* = "(bdat-time-to-exit *bdat*)" pid="(current-process-id)
			    )))))

;; moving this here as it needs access to db and cannot be in common.
;;

(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
#;(define (server:get-bruteforce-syncer dbstruct #!key (fork-to-background #f) (persist-until-sync #f))
  (let* ((sqlite-exe   (or (get-environment-variable "MT_SQLITE3_EXE"))) ;; defined in cfg.sh
         (sync-log     (or (args:get-arg "-sync-log") (conc *toppath* "/logs/sync-" (current-process-id) "-" (get-host-name) ".log")))
	 (tmp-area     (common:get-db-tmp-area))
	 (tmp-db       (conc tmp-area "/megatest.db"))
	 (staging-file (conc *toppath* "/.megatest.db"))
	 (mtdbfile     (conc *toppath* "/megatest.db"))
	 (lockfile     (common:get-sync-lock-filepath))

Modified megatest.scm from [3e262da95d] to [2adacf8022].

741
742
743
744
745
746
747
748

749
750
751
752
753
754
755
741
742
743
744
745
746
747

748
749
750
751
752
753
754
755







-
+







     			"-create-megatest-area"
     			"-mark-incompletes"
     
     			"-convert-to-norm"
     			"-convert-to-old"
     			"-import-megatest.db"
     			"-sync-to-megatest.db"
                             "-sync-brute-force"
			"-sync-brute-force"
     			"-logging"
     			"-v" ;; verbose 2, more than normal (normal is 1)
     			"-q" ;; quiet 0, errors/warnings only
     
                             "-diff-rep"
     
     			"-syscheck"
2579
2580
2581
2582
2583
2584
2585
2586

2587
2588
2589
2590
2591
2592
2593
2579
2580
2581
2582
2583
2584
2585

2586
2587
2588
2589
2590
2591
2592
2593







-
+







            'dejunk
            'adj-testids
            'old2new
            ;; 'new2old
            )
           (set! *didsomething* #t)))
     
     (when (args:get-arg "-sync-brute-force")
     #;(when (args:get-arg "-sync-brute-force")
       ((server:get-bruteforce-syncer (db:setup #t) persist-until-sync: #t))
       (set! *didsomething* #t))
     
     #;(if (args:get-arg "-sync-to-megatest.db")
         (let* ((dbstruct (db:setup #f))
     	   (tmpdbpth (cdr (dbr:dbstruct-tmpdb dbstruct)))
     	   (lockfile (conc tmpdbpth ".lock"))

Modified rmtmod.scm from [f26d9abd38] to [93f26ad84f].

642
643
644
645
646
647
648

649





650
651
652
653
654
655
656
642
643
644
645
646
647
648
649

650
651
652
653
654
655
656
657
658
659
660
661







+
-
+
+
+
+
+







  (rmt:send-receive 'get-num-runs #f (list runpatt)))

(define (rmt:get-runs-cnt-by-patt runpatt targetpatt keys)
  (rmt:send-receive 'get-runs-cnt-by-patt #f (list runpatt  targetpatt keys)))

;; Use the special run-id == #f scenario here since there is no run yet
(define (rmt:register-run keyvals runname state status user contour)
  ;; first register in main.db (thus the #f)
  (rmt:send-receive 'register-run #f (list keyvals runname state status user contour)))
  (let* ((run-id (rmt:send-receive 'register-run #f (list keyvals runname state status user contour))))
    ;; now register in the run db itself
    (rmt:send-receive 'register-run run-id (list keyvals runname state status user contour))
    run-id))
  
    
(define (rmt:get-run-name-from-id run-id)
  (rmt:send-receive 'get-run-name-from-id run-id (list run-id)))

(define (rmt:delete-run run-id)
  (rmt:send-receive 'delete-run run-id (list run-id)))

1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554

1555
1556
1557
1558
1559
1560
1561
1548
1549
1550
1551
1552
1553
1554


1555
1556

1557
1558
1559
1560
1561
1562
1563
1564







-
-


-
+







;;	(exit 1))))

(define (common:run-sync?)
    ;; (and (common:on-homehost?)
  (args:get-arg "-server"))




;; this one seems to be the general entry point
;;
(define (server:start-and-wait areapath #!key (timeout 60))
#;(define (server:start-and-wait areapath #!key (timeout 60))
  (let ((give-up-time (+ (current-seconds) timeout)))
    (let loop ((server-info (server:check-if-running areapath))
	       (try-num    0))
      (if (or server-info
	      (> (current-seconds) give-up-time)) ;; server-url will be #f if no server available.
	  (server:record->url server-info)
	  (let ((num-ok (length (server:get-best (server:get-list areapath)))))
1584
1585
1586
1587
1588
1589
1590


1591

1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611


1612

1613
1614
1615
1616
1617
1618
1619
1587
1588
1589
1590
1591
1592
1593
1594
1595

1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618

1619
1620
1621
1622
1623
1624
1625
1626







+
+
-
+




















+
+
-
+







    (if res
        server-url
	#f)))

;; no longer care if multiple servers are started by accident. older
;; servers will drop off in time.
;;
;; defunct
;;
(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
#;(define (server:check-if-running areapath) ;;  #!key (numservers "2"))
  (let* ((ns            (server:get-num-servers))
	 (servers       (server:get-best (server:get-list areapath))))
    (if (or (and servers
		 (null? servers))
	    (not servers)
	    (and (list? servers)
		 (< (length servers) (pseudo-random-integer ns)))) ;; somewhere between 0 and numservers
        #f
        (let loop ((hed (car servers))
                   (tal (cdr servers)))
          (let ((res (server:check-server hed)))
            (if res
                hed
                (if (null? tal)
                    #f
                    (loop (car tal)(cdr tal)))))))))

;; kind start up of servers, wait 40 seconds before allowing another server for a given
;; run-id to be launched
;;
;; defunct
;;
(define (server:kind-run areapath)
#;(define (server:kind-run areapath)
  ;; look for $MT_RUN_AREA_HOME/logs/server-start-last
  ;; and wait for it to be at least 3 seconds old
  ;; (server:wait-for-server-start-last-flag areapath)
  (if (not (server:check-if-running areapath)) ;; why try if there is already a server running?
      (let* ((last-run-dat (hash-table-ref/default *server-kind-run* areapath '(0 0))) ;; callnum, whenrun
	     (call-num     (car last-run-dat))
	     (when-run     (cadr last-run-dat))
1638
1639
1640
1641
1642
1643
1644



1645

1646
1647
1648
1649
1650
1651
1652
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654

1655
1656
1657
1658
1659
1660
1661
1662







+
+
+
-
+







  (http-transport:client-connect iface port)
  #;(case (server:get-transport)
    ((rpc)  (rpc:client-connect  iface port))
    ((http) (http:client-connect iface port))
    ((zmq)  (zmq:client-connect  iface port))
    (else   (rpc:client-connect  iface port))))

;;
;; defunct
;;
(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
#;(define (client:setup areapath #!key (remaining-tries 100) (failed-connects 0))
  (print "got here")
  ;; (client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects)
  #;(case (server:get-transport)
    ((rpc) (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)) ;;(client:setup-rpc run-id))
    ((http)(client:setup-http areapath remaining-tries: remaining-tries failed-connects: failed-connects))
    (else  (rpc-transport:client-setup remaining-tries: remaining-tries failed-connects: failed-connects)))) ;; (client:setup-rpc run-id))))

Modified tests/unittests/basicserver.scm from [983ffc6ad7] to [4ef17a5055].

53
54
55
56
57
58
59

60
61


62
63
64
65
66
67
68
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71







+


+
+







 ;; with-input-from-request
 ;; rmt:get-connection
 ;; with-input-from-request
 )

(define *db* (db:setup #f))
(test #f 'server-started (api:execute-requests *db* 'get-server (list *toppath* ".db/1.db")))
(set! *dbstruct-db* #f)
(test #f 'server-started (rmt:general-open-connection *rmt:remote* *toppath* ".db/1.db"))
(test #f '("SYSTEM" "RELEASE") (rmt:get-keys))

(test #f 1 (rmt:register-run '(("SYSTEM" "a")("RELEASE" "b")) "run1" "new" "n/a" "justme" #f))

;; (delete-file* "logs/1.log")
;; (define run-id 1)

;; (test "setup for run" #t (begin (launch:setup)
;;  				(string? (getenv "MT_RUN_AREA_HOME"))))
;;