Megatest

Check-in [3053005860]
Login
Overview
Comment:Make server logs not overlap on server.log - this makes debug easier
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.80
Files: files | file ages | folders
SHA1: 305300586048f6b822649a99744ce830826de7ed
User & Date: matt on 2023-04-06 13:32:22
Other Links: branch diff | manifest | tags
Context
2023-04-06
16:37
merge-fork check-in: c574c7b21b user: matt tags: v1.80
13:32
Make server logs not overlap on server.log - this makes debug easier check-in: 3053005860 user: matt tags: v1.80
09:23
Fixed performance issue with append in runs.scm check-in: 0dc6c83d6e user: matt tags: v1.80
Changes

Modified db.scm from [315e0db07f] to [db6ac85832].

2838
2839
2840
2841
2842
2843
2844
2845

2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
;; 
(define (db:get-test-state-status-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res (cons #f #f)))

       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (state status)
	  (cons state status))
	(db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")
	test-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)







|
>



|







2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
;; 
(define (db:get-test-state-status-by-id dbstruct run-id test-id)
  (db:with-db
   dbstruct
   run-id
   #f
   (lambda (dbdat db)
     (let ((res   (cons #f #f))
	   (stmth (db:get-cache-stmth dbdat db "SELECT state,status FROM tests WHERE id=?;")))
       (sqlite3:for-each-row ;; attemptnum added to hold pid of top process (not Megatest) controlling a test
	(lambda (state status)
	  (cons state status))
	stmth 
	test-id)
       res))))

;; Use db:test-get* to access
;; Get test data using test_ids. NB// Only works within a single run!!
;;
(define (db:get-test-info-by-ids dbstruct run-id test-ids)

Modified dbfile.scm from [d5febb23fb] to [c7b39de25b].

1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  ;; didn't see much change in the frequency of the messages:
  ;; Warning (#<thread: thread14974>): in thread: (bind!) bad parameter or other API misuse
  ;; allowing request count to go up to 1000 and other crashes showed up:
  ;; Warning (#<thread: thread1889>): in thread: (deserialize) unexpected end of input: #<input port "(tcp)">
  ;;
  ;; leave it fully on for now, test later if there is a performance issue
  ;;
  (let* ((use-mutex #t) ;; (> *api-process-request-count* 25)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat     (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			#f))
	 (db        (if have-struct                ;; this stuff just allows us to call with a db handle directly
			(dbr:dbdat-dbh dbdat)
			dbstruct))
	 (fname     (if dbdat
			(dbr:dbdat-dbfile dbdat)
			"nofilenameavailable"))
	 (jfile     (conc fname"-journal"))
	 (qryproc   (lambda ()
		      (if use-mutex (mutex-lock! *db-with-db-mutex*))
		      (let ((res (apply proc dbdat db params))) ;; the actual call is here.
			(if use-mutex (mutex-unlock! *db-with-db-mutex*))
			;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
			(if dbdat
			    (dbfile:add-dbdat dbstruct run-id dbdat))
			;; (delete-file* crumbfile)
			res)))
	 (stop-train (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train")))

    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db
	    ", fname="fname)
    (if (file-exists? jfile)
	(begin
	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
	  (thread-sleep! 0.2)))







|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|







1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
  ;; didn't see much change in the frequency of the messages:
  ;; Warning (#<thread: thread14974>): in thread: (bind!) bad parameter or other API misuse
  ;; allowing request count to go up to 1000 and other crashes showed up:
  ;; Warning (#<thread: thread1889>): in thread: (deserialize) unexpected end of input: #<input port "(tcp)">
  ;;
  ;; leave it fully on for now, test later if there is a performance issue
  ;;
  (let* ((use-mutex   #t) ;;(> *api-process-request-count* 50)) ;; risk of db corruption
	 (have-struct (dbr:dbstruct? dbstruct))
         (dbdat       (if have-struct                ;; this stuff just allows us to call with a db handle directly
			  (db:open-db dbstruct run-id (dbfile:db-init-proc)) ;; (dbfile:get-subdb dbstruct run-id)
			  #f))
	 (db          (if have-struct                ;; this stuff just allows us to call with a db handle directly
			  (dbr:dbdat-dbh dbdat)
			  dbstruct))
	 (fname       (if dbdat
			  (dbr:dbdat-dbfile dbdat)
			  "nofilenameavailable"))
	 (jfile       (conc fname"-journal"))
	 (qryproc     (lambda ()
			(if use-mutex (mutex-lock! *db-with-db-mutex*))
			(let ((res (apply proc dbdat db params))) ;; the actual call is here.
			  (if use-mutex (mutex-unlock! *db-with-db-mutex*))
			  ;; (if (vector? dbstruct)(db:done-with dbstruct run-id r/w))
			  (if dbdat
			      (dbfile:add-dbdat dbstruct run-id dbdat))
			  ;; (delete-file* crumbfile)
			  res)))
	 (stop-train  (conc (dbr:dbstruct-areapath dbstruct)"/stop-the-train")))

    (assert (sqlite3:database? db) "FATAL: db:with-db, db is not a database, db="db
	    ", fname="fname)
    (if (file-exists? jfile)
	(begin
	  (dbfile:print-err "INFO: "jfile" exists, delaying to reduce database load")
	  (thread-sleep! 0.2)))

Modified megatest.scm from [39b3d98b1d] to [46ccc9ab0a].

610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
        (define *didsomething* #t)  
        (exit 1))))

;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")(args:get-arg "-server")) ;; redirect the log always when a server
    (handle-exceptions
	exn
      (begin
	(print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
	     (dbname (args:get-arg "-db"))   ;; for the server logfile name
	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name







|







610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
        (define *didsomething* #t)  
        (exit 1))))

;; this segment will run launch:setup only if -log is not set. This is fairly safe as servers are not
;; manually started and thus should never be started in a non-megatest area. Thus no need to handle situation
;; where (launch:setup) returns #f?
;;
(if (or (args:get-arg "-log")#;(args:get-arg "-server")) ;; redirect the log always when a server
    (handle-exceptions
	exn
      (begin
	(print "ERROR: Failed to switch to log output. " ((condition-property-accessor 'exn 'message) exn) ", exn=" exn))
      (let* ((tl   (or (args:get-arg "-log")(launch:setup)))   ;; run launch:setup if -server, ensure we do NOT run launch:setup if -log specified
	     (dbname (args:get-arg "-db"))   ;; for the server logfile name
	     (logf (or (args:get-arg "-log") ;; use -log unless we are a server, then craft a logfile name

Modified runs.scm from [09906f7b93] to [399ccd6fb7].

1860
1861
1862
1863
1864
1865
1866
1867

1868
1869
1870
1871
1872
1873
1874
			      ;; BUG: This next line sucks up a lot of horsepower
			      ;; (set! tal (append tal (list newtestname)))
			      ;; (set! tal (cons newtestname tal)) ;; 4/6/2023 - try using cons, does it matter if the test gets added at the beginning?
			      (set! incoming-tests (cons newtestname incoming-tests))
			      ))  ;; since these are itemized create new test names testname/itempath
                          items-in-testpatt)))
          
	  (if (< (length tal) 20)

	      (begin
		(set! tal (append tal (reverse incoming-tests)))
		(set! incoming-tests '())))

	  ;; At this point we have possibly added items to tal but all must be handed off to 
	  ;; INNER COND logic. I think loop without rotating the queue 
	  ;; (loop hed tal reg reruns))







|
>







1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
			      ;; BUG: This next line sucks up a lot of horsepower
			      ;; (set! tal (append tal (list newtestname)))
			      ;; (set! tal (cons newtestname tal)) ;; 4/6/2023 - try using cons, does it matter if the test gets added at the beginning?
			      (set! incoming-tests (cons newtestname incoming-tests))
			      ))  ;; since these are itemized create new test names testname/itempath
                          items-in-testpatt)))
          
	  (if (and (< (length tal) 20)
		   (not (null? incoming-tests)))
	      (begin
		(set! tal (append tal (reverse incoming-tests)))
		(set! incoming-tests '())))

	  ;; At this point we have possibly added items to tal but all must be handed off to 
	  ;; INNER COND logic. I think loop without rotating the queue 
	  ;; (loop hed tal reg reruns))

Modified tcp-transportmod.scm from [4f9e2ba569] to [2389278b99].

598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
      (thread-sleep! 1))
     ((> nrun 100)
      (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
      (thread-sleep! 1))
     (else
      (if (not (file-exists? (conc areapath"/logs")))
	      (create-directory (conc areapath"/logs") #t))
	  (let* ((logfile   (conc areapath "/logs/server.log")) ;; -" curr-pid "-" target-host ".log"))
		 (cmdln     (conc
			     mtexe
			     " -server - ";; (or target-host "-")
			     " -m testsuite:" testsuite
			     ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
			     " -db "  dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode







|







598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
      (thread-sleep! 1))
     ((> nrun 100)
      (debug:print 0 *default-log-port* nrun" servers running on this host, not starting another.")
      (thread-sleep! 1))
     (else
      (if (not (file-exists? (conc areapath"/logs")))
	      (create-directory (conc areapath"/logs") #t))
	  (let* ((logfile   (conc areapath "/logs/server-"dbfname"-"(current-process-id)".log")) ;; -" curr-pid "-" target-host ".log"))
		 (cmdln     (conc
			     mtexe
			     " -server - ";; (or target-host "-")
			     " -m testsuite:" testsuite
			     ;; " -run-id " (or run-id "main") ;; NO, we do NOT want to have run id as part of this
			     " -db "  dbfname ;; (dbmod:run-id->dbfname run-id)
			     " " profile-mode