Megatest

Check-in [e7a3a54019]
Login
Overview
Comment:Found couple more cases of poorly protected sqlite3 calls. Fixed.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.55
Files: files | file ages | folders
SHA1: e7a3a54019f75299051e1856f2c6557ae1dbda1b
User & Date: matt on 2014-03-28 00:08:23
Other Links: branch diff | manifest | tags
Context
2014-06-02
10:49
Merged e7a3 from v1.55 branch into v1.60 branch check-in: 180ef4e375 user: mrwellan tags: v1.60
2014-03-28
10:59
Added specified tests to the required list to fix corner case where a specified test could get dropped check-in: 0f5dd6bf65 user: mrwellan tags: v1.55
00:08
Found couple more cases of poorly protected sqlite3 calls. Fixed. check-in: e7a3a54019 user: matt tags: v1.55
2014-03-27
11:14
Merged sqlite-trials work to v1.55 check-in: 2b3cd8f3ca user: mrwellan tags: v1.55, v1.5516rc2
Changes

Modified launch.scm from [c33f4302cb] to [e1a0a2dd1d].

143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info
	  (tests:set-full-meta-info #f test-id run-id 0 work-area)

	  ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (tests:test-force-state-status! test-id "REMOTEHOSTSTART" "n/a")
	  (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")







|







143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
	  (set-run-config-vars run-id keyvals target) ;; (db:get-target db run-id))
	  ;; environment overrides are done *before* the remaining critical envars.
	  (alist->env-vars env-ovrd)
	  (set-megatest-env-vars run-id inkeys: keys inkeyvals: keyvals)
	  (set-item-env-vars itemdat)
	  (save-environment-as-files "megatest")
	  ;; open-run-close not needed for test-set-meta-info
	  (tests:set-full-meta-info #f test-id run-id 0 work-area 10)

	  ;; (tests:test-set-status! test-id "REMOTEHOSTSTART" "n/a" (args:get-arg "-m") #f)
	  (tests:test-force-state-status! test-id "REMOTEHOSTSTART" "n/a")
	  (thread-sleep! 0.3) ;; NFS slowness has caused grief here

	  (if (args:get-arg "-xterm")
	      (set! fullrunscript "xterm")
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area)
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       (tests:set-partial-meta-info #f test-id run-id minutes work-area)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid (vector-ref exit-info 0)))







|











|







303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
					(calc-minutes  (lambda ()
							 (inexact->exact 
							  (round 
							   (- 
							    (current-seconds) 
							    start-seconds)))))
					(kill-tries 0))
				   (tests:set-full-meta-info #f test-id run-id (calc-minutes) work-area 10)
				   (let loop ((minutes   (calc-minutes)))
				     (begin
				       (set! kill-job? (or (test-get-kill-request test-id) ;; run-id test-name itemdat))
							   (and runtlim (let* ((run-seconds   (- (current-seconds) start-seconds))
									       (time-exceeded (> run-seconds runtlim)))
									  (if time-exceeded
									      (begin
										(debug:print-info 0 "KILLING TEST DUE TO TIME LIMIT EXCEEDED! Runtime=" run-seconds " seconds, limit=" runtlim)
										#t)
									      #f)))))
				       ;; open-run-close not needed for test-set-meta-info
				       (tests:set-partial-meta-info #f test-id run-id minutes work-area 10)
				       (if kill-job? 
					   (begin
					     (mutex-lock! m)
					     ;; NOTE: The pid can change as different steps are run. Do we need handshaking between this
					     ;;       section and the runit section? Or add a loop that tries three times with a 1/4 second
					     ;;       between tries?
					     (let* ((pid (vector-ref exit-info 0)))

Modified megatest.scm from [094dda6a97] to [92a59bf1dc].

992
993
994
995
996
997
998
999



1000
1001
1002
1003
1004
1005
1006
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))
			 (logprofile (args:get-arg "-logpro"))
			 (logfile    (conc stepname ".log"))
			 (cmd        (if (null? remargs) #f (car remargs)))
			 (params     (if cmd (cdr remargs) '()))
			 (exitstat   #f)
			 (shell      (last (string-split (get-environment-variable "SHELL") "/")))



			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")
				       (else ">&")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))







|
>
>
>







992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
		    (exit 6))
		  (let* ((stepname   (args:get-arg "-runstep"))
			 (logprofile (args:get-arg "-logpro"))
			 (logfile    (conc stepname ".log"))
			 (cmd        (if (null? remargs) #f (car remargs)))
			 (params     (if cmd (cdr remargs) '()))
			 (exitstat   #f)
			 (shell      (let ((sh (get-environment-variable "SHELL") ))
				       (if sh 
					   (last (string-split sh "/"))
					   "bash")))
			 (redir      (case (string->symbol shell)
				       ((tcsh csh ksh)    ">&")
				       ((zsh bash sh ash) "2>&1 >")
				       (else ">&")))
			 (fullcmd    (conc "(" (string-intersperse 
						(cons cmd params) " ")
					   ") " redir " " logfile)))

Modified tests.scm from [0d9cac1a4f] to [a199e06515].

702
703
704
705
706
707
708
709
710















711
712
713
714
715
716
717
718
719
720
721
722















723
724
725
726
727
728
729
730
731
732
733
734
  ;; transactionized write at the server
  (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree)
  (if minutes 
      (cdb:tests-update-run-duration *runremote* test-id minutes))
  (if (and uname hostname)
      (cdb:tests-update-uname-host *runremote* test-id uname hostname)))
  
(define (tests:set-full-meta-info db test-id run-id minutes work-area)
  ;; DOES cdb:remote-run under the hood!















  (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
	 (cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (uname    (get-uname "-srvpio"))
	 (hostname (get-host-name)))
    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname)))
	  
(define (tests:set-partial-meta-info db test-id run-id minutes work-area)
  ;; DOES cdb:remote-run under the hood!
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory))))















    (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    ;; Update central with uname and hostname = #f
    ;; Is this one of the performance problems? This info should come from testdat-meta anyway
    ;; (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)
  ))
	 
(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)))
    (if (sqlite3:database? tdb)
	(begin
	  (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
			   cpuload diskfree minutes)







|

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
|
|
|
|
|
|
|
|


|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|



|







702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
  ;; transactionized write at the server
  (cdb:tests-update-cpuload-diskfree *runremote* test-id cpuload diskfree)
  (if minutes 
      (cdb:tests-update-run-duration *runremote* test-id minutes))
  (if (and uname hostname)
      (cdb:tests-update-uname-host *runremote* test-id uname hostname)))
  
(define (tests:set-full-meta-info db test-id run-id minutes work-area remtries)
  ;; DOES cdb:remote-run under the hood!
  (let ((remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (let* ((num-records 0) ;; (test:tdb-get-rundat-count tdb))
	    (cpuload  (get-cpu-load))
	    (diskfree (get-df (current-directory)))
	    (uname    (get-uname "-srvpio"))
	    (hostname (get-host-name)))
       (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
       (tests:update-central-meta-info test-id cpuload diskfree minutes uname hostname)))))
    
(define (tests:set-partial-meta-info db test-id run-id minutes work-area remtries)
  ;; DOES cdb:remote-run under the hood!
  (let* ((cpuload  (get-cpu-load))
	 (diskfree (get-df (current-directory)))
	 (remtries 10))
    (handle-exceptions
     exn
     (if (> remtries 0)
	 (begin
	   (set! remtries (- remtries 1))
	   (thread-sleep! 10)
	   (tests:set-full-meta-info db test-id run-id minutes work-area (- remtries 1)))
	 (let ((err-status ((condition-property-accessor 'sqlite3 'status #f) exn)))
	   (debug:print 0 "ERROR: tried for over a minute to update meta info and failed. Giving up")
	   (debug:print 0 "EXCEPTION: database probably overloaded or unreadable.")
	   (debug:print 0 " message: " ((condition-property-accessor 'exn 'message) exn))
	   (print "exn=" (condition->list exn))
	   (debug:print 0 " status:  " ((condition-property-accessor 'sqlite3 'status) exn))
	   (print-call-chain)))
     (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
    ;; Update central with uname and hostname = #f
    ;; Is this one of the performance problems? This info should come from testdat-meta anyway
    ;; (tests:update-central-meta-info test-id cpuload diskfree minutes #f #f)
  )))
	 
(define (tests:update-testdat-meta-info db test-id work-area cpuload diskfree minutes)
  (let ((tdb         (db:open-test-db-by-test-id db test-id work-area: work-area)))
    (if (sqlite3:database? tdb)
	(begin
	  (sqlite3:execute tdb "INSERT INTO test_rundat (update_time,cpuload,diskfree,run_duration) VALUES (strftime('%s','now'),?,?,?);"
			   cpuload diskfree minutes)