Megatest

Check-in [213395bcb3]
Login
Overview
Comment:Better handling of cached testconfigs (I think)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | v1.60
Files: files | file ages | folders
SHA1: 213395bcb376d3e79e61448e60d61e782642dcdd
User & Date: mrwellan on 2015-10-05 22:07:05
Other Links: branch diff | manifest | tags
Context
2015-10-06
00:24
Fixed couple issues with testconfig caching and fixed sorting on dashboard for -event_time case check-in: 00fe09dc6c user: matt tags: v1.60
2015-10-05
22:07
Better handling of cached testconfigs (I think) check-in: 213395bcb3 user: mrwellan tags: v1.60
2015-10-04
23:26
Added cache for testconfig. Corrected field order in dashboard test control panel check-in: d1f77a7687 user: matt tags: v1.60
Changes

Modified common.scm from [a61150e51f] to [3de2002d2b].

549
550
551
552
553
554
555
556

557
558
559
560
561
562
563
564
565

566
567
568
569
570

571
572
573
574
575
576
577
578
579
580
581
582
583
584
585






586
587
588
589
590
591
592
549
550
551
552
553
554
555

556
557
558
559
560
561
562
563
564

565
566
567
568
569

570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598







-
+








-
+




-
+















+
+
+
+
+
+








;; get cpu load by reading from /proc/loadavg, return all three values
;;
;; Returns a list of the first three datums in the file, in file order
;; (per proc(5) these are the 1, 5 and 15 minute load averages).
;;
(define (common:get-cpu-load)
  (with-input-from-file "/proc/loadavg" 
    (lambda ()
      ;; Sequence the reads with let*: the operands of a procedure call such
      ;; as (list (read) (read) (read)) may be evaluated in any order, which
      ;; would leave the order of the returned values unspecified.
      (let* ((load1  (read))
	     (load5  (read))
	     (load15 (read)))
	(list load1 load5 load15)))))

;; Sleep and re-check while the cpu load is too high, giving up silently once
;; the retry budget is exhausted.
;;
;;   maxload   - permitted load per cpu; the threshold is (* maxload numcpus)
;;   numcpus   - number of cpus used to scale maxload and to judge load jumps
;;   waitdelay - seconds to sleep between checks
;;   count:    - remaining retries (default 1000)
;;   msg:      - optional text appended to the "waiting" log lines
;;
;; Two conditions trigger a wait: the first load value exceeding the scaled
;; threshold, or the first load value exceeding the second by more than numcpus.
;;
(define (common:wait-for-cpuload maxload numcpus waitdelay #!key (count 1000) (msg #f))
  (let* ((loadavg (common:get-cpu-load))
	 (first   (car loadavg))
	 (next    (cadr loadavg))
	 (adjload (* maxload numcpus))
	 (loadjmp (- first next)))
    (cond
     ((and (> first adjload)
	   (> count 0))
      (debug:print-info 0 "waiting " waitdelay " seconds due to load " first " exceeding max of " adjload (if msg msg ""))
      (thread-sleep! waitdelay)
      ;; pass msg along so the caller's text shows on every retry, not just the first
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg))
     ((and (> loadjmp numcpus)
	   (> count 0))
      (debug:print-info 0 "waiting " waitdelay " seconds due to load jump " loadjmp " > numcpus " numcpus (if msg msg ""))
      (thread-sleep! waitdelay)
      (common:wait-for-cpuload maxload numcpus waitdelay count: (- count 1) msg: msg)))))

;; Count the lines of /proc/cpuinfo that match the "processor : N" pattern
;; and return that count (0 when no line matches).
;;
(define (common:get-num-cpus)
  (with-input-from-file "/proc/cpuinfo"
    (lambda ()
      (let scan ((line  (read-line))
		 (total 0))
	(cond
	 ;; end of file: the running total is the answer
	 ((eof-object? line) total)
	 ;; one matching line per processor entry
	 ((string-match "^processor\\s+:\\s+\\d+$" line)
	  (scan (read-line) (+ total 1)))
	 (else
	  (scan (read-line) total)))))))

;; wait for normalized cpu load to drop below maxload
;;
;;   maxload - permitted load per cpu; scaled by the detected cpu count
;;   msg:    - optional text forwarded to common:wait-for-cpuload
;;
;; Polls via common:wait-for-cpuload with a 15 second delay between checks.
;;
(define (common:wait-for-normalized-load maxload #!key (msg #f))
  ;; common:get-num-cpus returns 0 when no /proc/cpuinfo line matches its
  ;; pattern; a cpu count of 0 would scale the threshold to 0 and make any
  ;; non-zero load trigger waiting, so treat the machine as having at least one cpu.
  (let ((num-cpus (max 1 (common:get-num-cpus))))
    (common:wait-for-cpuload maxload num-cpus 15 msg: msg)))

;; Run "uname" through cmd-run->list and return the first element of the
;; first item in its result, or "unknown" when that first item is empty.
;;
;;   params - optional; when given, the first element is appended to the
;;            command line in place of the default "-a"
;;
(define (get-uname . params)
  (let* ((flags  (if (null? params) "-a" (car params)))
	 (result (cmd-run->list (conc "uname " flags))))
    (if (not (null? (car result)))
	(caar result)
	"unknown")))

Modified launch.scm from [5d62fc5f7e] to [37fee9ec8f].

169
170
171
172
173
174
175
176


177
178
179
180
181
182
183
169
170
171
172
173
174
175

176
177
178
179
180
181
182
183
184







-
+
+







	 (vector-set! exit-info 3 1) ;; force fail, this used to be next-state but that doesn't make sense. should always be "COMPLETED" 
	 (tests:test-set-status! run-id test-id "COMPLETED" "FAIL" (conc "Failed at step " stepname) #f)
	 )))
    logpro-used))

(define (launch:execute encoded-cmd)
  
   (let* ((cmdinfo   (common:read-encoded-string encoded-cmd)))
   (let* ((cmdinfo    (common:read-encoded-string encoded-cmd))
	  (tconfigreg (make-hash-table)))
    (setenv "MT_CMDINFO" encoded-cmd)
    (if (list? cmdinfo) ;; ((testpath /tmp/mrwellan/jazzmind/src/example_run/tests/sqlitespeed)
	;; (test-name sqlitespeed) (runscript runscript.rb) (db-host localhost) (run-id 1))
	(let* ((testpath  (assoc/default 'testpath  cmdinfo))  ;; testpath is the test spec area
	       (top-path  (assoc/default 'toppath   cmdinfo))
	       (work-area (assoc/default 'work-area cmdinfo))  ;; work-area is the test run area
	       (test-name (assoc/default 'test-name cmdinfo))
380
381
382
383
384
385
386
387


388
389
390
391
392
393
394
381
382
383
384
385
386
387

388
389
390
391
392
393
394
395
396







-
+
+







					      (begin
						(thread-sleep! 2)
						(loop (+ i 1)))
					      )))))
				 ;; then, if runscript ran ok (or did not get called)
				 ;; do all the ezsteps (if any)
				 (if ezsteps
				     (let* ((testconfig (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
				     (let* ((testconfig ;; (read-config (conc work-area "/testconfig") #f #t environ-patt: "pre-launch-env-vars")) ;; FIXME??? is allow-system ok here?
					     (tests:get-testconfig test-name tconfigreg #t)) ;; 'return-procs)))
					    (ezstepslst (hash-table-ref/default testconfig "ezsteps" '())))
				       (hash-table-set! *testconfigs* test-name testconfig) ;; cached for lazy reads later ...
				       (if (not (file-exists? ".ezsteps"))(create-directory ".ezsteps"))
				       ;; if ezsteps was defined then we are sure to have at least one step but check anyway
				       (if (not (> (length ezstepslst) 0))
					   (debug:print 0 "ERROR: ezsteps defined but ezstepslst is zero length")
					   (let loop ((ezstep (car ezstepslst))
828
829
830
831
832
833
834






835

836
837
838
839
840
841
842
843
844




845
846
847

848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874

875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892

893
894
895
896
897
898
899
830
831
832
833
834
835
836
837
838
839
840
841
842

843
844
845
846
847
848




849
850
851
852
853
854

855
856
857
858
859
860
861
862
863
864
865
866
867
868
869

870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899

900
901
902
903
904
905
906
907







+
+
+
+
+
+
-
+





-
-
-
-
+
+
+
+


-
+














-












+

















-
+







   (list ;; (list "MT_TEST_RUN_DIR" work-area)
    (list "MT_RUN_AREA_HOME" *toppath*)
    (list "MT_TEST_NAME" test-name)
    ;; (list "MT_ITEM_INFO" (conc itemdat)) 
    (list "MT_RUNNAME"   runname)
    ;; (list "MT_TARGET"    mt_target)
    ))
  (let* ((tregistry       (make-hash-table))
	 (item-path       (let ((ip (item-list->path itemdat)))
			    (alist->env-vars (list (list "MT_ITEMPATH" ip)))
			    ip))
	 (tconfig         (or (tests:get-testconfig test-name tregistry #t)
			      test-conf)) ;; force re-read now that all vars are set
  (let* ((useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
	 (useshell        (let ((ush (config-lookup *configdat* "jobtools"     "useshell")))
			    (if ush 
				(if (equal? ush "no") ;; must use "no" to NOT use shell
				    #f
				    ush)
				#t)))     ;; default is yes
	 (runscript       (config-lookup test-conf   "setup"        "runscript"))
	 (ezsteps         (> (length (hash-table-ref/default test-conf "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace       (config-lookup test-conf   "requirements" "diskspace"))
	 (memory          (config-lookup test-conf   "requirements" "memory"))
	 (runscript       (config-lookup tconfig   "setup"        "runscript"))
	 (ezsteps         (> (length (hash-table-ref/default tconfig "ezsteps" '())) 0)) ;; don't send all the steps, could be big
	 (diskspace       (config-lookup tconfig   "requirements" "diskspace"))
	 (memory          (config-lookup tconfig   "requirements" "memory"))
	 (hosts           (config-lookup *configdat* "jobtools"     "workhosts"))
	 (remote-megatest (config-lookup *configdat* "setup" "executable"))
	 (run-time-limit  (or (configf:lookup  test-conf   "requirements" "runtimelim")
	 (run-time-limit  (or (configf:lookup  tconfig   "requirements" "runtimelim")
			      (configf:lookup  *configdat* "setup" "runtimelim")))
	 ;; FIXME SOMEDAY: not good how this is so obtuse, this hack is to 
	 ;;                allow running from dashboard. Extract the path
	 ;;                from the called megatest and convert dashboard
	 ;;             	  or dboard to megatest
	 (local-megatest  (let* ((lm  (car (argv)))
				 (dir (pathname-directory lm))
				 (exe (pathname-strip-directory lm)))
			    (conc (if dir (conc dir "/") "")
				  (case (string->symbol exe)
				    ((dboard)    "../megatest")
				    ((mtest)     "../megatest")
				    ((dashboard) "megatest")
				    (else exe)))))
	 (item-path       (item-list->path itemdat))
	 (launcher        (common:get-launcher *configdat* test-name item-path)) ;; (config-lookup *configdat* "jobtools"     "launcher"))
	 (test-sig   (conc (common:get-testsuite-name) ":" test-name ":" item-path)) ;; (item-list->path itemdat))) ;; test-path is the full path including the item-path
	 (work-area  #f)
	 (toptest-work-area #f) ;; for iterated tests the top test contains data relevant for all
	 (diskpath   #f)
	 (cmdparms   #f)
	 (fullcmd    #f) ;; (define a (with-output-to-string (lambda ()(write x))))
	 (mt-bindir-path #f)
	 (testinfo   (rmt:get-test-info-by-id run-id test-id))
	 (mt_target  (string-intersperse (map cadr keyvals) "/"))
	 (debug-param (append (if (args:get-arg "-debug")  (list "-debug" (args:get-arg "-debug")) '())
			      (if (args:get-arg "-logging")(list "-logging") '()))))

    (setenv "MT_ITEMPATH" item-path)
    (if hosts (set! hosts (string-split hosts)))
    ;; set the megatest to be called on the remote host
    (if (not remote-megatest)(set! remote-megatest local-megatest)) ;; "megatest"))
    (set! mt-bindir-path (pathname-directory remote-megatest))
    (if launcher (set! launcher (string-split launcher)))
    ;; set up the run work area for this test
    (if (and (args:get-arg "-preclean") ;; user has requested to preclean for this run
	     (not (member (db:test-get-rundir testinfo)(list "n/a" "/tmp/badname")))) ;; n/a is a placeholder and thus not a read dir
	(begin
	  (debug:print-info 0 "attempting to preclean directory " (db:test-get-rundir testinfo) " for test " test-name "/" item-path)
	  (runs:remove-test-directory testinfo 'remove-data-only))) ;; remove data only, do not perturb the record

    ;; prevent overlapping actions - set to LAUNCHED as early as possible
    ;;
    (tests:test-set-status! run-id test-id "LAUNCHED" "n/a" #f #f) ;; (if launch-results launch-results "FAILED"))
    (rmt:roll-up-pass-fail-counts run-id test-name item-path #f "LAUNCHED")
    (set! diskpath (get-best-disk *configdat* test-conf))
    (set! diskpath (get-best-disk *configdat* tconfig))
    (if diskpath
	(let ((dat  (create-work-area run-id run-info keyvals test-id test-path diskpath test-name itemdat)))
	  (set! work-area (car dat))
	  (set! toptest-work-area (cadr dat))
	  (debug:print-info 2 "Using work area " work-area))
	(begin
	  (set! work-area (conc test-path "/tmp_run"))
942
943
944
945
946
947
948
949

950
951
952
953
954
955
956
950
951
952
953
954
955
956

957
958
959
960
961
962
963
964







-
+







    (if (args:get-arg "-xterm")(set! fullcmd (append fullcmd (list "-xterm"))))
    (debug:print 1 "Launching " work-area)
    ;; set pre-launch-env-vars before launching, keep the vars in prevvals and put the environment back when done
    (debug:print 4 "fullcmd: " fullcmd)
    (let* ((commonprevvals (alist->env-vars
			    (hash-table-ref/default *configdat* "env-override" '())))
	   (testprevvals   (alist->env-vars
			    (hash-table-ref/default test-conf "pre-launch-env-overrides" '())))
			    (hash-table-ref/default tconfig "pre-launch-env-overrides" '())))
	   (miscprevvals   (alist->env-vars ;; consolidate this code with the code in megatest.scm for "-execute"
			    (append (list (list "MT_TEST_RUN_DIR" work-area)
					  (list "MT_TEST_NAME" test-name)
					  (list "MT_ITEM_INFO" (conc itemdat)) 
					  (list "MT_RUNNAME"   runname)
					  (list "MT_TARGET"    mt_target)
					  (list "MT_ITEMPATH"  item-path)

Modified server.scm from [2936193c4a] to [b9ea2e37c4].

124
125
126
127
128
129
130

131
132
133
134
135
136
137
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138







+







	     ;; match current ip or hostname
	     (not (string-match (conc "("curr-host "|" curr-host"\\..*)") target-host))
	     (not (equal? curr-ip target-host)))
	(begin
	  (debug:print-info 0 "Starting server on " target-host ", logfile is " logfile)
	  (setenv "TARGETHOST" target-host)))
    (setenv "TARGETHOST_LOGF" logfile)
    (common:wait-for-normalized-load 4 " delaying server start due to load") ;; do not try starting servers on an already overloaded machine, just wait forever
    (system (conc "nbfake " cmdln))
    (unsetenv "TARGETHOST_LOGF")
    (if (get-environment-variable "TARGETHOST")(unsetenv "TARGETHOST"))
    ;; (system cmdln)
    (pop-directory)))

(define (server:get-client-signature)

Modified tests.scm from [a9b36e50b6] to [c32dd3737b].

751
752
753
754
755
756
757


758

759
760
761
762
763
764
765
751
752
753
754
755
756
757
758
759

760
761
762
763
764
765
766
767







+
+
-
+







											     "pre-launch-env-vars"
											     #f)))
			   #f)))
    (hash-table-set! *testconfigs* test-name tcfg)
    (if (and cache-path
	     (not cache-exists)
	     (file-write-access? cache-path))
	(let ((tpath (conc cache-path "/.testconfig")))
	  (debug:print-info 1 "Caching testconfig for " test-name " in " tpath)
	(configf:write-alist tcfg (conc cache-path "/.testconfig")))	
	  (configf:write-alist tcfg tpath)))
    tcfg))
  
;; sort tests by priority and waiton
;; Move test specific stuff to a test unit FIXME one of these days
(define (tests:sort-by-priority-and-waiton test-records)
  (let ((mungepriority (lambda (priority)
			 (if priority