Megatest

mtest-diag.scm at [790604d640]
Login

File utils/wip/mtest-diag.scm artifact 7f6edef793 part of check-in 790604d640


#!/p/foundry/env/pkgs/chicken/4.10.0_ext/bin/csi -s

(use chicken)
(use data-structures)


(include "/nfs/site/home/bjbarcla/bin2/mtest-repair-lib.scm")
(glib-color-mode 1)

;;; check mtver in xterm
(let ((mt-ver (do-or-die "megatest -version")))
  (when (member mt-ver '("1.6309-738c" "1.6029"))
      (iwarn "This xterm has an older version of megatest.")
      (ierr "Please load latest megatest version to proceed.")
      (print "eg.: source ../scripts/newrel-setup.csh 1.63/11b")
      (exit 3)))


;;;; kill netbatch jobs from this megatest
;; TODO!


(define *diag* #t)
;;(define *user* (get-environment-variable "USER"))
(define *user* (do-or-die "ls -ld . | awk '{print $3}'"))
(print "user="*user*)
;;;; delete .homehost .homehost.config
;;;; if not on homehost, ssh homehost, cd here, killall mtest dboard
(if (not *diag*)
    (when (file-exists? ".homehost.config")
      (delete-db ".homehost.config")))
    
(when (file-exists? ".homehost")
  (let* ((homehost (with-input-from-file ".homehost" (lambda () (read)))))
    (let* ((homehostname (do-or-die "host `cat .homehost` | sed 's/.$//' | awk '{print $NF}' | awk -F. '{print $1}'"))
           (thishostname (get-environment-variable "HOST")))
      (when (not (equal? homehostname thishostname))
        (let* ((this-exe-compiled (car (argv)))
               (this-exe "/nfs/site/home/bjbarcla/bin2/mtest-diag.scm")
               (cmd (conc "ssh "homehostname" 'cd "(get-environment-variable "PWD")" && "this-exe"'")))
          (iwarn "Running on the homehost -- "homehostname)
          ;;(iwarn "eg: % ssh "homehostname" 'cd "(get-environment-variable "PWD")" && "(car (argv))"'")
          (print "cmd="cmd)
          ;;(inote "sleeping for 5 seconds.  hit ctrl-c now to run on homehost or wait to proceed.")
          (system cmd)
          (exit 0))))))




;;;; kill megatests and dashboards in this area
(define (kill-mtest-dboard)
  (if *diag*
      #f
      (let* ((this-toppath (pid->cwd (current-process-id)))
             (tmppath      (toppath->tmppath this-toppath))
             (config       (let ((res (conc this-toppath "/megatest.config")))
                             (when (not (file-exists? res))
                               (ierr "This is not a megatest run area; "res" does not exist.  Aborting.")
                               (exit 2))
                             res))
             (mtest-procs (get-my-mtest-procs))
             (dashboard-procs (get-my-dashboard-procs))
             (all-pids (map proc-PID (append mtest-procs dashboard-procs)))
             (our-pids (filter (lambda (pid)
                                 (equal? (pid->cwd pid) this-toppath))
                               all-pids)))

        (if (null? our-pids)
            (inote "No mtest or dboard processes on this host in in this runarea.")
            (begin
              (iwarn "Killing all megatest and dashboard processes on this host.")
              (gracefully-kill-pids our-pids)))
        )))

(kill-mtest-dboard)


;;;; delete /tmp/.$USER-portlogger.db
(let ((plfile (conc "/tmp/."*user* "-portlogger.db")))

  (if (safe-file-exists? plfile)
    (if *diag*
        (print "plfile exists - "plfile)
        (begin
          (inote "removing portlogger file")
          (system (conc "rm "plfile))))))


;;;; move logs dir aside
(when (not *diag*)
  (system (conc "mv logs logs-aside-`date +%s`"))
  (system "mkdir logs"))
  

;;;; fixes for dependency diagram
(when (not *diag*)
  (inote "Removing dep graph tmp files if they exist")
  (system (conc "rm /tmp/."*user*"-*.dot"))

  ;;#ln -s /p/fdk/gwa/$USER/fossil/ext/<your flow>_ext ext
  (let* ((toppath (pid->cwd (current-process-id)))
         (flow (car (string-split
                     (car (reverse (string-split toppath "/")))
                     ".")))
         (extdir (conc "/p/fdk/gwa/"*user*
                       "/fossil/ext/"flow"_ext")))
    (when (and (safe-file-exists? extdir)
               (not (safe-file-exists? "ext")))
      (inote "Linking in ext dir")
      (system (conc "ln -s "extdir" ext")))))
  

;;;; check for 0 byte megatest{,_ref}.db in tmp.  delete them
;;;; check for wal-mode megatest{,_ref}.db in tmp.  delete them
(define (repair-dbs)
  (let* ((this-toppath (pid->cwd (current-process-id)))
         (tmppath      (toppath->tmppath this-toppath))
         (golden-mtest-file (conc this-toppath "/megatest.db"))
         (golden-mtest-file-ok (check-db "megatest.db"))
         (tmp-mtest-file    (conc tmppath "/megatest.db"))
         (tmp-mtestref-file    (conc tmppath "/megatest_ref.db"))
         (tmp-mtest-file-ok (check-db tmp-mtest-file))
         (tmp-mtestref-file-ok (check-db tmp-mtestref-file))
         )
;;;; check for megatest{,_ref}.db in tmp that die on .schema.  delete them
    (when (safe-file-exists? tmppath)
        (if tmp-mtest-file-ok
            (inote "tmp megatest db file ok")
            (if *diag*
                (print "diag: tmp megatest db broken - "tmp-mtest-file)
                (delete-db tmp-mtest-file)))
        (if tmp-mtestref-file-ok
            (inote "tmp megatestref db file ok")
            (if *diag*
                (print "diag: tmpref megatest db broken - "tmp-mtestref-file)
                (delete-db tmp-mtestref-file))))

;;;; check for megatest.db
    (if golden-mtest-file-ok
        (inote "golden megatest db file ok")
        (if (not (file-exists? golden-mtest-file))
            (inote "megatest.db not present.  Continuing.")
            (begin
          ;;;; if golden megatest db is broken, stop now!
              (ierr "Golden megatest.db is broken.  Please delete it or replace it from a backup version in .snapshot.  If critical, contact env team to assist.")
              (sendmail "bjbarcla" "!!Bad golden megatest.db" this-toppath)
              (inote "Backups in .snapshot:")
              (system "ls -l .snapshot/*/megatest.db")
              (ierr "Not proceeding with any more checks.")
              (exit 3))))



    ))

(repair-dbs)