Megatest

Changes On Branch 047e6962b480b7d4
Login

Changes In Branch robust-test-execute Excluding Merge-Ins

This is equivalent to a diff from fb43245d3c to 047e6962b4

2017-03-21
11:46
Added defence against bad sqlite3 handles to the finalizer check-in: 79058725e7 user: matt tags: v1.63
2017-03-20
12:12
missing server fix commit Closed-Leaf check-in: 74d9575642 user: matt tags: server-fix
2017-03-09
20:10
Added defense against an unreadable .homehost file (shouldn't happen\!) Closed-Leaf check-in: 08cf77140c user: matt tags: homehost-protection
2017-03-08
22:37
Added messages for all possible paths in the lauch:execute initial connection and NFS dir check section Closed-Leaf check-in: 047e6962b4 user: matt tags: robust-test-execute
09:58
fixed issues with Baishali last evening check-in: fb43245d3c user: bjbarcla tags: v1.63, v1.6311
2017-03-07
22:41
Fixed couple typos Closed-Leaf check-in: e8af37dc3b user: mrwellan tags: cmdinfo-boost-to-execute
16:40
Add checks to debug:print* to verify that the port given is really a port check-in: 11e6ba414b user: matt tags: v1.63, v1.6311

Modified launch.scm from [946add6293] to [5c08c0419f].

444
445
446
447
448
449
450
451

452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
















471
472
473
474
475
476
477
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  ;; On NFS it can be slow and unreliable to get needed startup information.
	  ;;  i. Check if we are on the homehost, if so, proceed
	  ;; ii. Check if host and port passed in via CMDINFO are valid and if
	  ;;     possible use them.
	  (let ((bestadrs (server:get-best-guess-address (get-host-name))))

	    (if (equal? homehost bestadrs) ;; we are likely on the homehost
		(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
		(let ((host-port (if serverurl (string-split serverurl ":") #f)))
		  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
		  (if (string? homehost)
		      (if (and host-port
			       (> (length host-port) 1))
			  (let* ((host      (car host-port))
                                 (port      (cadr host-port))
                                 (start-res (http-transport:client-connect host port))
                                 (ping-res  (rmt:login-no-auto-client-setup start-res)))
			    (if (and start-res
				     ping-res)
				(let ((url  (http-transport:server-dat-make-url start-res)))
				  (remote-conndat-set! *runremote* start-res)
				  (remote-server-url-set! *runremote* url)
				  (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data."))
				(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
				)))))))
















	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")







|
>


















|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
                                              runscript))))) ;; assume it is on the path
	       ) ;; (rollup-status 0)

	  ;; On NFS it can be slow and unreliable to get needed startup information.
	  ;;  i. Check if we are on the homehost, if so, proceed
	  ;; ii. Check if host and port passed in via CMDINFO are valid and if
	  ;;     possible use them.
	  (let ((bestadrs (server:get-best-guess-address (get-host-name)))
		(needcare #f))
	    (if (equal? homehost bestadrs) ;; we are likely on the homehost
		(debug:print-info 0 *default-log-port* "test " test-name " appears to be running on the homehost " homehost)
		(let ((host-port (if serverurl (string-split serverurl ":") #f)))
		  (if (not *runremote*)(set! *runremote* (make-remote))) ;; init *runremote*
		  (if (string? homehost)
		      (if (and host-port
			       (> (length host-port) 1))
			  (let* ((host      (car host-port))
                                 (port      (cadr host-port))
                                 (start-res (http-transport:client-connect host port))
                                 (ping-res  (rmt:login-no-auto-client-setup start-res)))
			    (if (and start-res
				     ping-res)
				(let ((url  (http-transport:server-dat-make-url start-res)))
				  (remote-conndat-set! *runremote* start-res)
				  (remote-server-url-set! *runremote* url)
				  (debug:print-info 0 *default-log-port* "connected to " url " using CMDINFO data."))
				(debug:print-info 0 *default-log-port* "received " host ":" port " for url but could not connect.")
				))
			  (begin
			    (debug:print-info 0 *default-log-port* (if host-port
								       (conc "received invalid host-port information " host-port)
								       "no host-port information received"))
			    ;; potential for bad situation if simultaneous starting of hundreds of jobs on servers, set needcare.
			    (set! needcare #t)))
		      (begin
			(debug:print-info 0 *default-log-port* "received no homehost information. Please report this to support as it should not happen.")
			(set! needcare #t)))))
	    (if needcare  ;; due to very slow NFS we will do a brute force mkdir to ensure that the directory inode it truly available on this host
		(let ((logdir (conc top-path "/logs"))) ;; we'll try to create this directory
		  (handle-exceptions
		      exn
		      (debug:print 0 *default-log-port* "Failed to create directory " logdir " expect problems, message: " ((condition-property-accessor 'exn 'message) exn))
		    (create-directory logdir #t)))))
		  
	  ;; NFS might not have propagated the directory meta data to the run host - give it time if needed
	  (let loop ((count 0))
	    (if (or (file-exists? top-path)
		    (> count 10))
		(change-directory top-path)
		(begin
		  (debug:print 0 *default-log-port* "INFO: Not starting job yet - directory " top-path " not found")