#!/usr/bin/perl # Grid Status Test # # http://grid.ncsa.uiuc.edu/test/grid-status-test/ # # A script to test Globus based grid services on the user's behalf # Written by Joe Greenseid # and Jim Basney # # Based on the TeraGrid Status Test # # Originally written by Jim Basney # and Joe Greenseid # # Copyright 2001-2003 The Board of Trustees of the University of Illinois. # All rights reserved. # # Developed by: # # NCSA TestGrid Project # National Center for Supercomputing Applications # University of Illinois # http://grid.ncsa.uiuc.edu/test/ # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal with the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimers. # # Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimers in the # documentation and/or other materials provided with the distribution. # # Neither the names of the National Center for Supercomputing # Applications, the University of Illinois, nor the names of its # contributors may be used to endorse or promote products derived from # this Software without specific prior written permission. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR
# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
# CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.

# 12 December, 2003

require "ctime.pl";

$program_name = "NCSA TestGrid Project: Grid Status Test";
$program_version = "1.0.3";

# test result flags
$FLAG_FAILED = 0;
$FLAG_SUCCEEDED = 1;
$FLAG_WARNING = 2;
$FLAG_TIMEOUT = 3;
$FLAG_NOSERVER = 4;

# Default child-process timeout (seconds) and the marker string that the
# tests compare command output against to detect a timeout.
$timeout = 60;
$timeout_msg = "\nchild process timed out\n";

# Initialize empty results and errors strings
$results = "";
$errors = "";

#
# Read Command Line Args
#
while (defined($_ = shift(@ARGV))) {
    if (/^-h/) {
        # -h consumes two values (file and title); refuse to guess when
        # they are missing instead of silently leaving them undefined.
        if (@ARGV < 2) {
            print STDERR "error: option -h requires an html file and a title\n";
            &usage();
            exit 1;
        }
        $htmlfile = shift(@ARGV);
        $htmltitle = shift(@ARGV);
    } elsif (/^-t/) {
        $timeout = shift(@ARGV);
        if (!defined($timeout) || $timeout !~ /^\d+$/) {
            print STDERR "error: option -t requires a number of seconds\n";
            &usage();
            exit 1;
        }
    } elsif (/^-v/i) {
        print "$program_name, $program_version\n";
        exit 0;
    } elsif (/^-/) {
        print STDERR "error: unknown option $_\n";
        &usage();
        exit 1;
    } else {
        push(@testbedsites, $_);
    }
}

#
# If no sites passed in on command line to test, give help and exit
#
# (An array in boolean context is true iff it is non-empty;
# defined(@array) is a fatal error in current Perl releases.)
if (!@testbedsites) {
    &usage();
    exit 1;
}

#
# Check to see that we can log into $HOME/.grid-status-test/
#
$logdir = "$ENV{'HOME'}/.grid-status-test";
if (!(-e $logdir)) {
    # Use the builtin so a failure is noticed now rather than when the
    # log is written after every test has already run.
    if (!mkdir($logdir, 0777)) {
        print STDERR "error: cannot create log directory $logdir: $!\n";
        exit 1;
    }
} elsif (!(-d $logdir)) {
    print "This script logs to $logdir, but $logdir exists AND is not a directory.\n";
    print "Please create a directory $logdir and rerun this script.\n";
    exit 1;
}

$current_time = gettime();
$seelog = $logdir . "/log-" . $current_time;
$username = getpwuid($<);
chomp($hostname = `hostname`);
$ENV{'TZ'} = "GMT";
$| = 1;

$timestr = &ctime($^T);
chop $timestr;
print "Grid Status Test STARTING: $timestr\n";

# create log file header string
$logfile_header = "$program_name, version $program_version\nRun on: $timestr\nBy user: $username\nOn system: $hostname\n\n";

# setup SIGALRM handler for sub runcmd
$SIG{'ALRM'} = 'sigalrmhandler';

#
# check for the commands I want to run
#
# Each variable is left undefined when the command is not executable;
# the tests use defined() on it to decide whether they can run.
#
chomp($grid_proxy_info = `which grid-proxy-info 2>/dev/null`);
if (!(-x $grid_proxy_info)) {
    undef $grid_proxy_info;
}

chomp($grid_proxy_init = `which grid-proxy-init 2>/dev/null`);
if (!(-x $grid_proxy_init)) {
    undef $grid_proxy_init;
}

chomp($globusrun = `which globusrun 2>/dev/null`);
if (!(-x $globusrun)) {
    &printerr("Command 'globusrun' was not found in your \$PATH.\n");
    &printerr("\tAll tests relying on this command will be skipped.\n");
    &printerr("---\n\n");
    undef $globusrun;
}

chomp($grid_info_search = `which grid-info-search 2>/dev/null`);
if (!(-x $grid_info_search)) {
    &printerr("Command 'grid-info-search' was not found in your \$PATH.\n");
    &printerr("\tAll tests relying on this command will be skipped.\n");
    &printerr("---\n\n");
    undef $grid_info_search;
}

chomp($globus_url_copy = `which globus-url-copy 2>/dev/null`);
if (!(-x $globus_url_copy)) {
    &printerr("Command 'globus-url-copy' was not found in your \$PATH.\n");
    &printerr("\tAll tests relying on this command will be skipped.\n");
    &printerr("---\n\n");
    undef $globus_url_copy;
}

chomp($gsissh = `which gsissh 2>/dev/null`);
if (!(-x $gsissh)) {
    # Fall back to the ssh binary under $GLOBUS_LOCATION/bin.
    $gsissh = "$ENV{'GLOBUS_LOCATION'}/bin/ssh";
    if (!(-x $gsissh)) {
        &printerr("Command 'gsissh' was not found in your \$PATH,\n");
        &printerr("or in $ENV{'GLOBUS_LOCATION'}/bin\n");
        &printerr("\tAll tests relying on this command will be skipped.\n");
        &printerr("---\n\n");
        undef $gsissh;
    }
}

# For some of my test cases, gsisshd was only running on port 222
# In later versions of this script, server ports will be a config
# file option
#
# if (-x $gsissh) {$gsissh = $gsissh . " -p 222 ";}

# Check for a valid proxy
&verifyproxy();

# Write files needed for mpicc testing to temporary files
# so we can later run the testmpicc test
$mpicctmpsrc = "/tmp/$username.gs-test-cpi.c";
$mpicctmpshell = "/tmp/$username.gs-test-mpitest.sh";
&writempiccsource($mpicctmpsrc);
&writempiccshell($mpicctmpshell);

# Set temporary location of condorgtmpsrc
# so we can later run the testcondor tests
$condorgtmpsrc = "/tmp/$username.gs-test-condorg-test.pl";

# @tests is the ordered list of result keys; %tests maps a key to the
# label used when reporting it.
@tests = ("auth", "GRIS", "GSIFTP", "GSISSH", "mpicc", "path", "Condor-G");
%tests = ("auth" => "Authenticate to Gatekeeper",
          "GRIS" => "GRIS",
          "GSIFTP" => "GSIFTP Server",
          "GSISSH" => "GSISSH Server",
          "mpicc" => "mpicc compilation",
          "path" => "Grid Programs in PATH",
          "Condor-G" => "Condor-G",
          "simplejob" => "Simple Test of",
          "mpijob" => "MPI Job Test of",
          "gsiftpjob" => "GSIFTP Job Test of");

# Every jobmanager name tested at any site (used to extend @tests below)
%jobmanagers = ();

foreach $site (@testbedsites) {
    &printerr("================================================\n");
    &printerr("Site: $site\n");
    &printerr("================================================\n");
    &printerr("\n");

    # always must reset jobmanager array to zero for each $site
    @jobmanagers = ();

    &printres("\nTesting $site:\n");
    &testauth($site);
    &testgris($site);
    &getjobmanagers($site);
    &testgsiftp($site);
    &testgsissh($site);
    &testmpicc($site, $mpicctmpsrc, $mpicctmpshell);
    &checkpath($site);
    &testcondorg($site, $condorgtmpsrc);

    if (@jobmanagers > 0) {
        # We found at least 1 jobmanager
        foreach $foundjobmanager (@jobmanagers) {
            $jobmanagers{$foundjobmanager} = 1;
            &testjobmanager($site, $foundjobmanager);
            &testmpi($site, $foundjobmanager);
            &testgsiftpjob($site, $foundjobmanager);
        }
    } else {
        # There were no jobmanagers found, just try default
        $jobmanagers{"jobmanager"} = 1;
        &testjobmanager($site, "jobmanager");
        &testmpi($site, "jobmanager");
        &testgsiftpjob($site, "jobmanager");
    }

    # If you want to test specific jobmanagers and the reporters are
    # not installed or running, you can put them here, though we do not
    # recommend this.
    #
    # $jobmanagers{"jobmanager-pbs"} = 1;
    # &testjobmanager($site, "jobmanager-pbs");
    # &testmpi($site, "jobmanager-pbs");
    # &testgsiftpjob($site, "jobmanager-pbs");
}

# add the 3 jobmanager tests to the tests array, so we have a complete
# array to send to writehtmlresults() and summarizeresults()
foreach $jm (sort(keys(%jobmanagers))) {
    push(@tests, $jm, $jm . ":mpijob", $jm . ":gsiftpjob");
}

# write the results in a pretty web page
&writehtmlresults($htmlfile) if (defined($htmlfile));

&printlog($logfile_header);
&printlog($errors);
&summarizeresults();

print "\nCheck $seelog\nfor more information about any errors or warnings the script detected.\n\n";

$timestr = &ctime(time());
chop $timestr;
print "Grid Status Test COMPLETED: $timestr\n";

exit 0;

##################
#                #
# Infrastructure #
#  subroutines   #
#                #
##################

#
# usage
#
# Print the command line synopsis to STDERR
#
sub usage {
    print STDERR "usage: $0 [-h htmlfile title] [-t secs] site1 [site2 ...]\n";
    print STDERR " where -h writes html results to htmlfile with title\n";
    print STDERR " -t specifies a timeout for child processes\n";
    print STDERR " -v prints the version number of this program\n";
    print STDERR " site1 [site2 ...] is a list of sites to test\n";
}

#
# printres
#
# Print Results
#
# Echoes the message to STDOUT and appends it to $results
#
sub printres {
    my ($msg) = @_;
    print $msg;
    $results .= $msg;
}

#
# printerr
#
# Print Error Messages
#
# Appends the message to $errors only; nothing is printed here
#
sub printerr {
    my ($msg) = @_;
    $errors .= $msg;
}

#
# printlog
#
# Append the given text to the log file $logdir/log-$current_time
#
sub printlog {
    my ($msgs) = @_;
    $logfile = $logdir . "/log-" . $current_time;
    open(LOG, ">>$logfile") || die "can't open $logfile: $!";
    print LOG $msgs;
    close(LOG) || die "can't close $logfile: $!";
}

#
# gettime
#
# Get the current time for use in name of log file
#
# Format looks like: YYYYMMDD-HH_MM_SS-GMT
# ex: 20031007-19_52_17-GMT
#
sub gettime {
    my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(time());
    return sprintf("%04d%02d%02d-%02d_%02d_%02d-GMT",
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

#
# errlogentry
#
# takes a set of strings and formats an entry for the error log
# (printing its entry to printerr(), which is written to the log
# at the very end)
#
# Arguments: test name, result, command, command output, reason,
# likely reason.  When a reason is given the entry ends after it;
# command and output are only logged otherwise.
#
sub errlogentry {
    my ($test, $result, $command, $output, $reason, $likely) = @_;

    &printerr("---\n");
    &printerr("Test: $test\n");
    &printerr("Result: $result\n");

    if ($reason ne "") {
        &printerr("Reason: $reason\n");
        &printerr("\n");
        return;
    }

    if ($likely ne "") {
        if ($likely =~ /\n/) {
            # multi-line explanation: indent every line under the label
            $likely = " " . $likely;
            $likely =~ s/\n/\n /g;
            &printerr("Likely reason:\n$likely\n");
        } else {
            &printerr("Likely reason: $likely\n");
        }
    }

    &printerr("Command: $command\n");

    # Drop blank lines and trim the rest; a lexical loop variable is used
    # so no global counter is disturbed.
    my $formatted = "";
    foreach my $line (split(/\n/, $output)) {
        next if ($line =~ /^\s*$/);
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;
        $formatted .= " " . $line . "\n";
    }
    &printerr("Output:\n$formatted\n");
    &printerr("\n");
}

#
# nopass
#
# Returns true (1) if a flag indicates the test did not pass where
# pass == SUCCEEDED || WARNING
#
# (this test is useful for checking dependencies:
#   if (nopass($resflag{$site}{$test})) { return without testing }
#
sub nopass {
    my ($flag) = @_;
    return 1 if (!defined($flag));
    return 0 if ($flag == $FLAG_SUCCEEDED || $flag == $FLAG_WARNING);
    return 1;
}

#############
#           #
# The Tests #
#           #
#############

#
# testauth
#
# Test authentication to a Gatekeeper
#
# Dependencies: (-x globusrun)
#
# globusrun -a -r <site>
#
# Records the outcome in $resflag{$site}{"auth"}; nothing is recorded
# when globusrun is unavailable.
#
sub testauth {
    my ($site) = @_;
    my $test = "Gatekeeper Authentication";
    my $command = "globusrun -a -r $site";

    if (!(defined($globusrun))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globusrun' command not found", "");
        return;
    }

    &printres("Authenticating to gatekeeper at $site...");
    my $output = &runcmd($command);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{"auth"} = $FLAG_TIMEOUT;
    } elsif (!($output =~ /GRAM Authentication test successful/)) {
        &printres("failed\n");
        # an empty likely-reason is simply omitted by errlogentry
        my $likelyreason = &gramerrorcheck($output);
        &errlogentry($test, "Failure", $command, $output, "", $likelyreason);
        $resflag{$site}{"auth"} = $FLAG_FAILED;
    } else {
        &printres("succeeded\n");
        $resflag{$site}{"auth"} = $FLAG_SUCCEEDED;
    }
}

#
# testgiis
#
# Check to see if <site> is reporting to a specified GIIS
#
# Dependencies: (-x grid-info-search)
#
# grid-info-search -h <giis> -x -b '<giis branch point>'
# If output contains "Mds-Host-hn=<site>" then success!
#
# NOTE: Test not used in current version of script
#
# Runs one grid-info-search against $giis (branch point
# $giis_branch_point) and, when the query answers, records
# $resflag{$site}{"GIIS"} for every site in @testbedsites.
#
sub testgiis {
    return if (!defined($grid_info_search));

    # These two were used by the timeout branch without ever being set.
    my $test = "GIIS Test";
    my $command = "grid-info-search -h $giis -x -b '$giis_branch_point'";

    &printres("Looking up site(s) in GIIS ($giis)...");
    my $output = &runcmd($command);

    if ($output eq $timeout_msg || $output =~ /Timed out/) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
    } elsif (!($output =~ /Mds/)) {
        &printres("failed\n");
        &printerr("grid-info-search output follows\n\n$output\n");
    } else {
        &printres("\n");
        foreach my $site (@testbedsites) {
            # \Q..\E: host names contain dots, which must match literally
            if ($output =~ /Mds-Host-hn=\Q$site\E/) {
                &printres("$site found\n");
                $resflag{$site}{"GIIS"} = $FLAG_SUCCEEDED;
            } else {
                &printres("$site not found\n");
                $resflag{$site}{"GIIS"} = $FLAG_FAILED;
            }
        }
    }
}

#
# testgris
#
# Test to see if GRIS is reporting information
#
# Dependencies: (-x grid-info-search)
#
# grid-info-search -h <site> -x
# If output contains "Mds" then success!
#
# Records the outcome in $resflag{$site}{"GRIS"}; nothing is recorded
# when grid-info-search is unavailable.
#
sub testgris {
    my ($site) = @_;
    my $test = "GRIS Test";
    my $command = "grid-info-search -h $site -x";

    if (!(defined($grid_info_search))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'grid-info-search' command not found", "");
        return;
    }

    &printres("Querying GRIS at $site...");
    my $output = &runcmd($command);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{"GRIS"} = $FLAG_TIMEOUT;
    } elsif ($output =~ /ldap_bind: Can't contact LDAP server/) {
        &printres("no server\n");
        my $likelyreason = "There does not appear to be a GRIS running";
        &errlogentry($test, "Server not found", $command, $output, "", $likelyreason);
        $resflag{$site}{"GRIS"} = $FLAG_NOSERVER;
    } elsif (!($output =~ /Mds/)) {
        &printres("failed\n");
        &errlogentry($test, "Failure", $command, $output, "", "");
        $resflag{$site}{"GRIS"} = $FLAG_FAILED;
    } else {
        &printres("succeeded\n");
        $resflag{$site}{"GRIS"} = $FLAG_SUCCEEDED;
    }
}

#
# testjobmanager
#
# Test simple job submission to jobmanagers at <site>
#
# Dependencies: &testauth
#               (-x globusrun)
#
# globusrun -o -r <site>/<jobmanager> \
#     '&(executable="/bin/echo")(arguments="Grid Status Test")'
#
# Records the outcome in $resflag{$site}{$jobmanager}; nothing is
# recorded when the test is skipped.
#
sub testjobmanager {
    my ($site, $jobmanager) = @_;
    my $test = "Simple test of $jobmanager";
    my $command = "globusrun -o -r $site/$jobmanager '&(executable=\"/bin/echo\")(arguments=\"Grid Status Test\")'";

    if (!(defined($globusrun))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globusrun' command not found", "");
        return;
    }

    if ( (!(defined($resflag{$site}{"auth"}))) ||
         (&nopass($resflag{$site}{"auth"})) ) {
        # Built by concatenation so the logged "Reason:" stays on one line.
        my $reason = "The gatekeeper authentication test failed or did not run. "
                   . "The jobmanager test submits to the gatekeeper, so this test cannot run.";
        &errlogentry($test, "Not Tested", "", "", $reason, "");
        return;
    }

    &printres("Running simple test job on $site/$jobmanager...");
    my $output = &runcmd($command);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{$jobmanager} = $FLAG_TIMEOUT;
    } elsif (!($output =~ /Grid Status Test/)) {
        &printres("failed\n");
        # an empty likely-reason is simply omitted by errlogentry
        my $likelyreason = &gramerrorcheck($output);
        &errlogentry($test, "Failure", $command, $output, "", $likelyreason);
        $resflag{$site}{$jobmanager} = $FLAG_FAILED;
    } elsif ($output ne "Grid Status Test\n") {
        &printres("succeeded (warning: extraneous output)\n");
        # log only the unexpected part of the output
        $output =~ s/Grid Status Test\n//;
        &errlogentry($test, "Success, but warning: with extraneous output",
                     $command, $output, "", "");
        $resflag{$site}{$jobmanager} = $FLAG_WARNING;
    } else {
        &printres("succeeded\n");
        $resflag{$site}{$jobmanager} = $FLAG_SUCCEEDED;
    }
}

#
# testgsiftp
#
# Test to see if the GSIFTP server is running
#
# Dependencies: (-x globus-url-copy)
#
# globus-url-copy file://localfile gsiftp://<site>/remotefile
# globus-url-copy gsiftp://<site>/remotefile file://localfile
#
# If the integrity of the data is maintained through both transfers,
# then success!
#
# Records the outcome in $resflag{$site}{"GSIFTP"}; nothing is recorded
# when globus-url-copy is unavailable.
#
sub testgsiftp {
    my ($site) = @_;
    my $testfile = "grid-status-test.$username.$hostname";
    my $testdir = "/tmp";
    my $testpath = "$testdir/$testfile";
    my $test = "GSIFTP server test";
    my $commandone = "globus-url-copy file:$testpath gsiftp://$site/$testpath.remote";
    my $commandtwo = "globus-url-copy gsiftp://$site/$testpath.remote file:$testpath";
    # The process id makes the payload specific to this run.
    my $expected = "Grid Status Test $$\n";
    my $timeoutreason = "The test did not complete within the window of $timeout seconds";

    if (!(defined($globus_url_copy))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globus-url-copy' command not found", "");
        return;
    }

    # Make sure nothing is located at $testpath before we write to it
    unlink $testpath;

    &printres("Testing GridFTP at $site...");

    open(my $out, '>', $testpath) || die "can't write $testpath: $!";
    print $out $expected;
    close($out) || die "can't close $testpath: $!";

    # Transfer 1: local file -> remote copy
    my $output = &runcmd($commandone);
    # remove the local file so transfer 2 has to recreate it
    unlink $testpath;

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        # it is the first transfer that timed out, so log that command
        &errlogentry($test, "Timed out", $commandone, "", $timeoutreason, "");
        $resflag{$site}{"GSIFTP"} = $FLAG_TIMEOUT;
        return;
    } elsif ($output =~ /Connection refused/) {
        &printres("no server\n");
        my $likelyreason = "It appears there is no GSIFTP server running";
        &errlogentry($test, "Server not found", $commandone, $output, "", $likelyreason);
        $resflag{$site}{"GSIFTP"} = $FLAG_NOSERVER;
        return;
    } elsif ($output ne "") {
        # any output at all from globus-url-copy is treated as a failure
        &printres("failed\n");
        &errlogentry($test, "Failure", $commandone, $output, "", "");
        $resflag{$site}{"GSIFTP"} = $FLAG_FAILED;
        return;
    }

    # Transfer 2: remote copy -> local file
    $output = &runcmd($commandtwo);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        &errlogentry($test, "Timed out", $commandtwo, "", $timeoutreason, "");
        $resflag{$site}{"GSIFTP"} = $FLAG_TIMEOUT;
        return;
    } elsif ($output ne "") {
        &printres("failed\n");
        &errlogentry($test, "Failure", $commandtwo, $output, "", "");
        $resflag{$site}{"GSIFTP"} = $FLAG_FAILED;
        unlink $testpath;
        return;
    }

    my $fh;
    if (!open($fh, '<', $testpath)) {
        &printres("failed\n");
        my $problem = "globus-url-copy failed, file missing\n";
        my $command = "\n Transfer 1: $commandone\n\n Transfer 2: $commandtwo\n";
        &errlogentry($test, "Failure", $command, $problem, "", "");
        $resflag{$site}{"GSIFTP"} = $FLAG_FAILED;
        unlink $testpath;
        return;
    }

    # Lines keep their own newlines, so join with the empty string.
    my $data = join("", <$fh>);
    close($fh) || die "can't close $testpath: $!";
    unlink $testpath;

    if ($data ne $expected) {
        &printres("failed\n");
        my $problem = "globus-url-copy failed, bad file contents:\n Should be: Grid Status Test $$\n Actually is: $data\n";
        my $command = "\n Transfer 1: $commandone\n Transfer 2: $commandtwo\n";
        &errlogentry($test, "Failure", $command, $problem, "", "");
        $resflag{$site}{"GSIFTP"} = $FLAG_FAILED;
        return;
    }

    &printres("succeeded\n");
    $resflag{$site}{"GSIFTP"} = $FLAG_SUCCEEDED;
}

#
# testgsissh
#
# Test to see if the GSISSH server is running
#
# Dependencies: (-x gsissh)
#
# gsissh -o "BatchMode yes" -o \
#     "PreferredAuthentications external-keyx,gssapi" \
#     <site> /bin/echo "Grid Status Test"
#
# Result of test is one of 6 cases:
#
# 1) Timeout (didn't fail, didn't succeed, connection was attempting
#    and either slow or hung when process was terminated
#    because it took too long)
# 2) Success
# 3) Success, with extraneous output (besides the answer, some other
#    output was returned)
# 4) Connection Refused (no sshd listening -- if no port specified,
#    default ssh port is 22)
# 5) Login Failed
#    a) sshd is not gssapi/gsi enabled
#    b) problem with user account.
#       Most likely problems are
#       either no account on <site> or not in <site>'s grid-mapfile
#
# Records the outcome in $resflag{$site}{"GSISSH"}; nothing is recorded
# when the gsissh client is unavailable.
#
sub testgsissh {
    my ($site) = @_;
    my $test = "GSISSH Server";
    my $port = 22;

    # Checked before $gsissh is interpolated into the command line.
    if (!defined($gsissh)) {
        # Like the other tests, a missing client leaves the result unset:
        # "no server" would claim that the remote side had been probed.
        &errlogentry($test, "Not Tested", "", "",
                     "'gsissh' command not found", "");
        return;
    }

    my $command = "$gsissh -x -o \"BatchMode yes\" -o \"PreferredAuthentications external-keyx,gssapi\" $site /bin/echo \"Grid Status Test\"";

    &printres("Testing GSISSH at $site...");
    my $output = runcmd($command);

    if ($output eq $timeout_msg) {
        ### CASE 1 ###
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{"GSISSH"} = $FLAG_TIMEOUT;
    } elsif ($output eq "Grid Status Test\n") {
        ### CASE 2 ###
        &printres("succeeded\n");
        $resflag{$site}{"GSISSH"} = $FLAG_SUCCEEDED;
    } elsif ($output =~ /Grid Status Test/) {
        ### CASE 3 ### (exact match was already handled by case 2)
        &printres("succeeded (warning: extraneous output)\n");
        $output =~ s/Grid Status Test\n//;
        &printerr("Site: $site\n");
        &printerr("Test: GSISSH server test\n");
        &errlogentry($test, "Success, but warning: with extraneous output",
                     $command, $output, "", "");
        $resflag{$site}{"GSISSH"} = $FLAG_WARNING;
    } elsif ($output =~ /Connection refused/) {
        ### CASE 4 ###
        &printres("Service not found\n");
        my $likelyreason = "This most likely indicates $site does not have an sshd listening on port $port";
        &errlogentry($test, "Server not found", $command, $output, "", $likelyreason);
        # Record the outcome so reports and dependent tests can see it.
        $resflag{$site}{"GSISSH"} = $FLAG_NOSERVER;
    } else {
        # Somehow it failed. Let's find out why if we can.
        &printres("failed\n");
        $resflag{$site}{"GSISSH"} = $FLAG_FAILED;

        my $likelyreason = "";
        if ($output =~ /permission denied/i) {
            ### CASE 5 ###
            if ( ($output !~ /gssapi/) || ($output !~ /external-keyx/) ) {
                ### CASE 5 (a) ###
                $likelyreason = "The sshd running at $site on port $port does not appear to be GSI enabled.";
            } else {
                ### CASE 5 (b) ###
                $likelyreason = "You either do not have an account or are not in the grid-mapfile at $site.";
            }
        } elsif ($output =~ /host key verification failed/i) {
            ### CASE 5 (c) ### We attempted host key exchange, so not GSI-enabled.
            $likelyreason = "The sshd running at $site on port $port does not appear to be GSI enabled.";
        }
        &errlogentry($test, "Failure", $command, $output, "", $likelyreason);
    }
}

#
# testmpi
#
# Test MPI job submission through jobmanagers
#
# Dependencies: &testjobmanager
#               &testmpicc
#               (-x globusrun)
#
# globusrun -o -r <site>/<jobmanager> \
#     '&(executable=$(HOME)/mpi-cpi)(jobType=mpi)(count=2)'
#
# NOTE: $HOME/mpi-cpi is compiled in &testmpicc
#
# Records the outcome in $resflag{$site}{"<jobmanager>:mpijob"}; nothing
# is recorded when the test is skipped.
#
sub testmpi {
    my ($site, $jobmanager) = @_;
    my $test = "MPI test of $jobmanager";
    my $command = "globusrun -o -r $site/$jobmanager '&(executable=\$(HOME)/mpi-cpi)(jobType=mpi)(count=2)'";
    my $key = $jobmanager . ":mpijob";

    if (!(defined($globusrun))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globusrun' command not found", "");
        return;
    }

    if ( (!(defined($resflag{$site}{"mpicc"}))) ||
         (&nopass($resflag{$site}{"mpicc"})) ) {
        # Built by concatenation so the logged "Reason:" stays on one line.
        my $reason = "The mpicc compilation test failed or did not run. "
                   . "Because this test uses the binary compiled in that test, this test cannot run.";
        &errlogentry($test, "Not Tested", "", "", $reason, "");
        return;
    }

    if ( (!(defined($resflag{$site}{$jobmanager}))) ||
         (&nopass($resflag{$site}{$jobmanager})) ) {
        my $reason = "The simple test of $jobmanager failed or did not run. As this test is simply an advanced test running through $jobmanager, this test will not be attempted.";
        &errlogentry($test, "Not Tested", "", "", $reason, "");
        return;
    }

    &printres("Running mpi test job on $site/$jobmanager (count=2)...");
    my $output = &runcmd($command);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{$key} = $FLAG_TIMEOUT;
    } elsif (!($output =~ /pi is approximately 3\.14/)) {
        # (the dot is escaped so only a literal "3.14" counts)
        &printres("failed\n");
        # an empty likely-reason is simply omitted by errlogentry
        my $likelyreason = &gramerrorcheck($output);
        &errlogentry($test, "Failure", $command, $output, "", $likelyreason);
        $resflag{$site}{$key} = $FLAG_FAILED;
    } else {
        &printres("succeeded\n");
        $resflag{$site}{$key} = $FLAG_SUCCEEDED;
    }
}

#
# checkpath
#
# Checks to see if Globus commands are in $PATH on <site>
#
# Dependencies: &testgsissh
#               (-x gsissh)
#               (-x grid-proxy-info)
#
# run the command `grid-proxy-info -subject` locally
# then remotely, run:
#   gsissh -o "BatchMode yes" <site> /bin/sh \
#       --login -c '"grid-proxy-info -issuer"'
#
# If the remote output contains the output of the local run,
# then success!
#
# Records the outcome in $resflag{$site}{"path"}; nothing is recorded
# when the test is skipped.
#
sub checkpath {
    my ($site) = @_;
    my $test = "Grid Programs in \$PATH";

    if (!(defined($grid_proxy_info))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'grid-proxy-info' command not found", "");
        return;
    }

    if (!(defined($gsissh))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'gsissh' command not found", "");
        return;
    }

    if ( (!(defined($resflag{$site}{"GSISSH"}))) ||
         (&nopass($resflag{$site}{"GSISSH"})) ) {
        my $reason = "The GSISSH test failed or was not run, and this test relies on using gsissh, so we cannot run this test.";
        &errlogentry($test, "Not Tested", "", "", $reason, "");
        return;
    }

    my $command = "$gsissh -o \"BatchMode yes\" $site /bin/sh --login -c '\"grid-proxy-info -issuer\"'";

    &printres("Checking for Grid programs in PATH at $site...");

    my $identity = &runcmd("$grid_proxy_info -subject");
    my $output = &runcmd($command);

    # The local output is compared as a literal, whitespace-trimmed
    # string.  Used as a raw pattern, an empty value would match any
    # remote output and characters such as "." or "(" would act as regex
    # operators.
    my $subject = $identity;
    $subject =~ s/^\s+//;
    $subject =~ s/\s+$//;

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $command, "", $reason, "");
        $resflag{$site}{"path"} = $FLAG_TIMEOUT;
    } elsif ($identity ne $timeout_msg && $subject ne "" &&
             $output =~ /\Q$subject\E\s*$/m) {
        &printres("found\n");
        $resflag{$site}{"path"} = $FLAG_SUCCEEDED;
    } else {
        &printres("failed\n");
        $output = $output."\n\nLocal command output = $identity\n";
        &errlogentry($test, "Failure", $command, $output, "", "");
        $resflag{$site}{"path"} = $FLAG_FAILED;
    }
}

#
# testgsiftpjob
#
# Test job submission of ftp jobs through a jobmanager
#
# Dependencies: &testjobmanager
#               (-x globusrun)
#               (-x globus-url-copy)
#
# globus-url-copy file:///tmp/grid-status-test.<user>.<host> \
#     gsiftp://<site>/tmp/grid-status-test.<user>.<host>.server
# globusrun -o -s -r <site>/<jobmanager> \
#     '&(executable=$(GLOBUSRUN_GASS_URL) # "globus-gsiftpjob-test.sh") \
#       (environment=(LD_LIBRARY_PATH $(GLOBUS_LOCATION)/lib))'
#
# The shell script cats the output of the transferred file and looks for
# what should be in there. If the output of the cat command from the shell
# script contains the correct results, success!
#
# Records the outcome in $resflag{$site}{"<jobmanager>:gsiftpjob"};
# nothing is recorded when the test is skipped.
#
sub testgsiftpjob {
    my ($site, $jobmanager) = @_;
    my $test = "GSIFTP test of $jobmanager";
    my $testfile = "grid-status-test.$username.$hostname";
    my $testdir = "/tmp";
    my $testpath = "$testdir/$testfile";
    my $testjob = "/tmp/globus-gsiftpjob-test.sh";
    my $key = $jobmanager . ":gsiftpjob";
    # The process id makes the payload specific to this run.
    my $expected = "Grid Status Test $$";

    if (!(defined($globus_url_copy))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globus-url-copy' command not found", "");
        return;
    }

    if (!(defined($globusrun))) {
        &errlogentry($test, "Not Tested", "", "",
                     "'globusrun' command not found", "");
        return;
    }

    if ( (!(defined($resflag{$site}{$jobmanager}))) ||
         ($resflag{$site}{$jobmanager} == $FLAG_TIMEOUT) ||
         ($resflag{$site}{$jobmanager} == $FLAG_FAILED) ) {
        my $reason = "The simple test of $jobmanager failed or did not run. As this test is simply an advanced test running through $jobmanager, this test will not be attempted.";
        &errlogentry($test, "Not Tested", "", "", $reason, "");
        return;
    }

    # Each command line is built once and used both to run and to log.
    # The " # " inside the RSL string is part of the job description
    # handed to globusrun, not a Perl comment.
    my $copycmd = "$globus_url_copy file:$testpath gsiftp://$site/$testpath.server";
    my $jobmanagercmd = "$globusrun -o -s -r $site/$jobmanager '&(executable=\$(GLOBUSRUN_GASS_URL) # \"$testjob\")(environment=(LD_LIBRARY_PATH \$(GLOBUS_LOCATION)/lib))'";

    unlink $testpath;

    &printres("Testing GSIFTP job on $site/$jobmanager...");

    open(my $out, '>', $testpath) || die "can't write $testpath: $!";
    print $out "$expected\n";
    close($out) || die "can't close $testpath: $!";

    # Stage the test file on the server for the job to fetch.
    my $output = &runcmd($copycmd);
    unlink $testpath;

    # Output that starts with "<testpath>:" is tolerated; anything else
    # counts as a failed copy.  \Q..\E keeps the dots in the path literal.
    if ($output ne "" && !($output =~ /^\Q$testpath\E:/)) {
        &printres("globus-url-copy error encountered before test of $jobmanager\n");
        my $likelyreason = "Before the test of $jobmanager could be done, a globus-url-copy failed.";
        &errlogentry($test, "Not Tested", $copycmd, $output, "", $likelyreason);
        return;
    }

    # Write the job script: it copies the staged file back from the
    # server, prints it and cleans up after itself.
    open(my $job, '>', $testjob) || die "can't write $testjob: $!";
    print $job "#!/bin/sh\n";
    print $job "rm -f $testpath\n";
    print $job "\$GLOBUS_LOCATION/bin/globus-url-copy gsiftp://$site/$testpath.server file:$testpath.remote\n";
    print $job "if [ ! -e \"$testpath.remote\" ]; then\n";
    print $job " exit 1;\n";
    print $job "fi\n";
    print $job "cat $testpath.remote\n";
    print $job "rm $testpath.remote\n";
    print $job "exit 0\n";
    close($job) || die "can't close $testjob: $!";

    $output = &runcmd($jobmanagercmd);

    if ($output eq $timeout_msg) {
        &printres("timed out\n");
        my $reason = "The test did not complete within the window of $timeout seconds";
        &errlogentry($test, "Timed out", $jobmanagercmd, "", $reason, "");
        $resflag{$site}{$key} = $FLAG_TIMEOUT;
    } elsif (index($output, $expected) < 0) {
        &printres("failed\n");
        # Log the command that was actually run; an empty likely-reason
        # is simply omitted by errlogentry.
        my $likelyreason = &gramerrorcheck($output);
        &errlogentry($test, "Failure", $jobmanagercmd, $output, "", $likelyreason);
        $resflag{$site}{$key} = $FLAG_FAILED;
    } else {
        &printres("succeeded\n");
        $resflag{$site}{$key} = $FLAG_SUCCEEDED;
    }
}

#
# writehtmlresults
#
# Write results to an html file
#
# Writes a page titled $htmltitle containing one table row per site and
# one column per entry of @tests, followed by the collected $results
# and, when there are any, $errors.
#
# NOTE(review): the HTML tags of this routine were missing from the
# source this version was prepared from; the markup below (including the
# cell colours) is a plain reconstruction around the surviving text and
# control flow -- confirm against the intended layout.
#
sub writehtmlresults {
    my ($file) = @_;

    # Escape text that is not meant to be markup (command output, names).
    my $esc = sub {
        my ($text) = @_;
        $text = "" if (!defined($text));
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        return $text;
    };

    my $title = $esc->($htmltitle);

    open(my $fh, '>', $file) || die "can't write $file: $!";

    print $fh "<html>\n<head>\n";
    print $fh "<title>$title</title>\n";
    print $fh "</head>\n<body>\n";
    print $fh "<center><h2>$title ($timestr)</h2></center>\n";
    print $fh "<table border=\"1\">\n";
    print $fh "<tr><th>Site</th>\n";

    # Header row: per-jobmanager columns are labelled with the generic
    # test label plus the jobmanager name.
    foreach my $test (@tests) {
        my $label;
        if ($test =~ /mpijob/) {
            my ($jm) = split(/:/, $test);
            $label = $tests{"mpijob"} . " " . $jm;
        } elsif ($test =~ /gsiftp/) {
            my ($jm) = split(/:/, $test);
            $label = $tests{"gsiftpjob"} . " " . $jm;
        } elsif ($test =~ /jobmanager/) {
            $label = $tests{"simplejob"} . " " . $test;
        } else {
            $label = $tests{$test};
        }
        print $fh "<th>", $esc->($label), "</th>\n";
    }
    print $fh "</tr>\n";

    # One row per site; a test with no recorded result gets an empty cell.
    foreach my $site (@testbedsites) {
        print $fh "<tr><td>", $esc->($site), "</td>\n";
        foreach my $test (@tests) {
            my $flag = $resflag{$site}{$test};
            if (!defined($flag)) {
                print $fh "<td>&nbsp;</td>\n";
            } elsif ($flag == $FLAG_FAILED) {
                print $fh "<td bgcolor=\"#ff6666\">Fail</td>\n";
            } elsif ($flag == $FLAG_SUCCEEDED) {
                print $fh "<td bgcolor=\"#66ff66\">Pass</td>\n";
            } elsif ($flag == $FLAG_WARNING) {
                print $fh "<td bgcolor=\"#ffff66\">Warnings</td>\n";
            } elsif ($flag == $FLAG_TIMEOUT) {
                print $fh "<td bgcolor=\"#ffcc66\">Timed Out</td>\n";
            } elsif ($flag == $FLAG_NOSERVER) {
                print $fh "<td bgcolor=\"#cccccc\">No Server</td>\n";
            }
        }
        print $fh "</tr>\n";
    }
    print $fh "</table>\n";

    print $fh "<h3>Grid Status Test Output</h3>\n";
    print $fh "<pre>\n", $esc->($results), "</pre>\n";

    if ($errors ne "") {
        print $fh "<h3>Grid Status Test Errors</h3>\n";
        print $fh "<pre>\n", $esc->($errors), "</pre>\n";
    }

    print $fh "<hr>\n";
    print $fh "Generated by the grid-status-test script written by Joe Greenseid, ";
    print $fh "run by ", $esc->($username), " on ", $esc->($hostname), ".\n";
    print $fh "</body>\n</html>\n";

    close($fh) || die "can't close $file: $!";
}

#
# summarizeresults
#
# Print out a summary of results
#
# <site>
#  <N> success(es), <N> warning(s), <N> error(s), <N> timeout(s)
#
# Tests with no recorded result are not counted.  Tests that found no
# server are listed individually below the counts.
#
sub summarizeresults {
    print "\nResults\n";
    print "-------\n";

    foreach my $site (@testbedsites) {
        # Lexical counters: the global $errors log text is left alone.
        my ($successes, $warnings, $failures, $timeouts, $noserver) = (0, 0, 0, 0, 0);
        my $servers = "";

        foreach my $test (@tests) {
            my $flag = $resflag{$site}{$test};
            next if (!defined($flag));

            if ($flag == $FLAG_FAILED) {
                $failures++;
            } elsif ($flag == $FLAG_WARNING) {
                $warnings++;
            } elsif ($flag == $FLAG_TIMEOUT) {
                $timeouts++;
            } elsif ($flag == $FLAG_SUCCEEDED) {
                $successes++;
            } elsif ($flag == $FLAG_NOSERVER) {
                $noserver++;
                $servers .= "\t$test test attempted, but server not running\n";
            }
        }

        print "$site\n";
        print " $successes success(es), $warnings warning(s), $failures error(s), $timeouts timeout(s)\n";
        if ($noserver > 0) {
            print "$servers";
        }
        print "\n";
    }
}

#
# writempiccsource
#
# Write source to be used in test compilation by mpicc
# to a file on disk
# (this file gets copied to to be compiled there)
#
sub writempiccsource { local ($tmpfile) = @_; local $source = "#include \"mpi.h\" #include #include double f( double ); double f( double a ) { return (4.0 / (1.0 + a*a)); } int main( int argc, char *argv[]) { int done = 0, n, myid, numprocs, i; double PI25DT = 3.141592653589793238462643; double mypi, pi, h, sum, x; double startwtime = 0.0, endwtime; int namelen; char processor_name[MPI_MAX_PROCESSOR_NAME]; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&numprocs); MPI_Comm_rank(MPI_COMM_WORLD,&myid); MPI_Get_processor_name(processor_name,&namelen); fprintf(stderr,\"Process %d on %s\\n\", myid, processor_name); n = 0; 
while (!done) { if (myid == 0) { if (n==0) n=100; else n=0; startwtime = MPI_Wtime(); } MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); if (n == 0) done = 1; else { h = 1.0 / (double) n; sum = 0.0; for (i = myid + 1; i <= n; i += numprocs) { x = h * ((double)i - 0.5); sum += f(x); } mypi = h * sum; MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (myid == 0) { printf(\"pi is approximately %.16f, Error is %.16f\\n\", pi, fabs(pi - PI25DT)); endwtime = MPI_Wtime(); printf(\"wall clock time = %f\\n\", endwtime-startwtime); } } } MPI_Finalize(); return 0; }"; open (SOURCE, ">$tmpfile") || die "can't open $tmpfile\n"; print SOURCE "$source\n"; close (SOURCE) || die; } # # writempiccshell # # Write a shell script that will attempt to compile the mpi # source program written in &writempiccsource to disk # (this script is copied to and used to test mpicc there) # sub writempiccshell { local ($tmpfile) = @_; local $script = "#!/bin/sh MPI_TEST=\"\$HOME/mpi-cpi\" MPICC=`which mpicc 2> /dev/null` TESTFILE=\"\$HOME/$hostname.teragrid-test-cpi.c\" if [ ! -x \${MPICC} ] || [ \"\${MPICC}\" == \"\" ] ; then echo \"Can't find mpicc\" exit 1 fi \${MPICC} -o \${MPI_TEST} \${TESTFILE} if [ ! 
-x \${MPI_TEST} ] || [ \"\${MPI_TEST}\" == \"\" ] ; then echo \"mpi cpi test program was not found in\" echo \"\$HOME\" echo \"and \${TESTFILE} build failed\" echo \"so skipping mpi tests\" exit 1 fi echo \"Success\" exit 0"; open (SCRIPT, ">$tmpfile") || die "can't open $tmpfile\n"; print SCRIPT "$script"; close (SCRIPT) || die; } # # testmpicc # # See if mpicc on can successfully compile a small program # (mpicc used is first mpicc found in $PATH on ) # # Dependencies: (-x globus-url-copy) # (-x gsissh) # &testgsissh # &testgsiftp # # globus-url-copy file:// gsiftp:/// # globus-url-copy file:// gsiftp:/// # gsissh '/bin/sh --login # # : script written in &writempiccshell # : source file written in &writempiccsource # # NOTE: Success is defined as both globus-url-copy commands not # returning any errors, and the ssh command that runs the shell # script not returning any errors (such as compilation failing) # sub testmpicc { local($site, $srcfile, $shellscript) = @_; local($test) = "Mpicc test compilation"; local($shellcopycmd) = "$globus_url_copy file://$shellscript gsiftp://$site/~/$hostname.teragrid-test-mpitest.sh"; local($srccopycmd) = "$globus_url_copy file://$srcfile gsiftp://$site/~/$hostname.teragrid-test-cpi.c"; local($sshcmd) = "$gsissh $site '/bin/sh --login ~/$hostname.teragrid-test-mpitest.sh'"; if (!(defined($globus_url_copy))) { local($result) = "Not Tested"; local($reason) = "'globus-url-copy' command not found"; &errlogentry($test, $result, "", "", $reason, ""); return; } if (!(defined($gsissh))) { local($result) = "Not Tested"; local($reason) = "'gsissh' command not found"; &errlogentry($test, $result, "", "", $reason, ""); return; } if ( (!(defined($resflag{$site}{"GSISSH"}))) || (&nopass($resflag{$site}{"GSISSH"})) ) { local($result) = "Not Tested"; local($reason) = "The GSISSH Server test either failed or did not run. 
Since this test relies on using gsissh, this test cannot be run."; &errlogentry($test, $result, "", "", $reason, ""); return; } if ( (!(defined($resflag{$site}{"GSIFTP"}))) || (&nopass($resflag{$site}{"GSIFTP"})) ) { local($result) = "Not Tested"; local($reason) = "The GSIFTP Server test either failed or did not run. Since this test relies on using globus-url-copy, this test cannot be run."; &errlogentry($test, $result, "", "", $reason, ""); return; } &printres("Testing mpicc on $site..."); local($shellcopyoutput) = &runcmd("$globus_url_copy file://$shellscript gsiftp://$site/~/$hostname.teragrid-test-mpitest.sh"); local($srccopyoutput) = &runcmd("$globus_url_copy file://$srcfile gsiftp://$site/~/$hostname.teragrid-test-cpi.c"); if ( ($shellcopyoutput eq "") && ($srccopyoutput eq "") ) { local($sshoutput) = &runcmd("$gsissh $site '/bin/sh --login ~/$hostname.teragrid-test-mpitest.sh'"); if ($sshoutput =~ /Success/) { &printres("succeeded\n"); $resflag{$site}{"mpicc"} = $FLAG_SUCCEEDED; } else { &printres("failed\n"); local($result) = "Failure"; local($output) = " Failed during execution of script. Output of ssh command was:\n $sshoutput"; local($all_commands) = "\n Transfer 1: $shellcopycmd\n Transfer 2: $srccopycmd\n SSH command execution: $sshcmd\n"; &errlogentry($test, $result, $all_commands, $output, "", ""); $resflag{$site}{"mpicc"} = $FLAG_FAILED; } } else { &printres("failed\n"); local($result) = "Failure"; local($errors) = " Failed during transfer of files to $site. 
Output of transfers follows:\n Shell script copy:\n $shellcopyoutput\n\n Source file copy:\n $srccopyoutput\n"; local($all_commands) = "\n Transfer 1: $shellcopycmd\n Transfer 2: $srccopycmd"; &errlogentry($test, $result, $all_commands, $errors, "", ""); $resflag{$site}{"mpicc"} = $FLAG_FAILED; } } # # writecondorgtest # # Write to disk a small Perl script that will be used to do # the following: # # 1) Write to file a small Condor-G test # 2) Submit the test via condor_submit # 3) If successful submit, monitor output file for results # 4) If after 30 seconds, results not output, then condor_rm the test, # call it a failure, and die. # If results seen in output file, then success! # # NOTE: This subroutine *only* writes the Perl script to disk # &testcondorg actually copies the script to and # runs it # sub writecondorgtest { local($tmpfile, $site) = @_; local $script = "#!/usr/bin/perl \$condor_test = \"\$ENV{'HOME'}/$hostname.condorgtest-output\"; \$condor_out = \"/tmp/$username.$hostname.condorgtest.out\"; \$condor_log = \"/tmp/$username.$hostname.condorgtest.log\"; open (TEST, \">\$condor_test\") || die \"can't open \$condor_test\"; print TEST \"executable = /bin/echo\\n\"; print TEST \"arguments = Grid Status Test\\n\"; print TEST \"transfer_executable = false\\n\"; print TEST \"globusscheduler = $site/jobmanager\\n\"; print TEST \"universe = globus\\n\"; print TEST \"output = \$condor_out\\n\"; print TEST \"log = \$condor_log\\n\"; print TEST \"queue\\n\"; close (TEST); local \$submit_output = `condor_submit \$condor_test`; if (\$submit_output =~ /submitted to cluster (\\d+)./) { \$cluster_id = \$1; } if (\$? 
!= 0) { print \"condor_submit failed to submit the job properly.\\n\"; print \"Output from condor_submit follows:\\n\"; print \"\$submit_output\\n\"; exit; } for (\$i=0; \$i<15; \$i++) { local \$/; open (OUT, \"\$condor_out\") || die \"can't open \$condor_out\"; local \$output = ; close (OUT); if (\$output =~ /Grid Status Test/) { print \"Success\\n\"; exit; } sleep 2; } print \"\\nThe job did not run in 30 seconds, this most likely means\\n\"; print \"That something is wrong. Removing job from the queue.\\n\"; `condor_rm \$cluster_id >& /dev/null`;"; open (SCRIPT, ">$tmpfile") || die "can't open $tmpfile\n"; print SCRIPT "$script"; close (SCRIPT) || die; } # # testcondorg # # Test Condor-G in two steps: # # 1) Successfully run condor_q command on # 2) Successfully submit a job to condor and get results out of it # on # # Failure of either of these is failure. # # Dependencies: (-x globus-url-copy) # (-x gsissh) # &testgsissh # &testgsiftp # # gsissh -o "BatchMode yes" /bin/sh --login -c condor_q # globus-url-copy file://