#!/bin/bash
#
# pdist_file.sh
# version 1.0.2 April 18, 2005
# Copyright 2005, Caltech and David Mathog
# Send bug reports or comments to mathog@caltech.edu
#
# This script distributes a file to a list of nodes using a
# daisychain method similar to that in the program "dolly".
# The heavy lifting is done by nettee, which is derived from dolly.
# Each target node reads through the listfile until it finds
# itself and then picks up the next line in the list and uses
# that as the name of the next machine to contact. If it doesn't
# find itself it picks the first machine in the list.
# It then tees the dataflow and sends output to the disk and
# also via an rsh to the next node in the daisychain.
#
# Tests with a linux 2.6.8 kernel and 100baseT recorded throughput
# that varied from 4-6Mbytes/sec. Roughly 1/3 to 1/2 of the
# theoretical network bandwidth.
#
# Input is ALWAYS from stdin.
#
# This uses my extract program instead of awk etc. Extract is
# available as source code here, as is this script and nettee:
#
#   ftp://saf.bio.caltech.edu/pub/software/linux_or_unix_tools/
#
# Usage:
#
#   cat file_to_move | pdist_file.sh LISTFILE LOCALSAVE LSPARAM
#
# Parameters (all three are required):
#
# 1. listfile.  This is specified as "/path/listfile.txt"
#    and contains the list of nodes to receive the file
#    being distributed. Typically this is the PVM or MPI
#    list file.
# 2. localsave. Script to store and/or process the data stream
#    on each compute node. Note, this script should be able to
#    keep up with the network writes. If not, store the stream to
#    a file and then come back and process it in parallel on all nodes.
# 3. lsparam.   A single parameter handed to the localsave script,
#    for instance a file name. It must not be empty; localsave
#    scripts may be written to ignore it.
#
# Exit status:
#
#   0  the script ran to the final "DONE" message
#   1  bad parameters, or no nodes were found in the list
#
# All messages, including failures, are written to stdout so that they
# end up in the log stream that is relayed back along the chain.
#
####################################################
#Set these as appropriate for your site
####################################################
OPENSSL=/usr/bin/openssl          # not referenced below; kept for site configs
EXTRACT=/usr/common/bin/extract
NETTEE="/usr/common/bin/nettee -q -t 10"
FINDHOSTNAME="/bin/hostname -s"
ACCUDATE=/usr/common/bin/accudate
THISSCRIPT=/usr/common/bin/pdist_file.sh  #path to this script
#
# The script must also be able to find:
#   rm, cat, tee, and echo
# and echo must support "-n".
#
# NOTE: NETTEE and FINDHOSTNAME hold a command plus its options, so they
# are expanded unquoted on purpose wherever they are used.
#

# Print the usage text to stdout.
usage() {
  printf '%s\n' \
    "$IAM: failure wrong number of parameters" \
    "usage: cat file_to_move | $THISSCRIPT LISTFILE LOCALSAVE LSPARAM" \
    " LISTFILE list of target nodes, one node per line" \
    " LOCALSAVE the name of a script that process the data stream locally" \
    " on each node. It runs this way on each node:" \
    ' nettee | $LOCALSAVE $LSPARAM 2>&1' \
    " Example 1: Store the stream to a file specified by LSPARAM" \
    " #!/bin/sh" \
    ' cat - > $1' \
    " Example 2: cd to directory LSPARAM and unpack:" \
    " #!/bin/sh" \
    ' cd $1' \
    " gunzip -c | tar xf - " \
    " LSPARAM A single parameter, for instance, a file name." \
    " LOCALSAVE scripts may be written to ignore it." \
    "" \
    " The concatentated log files from all nodes are echoed to stdout."
}

# Report a failure message ($1) on stdout and exit with status 1.
die() {
  echo "$1"
  exit 1
}

# Print the first whitespace-delimited word of $1 (nothing if there is none).
# Used on the output of extract so that stray blanks around a node name are
# dropped, as they were when the value was expanded unquoted.
first_word() {
  local word
  read -r word _ <<<"$1"
  printf '%s' "$word"
}

# Emit the node list via the first-stage extract call (unchanged from the
# original pipelines; presumably it drops lines starting with '#').
list_nodes() {
  $EXTRACT -in "$LISTFILE" -if '!^#' -ifonly
}

# Start this script on $NEXTINLIST through rsh, in the background, and record
# the job's pid in BACKRSH. The arguments are shell-quoted with printf %q so
# that the remote shell hands them to the script as the same three words.
start_on_next_node() {
  local remote_cmd
  remote_cmd=$(printf '%q %q %q %q' \
    "$THISSCRIPT" "$LISTFILE" "$LOCALSAVE" "$LSPARAM")
  # echo "$IAM: Starting script on $NEXTINLIST"
  rsh "$NEXTINLIST" "$remote_cmd" </dev/null 2>&1 &
  BACKRSH=$!
}

DODELTA=0
STIME=$($ACCUDATE -t0)

# find the current machine's name and find the NEXT one in input list
#
IAM=$($FINDHOSTNAME)

if [[ $# -ne 3 ]]; then
  usage
  exit 1
fi

LISTFILE=$1
LOCALSAVE=$2
LSPARAM=$3

# The operands are quoted so that an empty argument is rejected instead of
# turning the test into a one-argument form that always succeeds.
[[ -r "$LISTFILE" ]] ||
  die "$IAM: failure: $LISTFILE does not exist or is not readable"
[[ -x "$LOCALSAVE" ]] ||
  die "$IAM: failure: $LOCALSAVE is not an executable program or script"
[[ -n "$LSPARAM" ]] ||
  die "$IAM: failure: LSPARAM must be supplied"

#
# find if this node is in the input list
#
BACKRSH=0
INLIST=$(first_word "$(list_nodes | $EXTRACT -if "$IAM" -ifonly)")

if [[ -n "$INLIST" ]]; then
  # find the next name in the list and start with that
  # note the /dev/null part on the third extract is to eliminate
  # warning about starting beyond the end of the file, which will
  # always happen for the next line after the final node.
  NEXTINLIST=$(first_word "$(
    list_nodes \
      | $EXTRACT -if "$IAM" -ifn 1 -ifonly \
      | $EXTRACT -sr 2 2>/dev/null
  )")

  if [[ -n "$NEXTINLIST" ]]; then
    # there is another node in the list. Do two things:
    # 1. start this script on it
    start_on_next_node

    # 2. start nettee directed to it
    #    handle the local part, in foreground. stderr of LOCALSAVE is merged
    #    into stdout, as the usage text states and as the list-end case does.
    echo "$IAM: [nettee] Connecting to $NEXTINLIST"
    $NETTEE -v 17 -next "$NEXTINLIST" | "$LOCALSAVE" "$LSPARAM" 2>&1
  else
    echo "$IAM: is [ListEnd]"
    #
    # this is the last node in the list, write file, but no network ops.
    # LOCALSAVE will read directly from stdin.
    #
    $NETTEE -v 17 | "$LOCALSAVE" "$LSPARAM" 2>&1
  fi
else
  # find the first name in the list and start with that
  # just write straight through the rsh since there's no need to store
  # the stream locally.
  # the /dev/null on the 2nd extract eliminates a warning
  # message from extract if the input list is empty
  NEXTINLIST=$(first_word "$(list_nodes | $EXTRACT -nr 1 2>/dev/null)")

  if [[ -n "$NEXTINLIST" ]]; then
    # 1. start nettee on next node in the list
    start_on_next_node

    # 2. start nettee, no local save operation, read from stdin
    echo "$IAM: [stdin] connecting to $NEXTINLIST"
    DODELTA=1
    $NETTEE -v 17 -in - -out none -next "$NEXTINLIST"
  else
    # there are no nodes in the list - fatal error
    die "$IAM fatal error - no nodes in the list"
  fi
fi

# Wait for the background rsh (if one was started) so that the output of the
# downstream nodes is relayed before this node reports DONE.
if [[ $BACKRSH -gt 0 ]]; then
  wait "$BACKRSH"
fi

# Only the node that fed the chain from stdin reports the elapsed time.
if [[ $DODELTA -gt 0 ]]; then
  # STIME is left unquoted so accudate receives it exactly as before.
  # shellcheck disable=SC2086
  ETIME=$($ACCUDATE -ds $STIME)
  echo "$IAM: DONE, elapsed time: $ETIME"
else
  echo "$IAM: DONE"
fi
exit 0