#!/bin/bash
#
# pdist_shell.sh
# version 1.0.3 May 3, 2005
# Copyright 2005, Caltech and David Mathog
# Send bug reports or comments to mathog@caltech.edu
#
# This script creates a command chain across a set of nodes.
# On the top node it creates a fifo to which shell commands may
# be written.  Then it distributes those commands very quickly down
# the chain using nettee so that they may be executed on all nodes
# (except the top one).
#
# Input is ALWAYS from stdin.
#
# This uses my extract and execinput programs.  These are available as
# source code here:
#
#   ftp://saf.bio.caltech.edu/pub/software/linux_or_unix_tools/
#
#
# parameters are:
#
#  1. listfile.  This is specified as "/path/listfile.txt"
#       and contains the list of nodes to receive the file
#       being distributed.  Typically this is the PVM or MPI
#       list file.
#  2. fifoname.  Full path for the fifo.  Typically it goes in /tmp
#       but you may put it elsewhere.  The script passes an empty
#       string ("") here when it starts itself on the next node;
#       an empty fifoname marks that instance as a relay ("slave").
#
# Exit status: 0 on normal completion, 1 on any reported failure.
#
####################################################
#Set these as appropriate for your site
####################################################
EXECINPUT=/usr/common/bin/execinput
EXTRACT=/usr/common/bin/extract
FINDHOSTNAME="/bin/hostname -s"
PORT=12000  #port to use for command nettee chain
# String handed to nettee's -stm option.  Per the usage text below,
# echoing it to the fifo terminates this script.
EOS=EOS
NETTEE="/usr/common/bin/nettee -p $PORT -q -t 0 -stm $EOS"
ACCUDATE=/usr/common/bin/accudate
THISSCRIPT=/usr/common/bin/pdist_shell.sh  #path to this script
#
#  The script must also be able to find:
#    rm, cat, tee, and echo
#  and echo must support "-n".
#

DODELTA=0       # set to 1 on the top node so elapsed time is reported
BACKRSH=0       # PID of the background rsh that starts the next node
FIFO_CREATED=0  # set to 1 once this run has created the fifo itself

# FINDHOSTNAME, NETTEE (and the other tool variables) hold a command plus
# its options, so they are expanded unquoted on purpose.
STIME=$($ACCUDATE -t0)

# Remove the fifo on any exit path, but only if this run created it, so
# that a failure after mkfifo does not leave a stale fifo behind.
cleanup_fifo() {
  if [[ $FIFO_CREATED -eq 1 ]]; then
    rm -f -- "$FIFONAME"
  fi
}
trap cleanup_fifo EXIT

# find the current machine's name and find the NEXT one in input list
#
IAM=$($FINDHOSTNAME)

if [[ $# -ne 2 ]]; then
  echo "$IAM: failure wrong number of parameters"
  echo "$THISSCRIPT LISTFILE FIFONAME"
  echo "  LISTFILE  list of target nodes, one node per line"
  echo "  FIFONAME  full path to the fifo which will accept shell commands."
  echo "While this script is running commands echoed to FIFONAME"
  echo "will be executed on every target node (in parallel)."
  echo "Text written to stdout or stderr by these commands will"
  echo "be returned to the session that started this script."
  echo "(Simultaneous output may be interleaved and could be unreadable.)"
  echo "To terminate this script echo $EOS to FIFONAME."
  exit 1
fi

LISTFILE=$1
FIFONAME=$2

if [[ ! -r "$LISTFILE" ]]; then
  echo "$IAM: failure: $LISTFILE does not exist or is not readable"
  exit 1
fi

if [[ -z "$FIFONAME" ]]; then
  # Empty fifoname: this instance was started by the previous node.
  MESLAVE="YES"
else
  MESLAVE=""
  # Only the person running this file may use the fifo.  A umask lists the
  # permission bits to REMOVE, so 0077 leaves the fifo at mode 0600
  # (a umask of 0600 would have removed the owner's own access instead).
  umask 0077
  if mkfifo -- "$FIFONAME"; then
    FIFO_CREATED=1
  fi
  if [[ ! -r "$FIFONAME" ]]; then
    echo "$IAM: failure: $FIFONAME could not be created"
    exit 1
  fi
fi

#
# find if this node is in the input list
#
INLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly \
  | $EXTRACT -if "$IAM" -ifonly)

# The tests below use quoted -n/-z so that a multi-word result (for
# example more than one matching line) is still treated as "non-empty"
# instead of causing a test syntax error.
if [[ -n "$INLIST" ]]; then
  if [[ -z "$MESLAVE" ]]; then
    echo "$IAM: failure: $LISTFILE indicates slave but fifoname is >$FIFONAME<"
    exit 1
  fi

  # find the next name in the list and start with that
  #note the /dev/null part on the third extract is to eliminate
  #warning about starting beyond the end of the file, which will
  #always happen for the next line after the final node.
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly \
    | $EXTRACT -if "$IAM" -ifn 1 -ifonly \
    | $EXTRACT -sr 2 2>/dev/null)

  if [[ -n "$NEXTINLIST" ]]; then
    #there is another node in the list.  Do two things:
    #1. start this script on it (empty fifoname marks it as a relay)
    #
    echo "$IAM: Starting script on $NEXTINLIST"
    rsh "$NEXTINLIST" "$THISSCRIPT $LISTFILE \"\" " \
      </dev/null 2>&1 &
    BACKRSH=$!

    #2. start nettee directed to it
    #handle the local part, in foreground
    echo "$IAM: [nettee] Connecting to $NEXTINLIST"
    export NEXTNODE=$NEXTINLIST
    $NETTEE -v 17 -next "$NEXTNODE" | $EXECINPUT 2>&1
  else
    echo "$IAM: is [ListEnd]"
    #
    #this is the last node in the list: run nettee without -next and
    #feed its output to execinput, no onward network connection.
    #
    export NEXTNODE=_EOC_
    $NETTEE -v 17 | $EXECINPUT 2>&1
  fi
else
  if [[ -n "$MESLAVE" ]]; then
    echo "$IAM: failure: $LISTFILE indicates not slave but fifoname is >$FIFONAME<"
    exit 1
  fi

  # find the first name in the list and start with that
  # just write straight through the rsh since there's no need to store
  # the stream locally.
  #the /dev/null on the 2nd extract eliminates a warning
  # message from extract if the input list is empty
  NEXTINLIST=$($EXTRACT -in "$LISTFILE" -if '!^#' -ifonly \
    | $EXTRACT -nr 1 2>/dev/null)

  if [[ -n "$NEXTINLIST" ]]; then
    #1. start this script on the first node in the list
    rsh "$NEXTINLIST" "$THISSCRIPT $LISTFILE \"\" " </dev/null 2>&1 &
    BACKRSH=$!

    #2. start nettee, no local save operation, input taken from the fifo
    echo "$IAM: [stdin] connecting to $NEXTINLIST"
    DODELTA=1
    $NETTEE -v 17 -in "$FIFONAME" -out none -next "$NEXTINLIST"
  else
    #there are no nodes in the list - fatal error
    echo "$IAM fatal error - no nodes in the list"
    exit 1
  fi
fi

if [[ $BACKRSH -gt 0 ]]; then
  # Let the remote part of the chain finish before removing the fifo.
  wait "$BACKRSH"
  if [[ -n "$FIFONAME" ]]; then
    rm -f -- "$FIFONAME"
  fi
fi

if [[ $DODELTA -gt 0 ]]; then
  ETIME=$($ACCUDATE -ds "$STIME")
  echo "$IAM: DONE, elapsed time: $ETIME"
else
  echo "$IAM: DONE"
fi
exit 0