Sophie

Sophie

distrib > Fedora > 13 > i386 > media > updates-src > by-pkgid > 56669419e71578ed363610507e39dcd4 > files > 29

gridengine-6.2u5-5.fc13.src.rpm

#!/bin/sh
#
# sgemaster Gridengine master daemon and scheduler
#
# chkconfig:   - 98 02
# description: The gridengine master daemon and scheduler

### BEGIN INIT INFO
# Provides: sge_qmaster
# Required-Start: $network $local_fs $remote_fs
# Required-Stop: $network $local_fs $remote_fs
# Should-Start: 
# Should-Stop: 
# Default-Start: 
# Default-Stop: 0 1 6
# Short-Description: Gridengine master daemon and scheduler
# Description: The gridengine master daemon and scheduler
### END INIT INFO

# Source function library.
. /etc/rc.d/init.d/functions

# Path and process name of the qmaster daemon managed by this script.
master_exec="/usr/bin/sge_qmaster"
master_prog="sge_qmaster"

# Defaults; the configuration file sourced below may override them.
SGE_ROOT=/usr/share/gridengine; export SGE_ROOT
SGE_CELL=default; export SGE_CELL

# Configuration
config=/etc/sysconfig/gridengine
[ -e "$config" ] && . "$config"

# The qmaster spool directory is the second field of the "qmaster_spool_dir"
# line in the cell's bootstrap file. The path is quoted so that an SGE_ROOT or
# SGE_CELL containing whitespace is still passed to awk as a single file name.
qmaster_spool_dir=$(awk '$1 == "qmaster_spool_dir" { print $2 }' "$SGE_ROOT/$SGE_CELL/common/bootstrap")
master_pidfile=$qmaster_spool_dir/qmaster.pid
# Status returned by start()/stop(); they overwrite it with the result of the
# daemon/killproc calls they make.
retval=0


#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Compare the hostname given in $1 with the contents of the
#    "act_qmaster" file of the current cell ($SGE_ROOT/$SGE_CELL/common).
#    Prints "true" on an exact match, otherwise "false".
#    A missing or unreadable file always yields "false".
#
CheckIfQmasterHost()
{
   host=$1

   # The readability test keeps an absent file from turning into an empty
   # string that would compare equal to an empty hostname. The path is quoted
   # so that a cell directory containing whitespace is read as one file.
   if [ -r "$SGE_ROOT/$SGE_CELL/common/act_qmaster" ] &&
      [ "$host" = "$(cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster")" ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Compare the hostname given in $1 with the contents of the
#    "primary_qmaster" file of the current cell.
#    Prints "true" if the file exists and its contents equal the hostname,
#    otherwise "false".
#
CheckIfPrimaryQmasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster

   # $fname is quoted so that a path containing whitespace is tested and read
   # as a single file instead of being split into several words.
   if [ -f "$fname" ] && [ "$host" = "$(cat "$fname")" ]; then
      echo true
   else
      echo false
   fi
}


#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if the hostname given in $1 is contained in the
#    "shadow_masters" file of the current cell.
#    Sets the global shadow_host to "true" if it is, otherwise to "false".
#    The comparison is case-insensitive and matches the hostname as a
#    literal substring of a line.
#
CheckIfShadowMasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
   shadow_host="false"

   # An empty hostname must never match: without the -n guard grep would be
   # handed an empty pattern. -F treats the dots of the hostname literally,
   # -q suppresses the matched lines and 2>/dev/null hides read errors.
   if [ -n "$host" ] && [ -f "$fname" ] &&
      grep -q -i -F -- "$host" "$fname" 2>/dev/null; then
      shadow_host="true"
   fi
}

#---------------------------------------------------------------------------
# GetAdminUser
#    Print the name of the admin user configured in the cell's bootstrap
#    file (second field of the "admin_user" line).
#    Prints "root" if the file is missing, has no admin_user entry, or the
#    entry is "none" in any letter case.
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f "$cfgname" ]; then
      # Match the key as the first field only, so comments or other keys that
      # merely contain "admin_user" are ignored; stop at the first hit.
      user=$(awk '$1 == "admin_user" { print $2; exit }' "$cfgname")
   fi

   # An empty result (no entry found) is treated like "none".
   if [ -z "$user" ] || [ "$(echo "$user" | tr "A-Z" "a-z")" = "none" ]; then
      user=root
   fi
   echo "$user"
}

#---------------------------------------------------------------------------
# CheckRunningQmaster
#    Wait until sge_qmaster answers a qping on the host named in the
#    "act_qmaster" file. Up to 30 attempts are made, sleeping 2 seconds
#    after each failed one; act_qmaster is re-read before every retry.
#    If SGE_QMASTER_PORT is empty it is looked up with the getservbyname
#    utility first.
#    Prints a notice if the qmaster never answered.
#    Returns 0 if the qmaster answered, 1 otherwise.
#
CheckRunningQmaster()
{
   masterhost=$(cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster")
   running=false
   loop=0

   if [ "$SGE_QMASTER_PORT" = "" ]; then
      SGE_QMASTER_PORT=$("$utilbin_dir/getservbyname" -number sge_qmaster)
   fi

   while [ "$running" = "false" ] && [ "$loop" -ne 30 ]; do
      if qping -info "$masterhost" "$SGE_QMASTER_PORT" qmaster 1 > /dev/null 2>&1; then
         running=true
      else
         sleep 2
         # The active master may have changed while waiting.
         masterhost=$(cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster")
         loop=$((loop + 1))
      fi
   done

   if [ "$running" = "false" ]; then
      echo
      echo "sge_qmaster didn't start!"
      echo "Please check the messages file"
      echo
      return 1
   fi
   return 0
}

#---------------------------------------------------------------------------
# usage
#    Print the help text describing the accepted parameters on stdout and
#    terminate the script with exit status 1.
#
usage()
{
   # Quoted delimiter: the text below is emitted literally, without expansion.
   cat <<'EOF'
Grid Engine start/stop script. Valid parameters are:

   "start"        start qmaster daemon
   "stop"         shutdown qmaster daemon
   "-qmaster"     only start/stop qmaster (if applicable)
   "-shadowd"     only start/stop shadowd (if applicable)
   "-migrate"     shutdown qmaster if it's running on another
                    host and restart it on this host
                    Migration only works if this host is an admin host

Only one of the parameters "start", "stop" is allowed.
Only one of the parameters beginning  with "-" is allowed.

Default for "stop" is shutting down all components.

EOF
   exit 1
}

# CheckArgs
#    Translate the component selector given in $1 into the global flags
#    qmaster, shadowd and migrate_qmaster. Any other value ends in usage.
CheckArgs() {
   case "$1" in
      -qmaster)
         qmaster=true
         shadowd=false
         ;;
      -shadowd)
         qmaster=false
         shadowd=true
         ;;
      -migrate)
         migrate_qmaster=true
         qmaster=true
         shadowd=false
         ;;
      *)
         usage
         ;;
   esac
}

# Directory holding the Grid Engine helper binaries used by this script
# (gethostname, getservbyname).
utilbin_dir=/usr/libexec/gridengine/utilbin
# NOTE(review): utilbin_dir is assigned a literal just above, so this
# comparison can never be true as written; the guard is kept unchanged.
if [ "$utilbin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine utility binaries"
   exit 6
fi

# Resolve the local host name once. The unqualified name is everything up to
# the first dot, so it is derived from HOST instead of running the helper a
# second time.
HOST=$("$utilbin_dir/gethostname" -aname)
UQHOST=${HOST%%.*}
# Sets shadow_host to true/false for this host.
CheckIfShadowMasterHost "$HOST"


# Lock file touched by start() and removed by stop() on success.
lockfile=/var/lock/subsys/sgemaster

#Default actions
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false

# start
#    Start sge_qmaster and/or sge_shadowd on this host, as selected by the
#    global flags qmaster, shadowd and migrate_qmaster.
#
#    If this host is not the one named in act_qmaster but is the primary
#    qmaster host (or -migrate was requested), the qmaster running elsewhere
#    is shut down first and its lock file is awaited before the local
#    qmaster is started.
#
#    Exits the script with 1 when migration is refused or fails and with 5
#    when the qmaster binary is not executable. Otherwise returns $retval,
#    i.e. the status of the last daemon call made (the shadowd status
#    replaces the qmaster status when both are started), and touches the
#    lock file when that status is 0.
start() {
    # qmaster_host=true if act_qmaster names this host
    qmaster_host=$(CheckIfQmasterHost "$HOST")
    primary_qmaster_host=$(CheckIfPrimaryQmasterHost "$HOST")

    if [ "$qmaster" = true ] && [ "$qmaster_host" = true ] &&
       [ "$migrate_qmaster" = true ]; then
        echo "   qmaster and scheduler running on this host. Will not migrate qmaster."
        exit 1
    fi

    [ -x "$master_exec" ] || exit 5

    if [ "$qmaster" = true ] && [ "$qmaster_host" = false ] &&
       { [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; }; then
        actual_qmaster_host=$(cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster")
        echo "   shutting down qmaster and scheduler on host \"$actual_qmaster_host\" ..."
        # A "denied" answer means this host may not administer the cluster.
        qconf_output=$(qconf -ks 2>&1 | grep "denied")
        if [ "$qconf_output" != "" ]; then
           echo "   denied: host \"$HOST\" is no admin host."
           exit 1
        fi
        qconf -km > /dev/null 2>&1

        # The liveness probe below needs the qmaster port. Resolve it here the
        # same way CheckRunningQmaster does; otherwise an empty port makes
        # qping fail and the old qmaster is wrongly taken to be down.
        if [ "$SGE_QMASTER_PORT" = "" ]; then
            SGE_QMASTER_PORT=$("$utilbin_dir/getservbyname" -number sge_qmaster)
        fi

        # Poll until the old qmaster stops answering (at most 10 probes).
        qping_count=0
        qping_retries=10
        qping_exit_state=0
        while [ "$qping_count" -lt "$qping_retries" ]; do
            qping -info "$actual_qmaster_host" "$SGE_QMASTER_PORT" qmaster 1  > /dev/null 2>&1
            qping_exit_state=$?
            if [ "$qping_exit_state" -ne 0 ]; then
                break
            fi
            sleep 3
            qping_count=$((qping_count + 1))
        done

        if [ "$qping_exit_state" -eq 0 ]; then
        #  qmaster is still running
            echo "   qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
            exit 1
        fi

        # Wait for the lock file in the spool directory (at most 10 checks).
        lock_file_read_retries=10
        lock_file_read_count=0
        lock_file_found=0
        while [ "$lock_file_read_count" -lt "$lock_file_read_retries" ]; do
           if [ -f "$qmaster_spool_dir/lock" ]; then
              lock_file_found=1
              break
           fi
           sleep 3
           lock_file_read_count=$((lock_file_read_count + 1))
        done

        if [ "$lock_file_found" -eq 0 ]; then
        #  old qmaster did not write lock file
           echo "   old qmaster did not write lock file. Cannot migrate qmaster."
           echo "   Please verify that qmaster on host $actual_qmaster_host is down"
           echo "   and make sure that the lock file in qmaster spool directory is"
           echo "   read-able."
           exit 1
        fi

        # From here on this host is treated as the qmaster host.
        qmaster_host=true
    fi

    if [ "$qmaster" = true ] && [ "$qmaster_host" = true ]; then
        echo -n $"Starting $master_prog: "
        daemon --check "$master_prog" --pidfile="$master_pidfile" "$master_exec"
        retval=$?
        CheckRunningQmaster
    elif [ "$qmaster" = true ] && [ "$qmaster_host" = false ]; then
        echo
        echo "sge_qmaster didn't start!"
        echo "This is not a qmaster host!"
        echo "Please, check your act_qmaster file!"
        echo
    fi

    if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
        # Prefer the pid file named after the full host name, fall back to
        # the one named after the unqualified host name.
        pidfile=$qmaster_spool_dir/shadowd_$HOST.pid
        [ -f "$pidfile" ] || pidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid

        echo -n $"Starting sge_shadowd: "
        daemon --check sge_shadowd --pidfile="$pidfile" /usr/bin/sge_shadowd
        retval=$?
    fi

    echo
    [ "$retval" -eq 0 ] && touch "$lockfile"
    return $retval
}

# stop
#    Stop sge_shadowd and/or sge_qmaster on this host by sending them
#    SIGTERM through killproc.
#    shadowd is stopped only when it is selected (shadowd=true) and this host
#    is a shadow master host; the qmaster only when it is selected
#    (qmaster=true) and act_qmaster names this host. This mirrors the
#    selection logic of start(), so "stop -qmaster" leaves shadowd alone.
#    Returns the sum of the killproc statuses and removes the lock file when
#    that sum is 0.
stop() {
    if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
        prog=sge_shadowd
        # Prefer the pid file named after the unqualified host name, fall
        # back to the one named after the full host name.
        pidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid
        [ -f "$pidfile" ] || pidfile=$qmaster_spool_dir/shadowd_$HOST.pid

        # Send SIGTERM to shadowd
        echo -n $"Stopping $prog: "
        killproc -p "$pidfile" $prog
        retval=$?
    fi

    if [ "$qmaster" = true ]; then
        if [ "$(CheckIfQmasterHost "$HOST")" = true ]; then
            # Send SIGTERM to qmaster
            echo -n $"Stopping $master_prog: "
            killproc -p "$master_pidfile" "$master_prog"
            # $? is expanded before the addition, so it is killproc's status.
            retval=$((retval + $?))
        fi
    fi

    echo
    [ "$retval" -eq 0 ] && rm -f "$lockfile"
    return $retval
}

# restart
#    Stop and then start the service. The status of stop is not checked;
#    the function returns the status of start.
restart() {
    stop
    start
}

# reload
#    Implemented as a full restart.
reload() {
    restart
}

# force_reload
#    Implemented as a full restart.
force_reload() {
    restart
}

# rh_status
#    Report the state of the qmaster daemon ($master_prog) and return the
#    status of the check. Only the qmaster is examined, not sge_shadowd.
#    NOTE(review): "status" is not defined in this file; presumably it comes
#    from the init function library sourced at the top - confirm.
rh_status() {
    # run checks to determine if the service is running or use generic status
    status $master_prog
}

# rh_status_q
#    Quiet variant of rh_status: same return status, all output discarded.
rh_status_q() {
    rh_status >/dev/null 2>&1
}


# Dispatch on the requested action ($1). For start and stop an optional
# component selector in $2 is handed to CheckArgs first. The script exits
# with the status of the last command run by the selected branch.
case "$1" in
    start)
        # Nothing to do when the qmaster is already running.
        if rh_status_q; then
            exit 0
        fi
        if [ -n "$2" ]; then
            CheckArgs $2
        fi
        start
        ;;
    stop)
        # Nothing to do when the qmaster is not running.
        if ! rh_status_q; then
            exit 0
        fi
        if [ -n "$2" ]; then
            CheckArgs $2
        fi
        stop
        ;;
    restart)
        restart
        ;;
    reload)
        # Reloading a service that is not running is reported with status 7.
        if ! rh_status_q; then
            exit 7
        fi
        reload
        ;;
    force-reload)
        force_reload
        ;;
    status)
        rh_status
        ;;
    condrestart|try-restart)
        # Restart only when the qmaster is currently running.
        if ! rh_status_q; then
            exit 0
        fi
        restart
        ;;
    *)
        echo $"Usage: $0 {start|stop|status|restart|try-restart|reload|force-reload}"
        exit 2
        ;;
esac
exit $?