#!/bin/sh
#
#+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#+-#+-#-+-#
#
# $SGE_ROOT/util/startup_template
#
# DO NOT EDIT THIS FILE - this file is used as a template
# Don't change the markers #+-#+-#+-# and "#-#-#-#" , they will be removed
#
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-
#
# SGE/SGEEE startup script
#
# (c) 2002 Sun Microsystems, Inc. Use is subject to license terms.  

#
# This script can be called with the following arguments:
#
#       start       start qmaster, scheduler and execution daemon
#                   The execution daemon on the current qmaster host is only
#                   started if there is an execution host entry for this host.
#                   This is necessary to avoid starting an execution daemon
#                   on a qmaster host during the boot of a machine in case
#                   there should be no execution host for this machine
#       stop        Terminates scheduler and qmaster if we are on the master
#                   machine. Terminates commd. Terminates the execution daemon
#                   and the shepherd. This only works if the execution daemon 
#                   spool directory
#                   is in the default location.
#       softstop    do not kill the shepherd process
#       -qmaster    only starts qmaster and scheduler
#       -execd      only start the execution daemon
#                   --> "-execd" and "-qmaster" may not be used together
#       -shadowd    start shadowd if found in the "shadow_masters" file
#       -migrate    shuts down qmaster and scheduler if they are running
#                   on another host and start the daemons on this host
#
# If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common
# exists and it contains the hostname of the current machine and qmaster
# is running on another host it will be shut down and started on this host
#
# Unix commands which may be used in this script:
#    cat cut tr ls grep awk sed basename
#
# This script requires the script $SGE_ROOT/util/arch
#

# Use a fixed command search path limited to the system directories; whatever
# PATH the caller had is discarded for the rest of this script.
PATH=/bin:/usr/bin:/sbin:/usr/sbin

#---------------------------------------------------------------------------
# The following lines provide the necessary info for adding a startup script
# according to the Linux Standard Base Specification (LSB) 1.0.0 which can
# be found at:
#
#    http://www.linuxbase.org/spec/gLSB/gLSB/sysinit.html
#
# See also
#
#    http://www.suse.de/~grimmer/Package-Conventions/index.html
#
### BEGIN INIT INFO
# Provides:       rcsge
# Required-Start: $network afs
# Required-Stop:
# Default-Start:  3 5
# Default-Stop: 0 1 2 6
# Description:  start Grid Engine
### END INIT INFO
#---------------------------------------------------------------------------


# Installation settings exported to everything started below.
# NOTE(review): GENROOT, GENCELL and GENCOMMD_PORT look like placeholders that
# are substituted when this template is installed - confirm.
SGE_ROOT=GENROOT; export SGE_ROOT
SGE_CELL=GENCELL; export SGE_CELL
COMMD_PORT=GENCOMMD_PORT; export COMMD_PORT

# Make sure none of these variables is inherited by the daemons.
unset CODINE_ROOT GRD_ROOT COD_CELL GRD_CELL

# Append $SGE_ROOT/lib/<arch> to the shared library search path.
# "$SGE_ROOT/util/arch -lib" prints the NAME of the variable to extend.
#
# ARCH is not set anywhere else in this script, so determine it here unless
# the caller already provided a value; otherwise the path would end in "lib/".
if [ "x$ARCH" = x ]; then
   ARCH=`$SGE_ROOT/util/arch`
fi

shlib_path_name=`$SGE_ROOT/util/arch -lib`

# Without a variable name the eval/export below would be malformed
# ("=value" and a bare "export"), so skip the whole step in that case.
if [ "x$shlib_path_name" != x ]; then
   # Indirect read of the variable named by $shlib_path_name.
   eval old_value=\$$shlib_path_name
   # The right-hand sides are single-quoted so that they are expanded by eval
   # inside the assignment, where no word splitting takes place.
   if [ "x$old_value" = x ]; then
      eval $shlib_path_name='$SGE_ROOT/lib/$ARCH'
   else
      eval $shlib_path_name='$old_value:$SGE_ROOT/lib/$ARCH'
   fi
   export $shlib_path_name
fi

#---------------------------------------------------------------------------
# Shutdown
#    Send SIGTERM to the process whose pid is stored in a pid file.
#
#    $1 - process name handed to "checkprog"
#    $2 - path of the pid file
#
#    The signal is only sent when "$utilbin_dir/checkprog <pid> <name>"
#    exits with status 0 (presumably: the pid belongs to a process of that
#    name - verify against checkprog). Nothing happens if the pid file does
#    not exist or contains no pid.
#
#    Uses the global $utilbin_dir; sets the globals name, pidfile and pid.
#
Shutdown()
{
   name=$1
   pidfile=$2

   if [ ! -f "$pidfile" ]; then
      return 0
   fi

   # Take the first whitespace-separated token of the file so that stray
   # blanks or an empty file cannot produce a malformed command line.
   pid=`awk 'NF { print $1; exit }' "$pidfile"`
   if [ "x$pid" = x ]; then
      return 0
   fi

   if "$utilbin_dir/checkprog" "$pid" "$name" > /dev/null; then
      kill -TERM "$pid"
   fi
}

#---------------------------------------------------------------------------
# QmasterSpoolDir
#    Print the qmaster spool directory: the second field of the line(s) in
#    $SGE_ROOT/$SGE_CELL/common/configuration that contain the string
#    "qmaster_spool_dir". Prints an empty line if there is no such line.
#
QmasterSpoolDir()
{
   # One awk call selects the matching lines and extracts the value.
   qma_spool_dir=`awk '/qmaster_spool_dir/ { print $2 }' \
                      $SGE_ROOT/$SGE_CELL/common/configuration`
   echo $qma_spool_dir
}

#---------------------------------------------------------------------------
# ExecdSpoolDir
#    Print the execd spool directory of this host, i.e.
#    "<execd_spool_dir>/$UQHOST".
#
#    The value is taken from the host's local configuration file
#    ($SGE_ROOT/$SGE_CELL/common/local_conf/$HOST) if that file exists and
#    contains an "execd_spool_dir" line; otherwise from the global cluster
#    configuration. Prints an empty line if neither defines it.
#
#    Uses the globals SGE_ROOT, SGE_CELL, HOST and UQHOST.
#
ExecdSpoolDir()
{
   lconffile=$SGE_ROOT/$SGE_CELL/common/local_conf/$HOST
   if [ -f "$lconffile" ]; then
      # Pure existence probe. stdout must be redirected BEFORE stderr is
      # duplicated onto it; the other order would leak grep diagnostics into
      # this function's output, which callers capture as the directory name.
      if grep execd_spool_dir "$lconffile" > /dev/null 2>&1; then
         execd_spool_dir=`grep execd_spool_dir "$lconffile" | \
                          awk '{ print $2 }'`
         echo $execd_spool_dir/$UQHOST
         return
      fi
   fi

   # No local override: fall back to the global cluster configuration.
   execd_spool_dir=`grep execd_spool_dir \
                    "$SGE_ROOT/$SGE_CELL/common/configuration" | \
                    awk '{ print $2 }'`

   if [ "$execd_spool_dir" != "" ]; then
      echo $execd_spool_dir/$UQHOST
   else
      echo ""
   fi
}

#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Print "true" if the hostname given in $1 is exactly the content of
#    $SGE_ROOT/$SGE_CELL/common/act_qmaster, otherwise print "false".
#
CheckIfQmasterHost()
{
   host=$1

   # The pattern is quoted, so this is a literal string comparison.
   case "`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`" in
      "$host")
         echo true
         ;;
      *)
         echo false
         ;;
   esac
}

#---------------------------------------------------------------------------
# CheckIfExecHost
#    $1 - hostname to check
#    $2 - "true" if this machine is the qmaster host, else "false"
#
#    If $2 is "false", print "false" without looking any further.
#    Otherwise print "true" if the qmaster spool directory contains at least
#    one entry "exec_hosts/<$1>*", else "false".
#
#    The purpose of the check is to avoid starting an execution daemon
#    if this is a qmaster machine that should not run an execution daemon.
#
#    Only on a qmaster machine can we be sure that we have access to the
#    qmaster spool directory.
#
CheckIfExecHost()
{
   host=$1
   is_qmaster_host=$2

   if [ "$is_qmaster_host" = false ]; then
      echo false
      return
   fi

   qma_spool_dir=`QmasterSpoolDir`

   # Use the hostname passed as $1 rather than the global $HOST so that the
   # function honours its argument.
   # NOTE(review): the trailing "*" also matches longer names that merely
   # start with $host - confirm this prefix match is intended.
   exec_host_cnt=`ls "$qma_spool_dir/exec_hosts/$host"* 2>/dev/null | wc -l`

   # Left unquoted on purpose: some wc implementations pad the number with
   # blanks, which word splitting removes.
   if [ $exec_host_cnt -gt 0 ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Print "true" if the file $SGE_ROOT/$SGE_CELL/common/primary_qmaster
#    exists and its content equals the hostname given in $1.
#    Print "false" in every other case.
#
CheckIfPrimaryQmasterHost()
{
   host=$1
   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster

   # The file is only read when it exists.
   if test -f $fname && test "$host" = "`cat $fname`"; then
      echo true
   else
      echo false
   fi
}


#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Print "true" if the hostname given in $1 occurs in the file
#    $SGE_ROOT/$SGE_CELL/common/shadow_masters, otherwise print "false".
#
#    The lookup is a case-insensitive grep, i.e. $1 is used as a pattern
#    and matches anywhere in a line.
#    NOTE(review): "node1" therefore also matches "node10" - confirm this is
#    acceptable.
#
CheckIfShadowMasterHost()
{
   host=$1

   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters

   # Without a hostname there is nothing to look up; an empty pattern would
   # match every line.
   if [ "x$host" = x ]; then
      echo false
      return
   fi

   # stdout is redirected before stderr is duplicated onto it, so that grep
   # diagnostics cannot end up in this function's output, which callers
   # capture and compare against "true".
   if [ -f "$fname" ] && grep -i "$host" "$fname" > /dev/null 2>&1; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    Print the directory that holds the Grid Engine binaries.
#
#    The directory is the "binary_path" entry of
#    $SGE_ROOT/$SGE_CELL/common/configuration. If "qstat" is not found
#    directly in it but in the sub-directory named after the output of
#    $SGE_ROOT/util/arch, that sub-directory is printed instead.
#
#    Prints "none" if the configuration file does not exist or has no
#    "binary_path" entry. If the entry exists but "qstat" is found in
#    neither location, the configured path is printed unchanged.
#
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/configuration

   base=none

   if [ -f "$cfgname" ]; then
      base=`grep binary_path "$cfgname" | awk '{ print $2 }'`
      if [ "x$base" = x ]; then
         # No binary_path configured: report failure instead of an empty
         # string, which callers would turn into paths like "/qconf".
         base=none
      elif [ ! -f "$base/qstat" ] && [ -f "$SGE_ROOT/util/arch" ]; then
         arch=`"$SGE_ROOT/util/arch"`
         if [ -f "$base/$arch/qstat" ]; then
            base=$base/$arch
         fi
      fi
   fi

   echo $base
}


#---------------------------------------------------------------------------
# GetAdminUser
#    Print the name of the admin user: the "admin_user" entry of
#    $SGE_ROOT/$SGE_CELL/common/configuration.
#
#    Prints "root" if the configuration file does not exist, has no
#    "admin_user" entry, or the entry is "none" (in any letter case).
#
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/configuration
   user=none

   if [ -f "$cfgname" ]; then
      user=`grep admin_user "$cfgname" | awk '{ print $2 }'`
   fi

   # Compare case-insensitively. An empty value (no admin_user line) is
   # treated like "none"; "case" copes with it where an unquoted test would
   # fail with a syntax error.
   lc_user=`echo "$user" | tr "A-Z" "a-z"`
   case "$lc_user" in
      ""|none)
         user=root
         ;;
   esac

   echo $user
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    Print the directory $SGE_ROOT/utilbin/<arch>, where <arch> is the output
#    of the script $SGE_ROOT/util/arch.
#    Print "none" if that script does not exist or if the directory does not
#    contain the "gethostname" binary.
#
GetPathToUtilbin()
{
   base=none

   if test -f $SGE_ROOT/util/arch; then
      arch=`$SGE_ROOT/util/arch`
      utilbindir=$SGE_ROOT/utilbin

      # Accept the directory only if gethostname is present in it.
      test -f $utilbindir/$arch/gethostname && base=$utilbindir/$arch
   fi

   echo $base
}


#---------------------------------------------------------------------------
# usage
#    Print the command line help to stdout and terminate the script with
#    exit status 1.
#
usage()
{
   # Quoted delimiter: the text is printed literally, without any expansion.
   cat <<'EOF'
Grid Engine start/stop script. Valid parameters are:

   (no parameters): start qmaster and execution daemon if applicable
   "start"        dto.
   "stop"         shutdown local Grid Engine processes and jobs
   "softstop"     shutdown local Grid Engine processes (no jobs)
   "-execd"       only start/stop execution daemon
   "-qmaster"     only start/stop qmaster and scheduler (if applicable)
   "-shadowd"     only start/stop shadowd (if applicable)
   "-migrate"     shutdown qmaster/scheduler if it's running on another
                    host and restart it on this host
                    Migration only works if this host is an admin host

Only one of the parameters "start", "stop" or "softstop" is allowed.
Only one of the parameters beginning  with "-" is allowed.

Default argument is "start" for all components.
Default for "stop" is shutting down all components.

EOF
   exit 1
}


#---------------------------------------------------------------------------
# MAIN Procedure
#
# Command line evaluation: more than two arguments, or "-h"/"help" as first
# argument, print the usage text (usage terminates the script).
#

if [ "$#" -gt 2 ]; then
   usage
fi

case "$1" in
   -h|help)
      usage
      ;;
esac

# Defaults: start mode, all components selected.
startup=true
qmaster=true
execd=true
shadowd=true
force_execd=false
qstd=false
migrate_qmaster=false
softstop=false

# Every argument either selects the mode (start/stop/softstop) or restricts
# the set of components; anything else prints the usage text.
for i in $*; do
   case "$i" in
      start)
         startup=true
         ;;
      stop)
         startup=false
         ;;
      softstop)
         startup=false
         softstop=true
         ;;
      -execd)
         execd=true
         force_execd=true
         qmaster=false
         shadowd=false
         ;;
      -qmaster)
         execd=false
         qmaster=true
         shadowd=false
         ;;
      -shadowd)
         execd=false
         qmaster=false
         shadowd=true
         ;;
      -migrate)
         execd=false
         migrate_qmaster=true
         qmaster=true
         shadowd=false
         ;;
      *)
         usage
         ;;
   esac
done

# Locate the Grid Engine binaries; the lookup reports failure as "none".
bin_dir=`GetPathToBinaries`
case "$bin_dir" in
   none)
      echo "can't determine path to Grid Engine binaries"
      exit 1
      ;;
esac

# Same for the utility binaries (gethostname, checkprog).
utilbin_dir=`GetPathToUtilbin`
case "$utilbin_dir" in
   none)
      echo "can't determine path to Grid Engine utility binaries"
      exit 1
      ;;
esac

# Hostname as reported by the Grid Engine gethostname utility.
HOST=`$utilbin_dir/gethostname -aname`
if test -z "$HOST"; then
   echo "can't resolve local hostname"
   exit 1
fi

# UQHOST is HOST cut off at the first dot.
UQHOST=`echo $HOST | cut -f1 -d.`

# A missing qmaster spool directory is only reported; the script goes on.
qmaster_spool_dir=`QmasterSpoolDir`
if test -z "$qmaster_spool_dir"; then
   echo "can't get qmaster spool directory"
fi

shadow_host=`CheckIfShadowMasterHost $HOST`

# Start branch ("start" or no mode argument): launch the selected daemons.
# The else branch further down handles "stop" and "softstop".
if [ "$startup" = true ]; then

   # qmaster_host=true if qmaster was running on this host the last time
   # exec_host=true    if qmaster was running on this host the last time and
   #                   this host is an execution host

   # execution daemon is started in this host if either
   #        - this script is started with the parameter "-execd"
   #        - qmaster *is not* running on this host
   #        - qmaster *is* running on this host *and* there is an exec host config
   #

   qmaster_host=`CheckIfQmasterHost $HOST`
   exec_host=`CheckIfExecHost $HOST $qmaster_host`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost $HOST`

   # Migration: qmaster is selected, act_qmaster names another host, and
   # either this host is the primary qmaster host or "-migrate" was given.
   # The remote qmaster/scheduler are asked to shut down via qconf first.
   if [ $qmaster = true -a $qmaster_host = false -a  \
        \( $primary_qmaster_host = true -o $migrate_qmaster = true \) ]; then
       actual_qmaster_host=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
       echo "   shutting down qmaster and scheduler on host \"$actual_qmaster_host\" ..."
       # "denied" in the output of "qconf -ks" is reported as this host not
       # being an admin host.
       qconf_output=`$bin_dir/qconf -ks 2>&1 | grep "denied"`
       if [ "$qconf_output" != "" ]; then
          echo "   denied: host \"$HOST\" is no admin host."
          exit 1
       fi
       # NOTE(review): with "2>&1 > /dev/null" only stdout is discarded;
       # stderr is duplicated onto the original stdout first. If both streams
       # were meant to be silenced the order must be "> /dev/null 2>&1".
       $bin_dir/qconf -km 2>&1 > /dev/null
       # If "qconf -sconf" still delivers the configuration, a qmaster is
       # still answering and the migration is aborted.
       qmaster_sconf_info=`$bin_dir/qconf -sconf 2> /dev/null | grep "qmaster_spool_dir"`
       if [ "$qmaster_sconf_info" != "" ]; then
       #  qmaster is still running
          echo "   qmaster and scheduler still alive. Cannot migrate qmaster."
          exit 1
       fi
       # From here on this host is treated as the qmaster host.
       qmaster_host=true
   fi

   # Start qmaster and scheduler if selected and this is the qmaster host.
   if [ $qmaster = true -a $qmaster_host = true ]; then
      echo "   starting sge_qmaster"
      $bin_dir/sge_qmaster

      echo "   starting sge_schedd"
      $bin_dir/sge_schedd
   fi

   # Start the execution daemon according to the three rules listed above.
   if [ $force_execd = true -o \
        \( $execd = true -a $qmaster_host = false \) -o \
        \( $execd = true -a $qmaster_host = true -a $exec_host = true \) ]; then
      echo "   starting sge_execd"
      $bin_dir/sge_execd
   fi

   # Start the shadow daemon if selected and this host is listed in the
   # shadow_masters file, unless checkprog succeeds for the pid recorded in
   # its pid file (reported below as "found running").
   if [ $shadowd = true -a $shadow_host = true ]; then
      start_shadowd=false
      pidfile=$qmaster_spool_dir/shadowd_$UQHOST.pid
      if [ -f $pidfile ]; then
         pid=`cat $pidfile`
         $utilbin_dir/checkprog $pid sge_shadowd > /dev/null
         if [ "$?" = 0 ]; then
            echo "   found running sge_shadowd - not starting"
         else
            start_shadowd=true
         fi
      else
        start_shadowd=true
      fi

      if [ $start_shadowd = true ]; then
         echo "   starting sge_shadowd"
         $bin_dir/sge_shadowd
      fi
   fi
else
   # Stop branch ("stop" or "softstop"): signal the selected daemons through
   # their pid files.
   if [ $shadowd = true ]; then
      if [ -f $qmaster_spool_dir/shadowd_$UQHOST.pid ]; then
         # Send SIGTERM to shadowd
         echo "   Shutting down Grid Engine shadowd"
         Shutdown sge_shadowd $qmaster_spool_dir/shadowd_$UQHOST.pid
      fi
   fi

   # Scheduler and qmaster are only stopped on the host named in act_qmaster.
   if [ $qmaster = true ]; then
      if [ `CheckIfQmasterHost $HOST` = true ]; then
         if [ -f  $qmaster_spool_dir/schedd/schedd.pid ]; then
            # Send SIGTERM to scheduler
            echo "   Shutting down Grid Engine scheduler"
            Shutdown sge_schedd $qmaster_spool_dir/schedd/schedd.pid
         fi

         if [ -f $qmaster_spool_dir/qmaster.pid ]; then
            # Send SIGTERM to qmaster
            echo "   Shutting down Grid Engine qmaster"
            Shutdown sge_qmaster $qmaster_spool_dir/qmaster.pid
         fi
      fi
   fi

   if [ $execd = true ]; then
      # Shutdown execution daemon
      execd_spool_dir=`ExecdSpoolDir`

      # NOTE(review): an empty spool directory is only reported; the code
      # below then looks at "/execd.pid" and "/active_jobs".
      if [ "$execd_spool_dir" = "" ]; then
         echo "can't get execd spool directory"
      fi

      if [ -f $execd_spool_dir/execd.pid ]; then
         # Send SIGTERM to execd
         echo "   Shutting down Grid Engine execution daemon"
         Shutdown sge_execd $execd_spool_dir/execd.pid
      fi

      # "softstop" leaves the shepherds alone.
      if [ $softstop = false ]; then
         # Send SIGTERM to all shepherds
         # Each entry of active_jobs is taken as a job id whose shepherd pid
         # is stored in <entry>/pid.
         for jobid in `ls $execd_spool_dir/active_jobs`; do
            echo "   Shutting down Grid Engine shepherd of job $jobid"
            Shutdown sge_shepherd $execd_spool_dir/active_jobs/$jobid/pid
         done
      fi

      # Shutdown communication daemon
      echo "   Shutting down Grid Engine communication daemon"
      $bin_dir/sgecommdcntl -k
   fi
fi
