# This file must have the bash variable assignment syntax
# (one KEY="value" assignment per line; lines starting with "#" are comments)
# $Id$

#########################
# General configuration #
###############################################################################
#
# Database type ("mysql" or "Pg")
DB_TYPE="mysql"

# DataBase hostname
DB_HOSTNAME="localhost"

# DataBase port
DB_PORT="3306"

# Database base name
DB_BASE_NAME="oar"

# DataBase user name
DB_BASE_LOGIN="oar"

# DataBase user password
DB_BASE_PASSWD="oar"

# DataBase read only user name
#DB_BASE_LOGIN_RO="oar_ro"

# DataBase read only user password
#DB_BASE_PASSWD_RO="oar_ro"

# OAR server hostname
SERVER_HOSTNAME="localhost"

# OAR server port
SERVER_PORT="6666"

# When the user does not specify a -l option then OAR uses this
OARSUB_DEFAULT_RESOURCES="/resource_id=1"

# Which real resource must be used when using the "nodes" keyword?
OARSUB_NODES_RESOURCES="network_address"

# Force use of job key even if --use-job-key or -k is not set.
OARSUB_FORCE_JOB_KEY="no"

# OAR log level: 3(debug+warnings+errors), 2(warnings+errors), 1(errors)
LOG_LEVEL="2"

# Log categories to display in the log file.
# Ex: LOG_CATEGORIES="scheduler,main,energy"
# if LOG_CATEGORIES="all" every category will be logged.
LOG_CATEGORIES="all"

# If you want to debug oarexec on nodes then set this to 1
# (only effective if DETACH_JOB_FROM_SERVER = 1)
OAREXEC_DEBUG_MODE="0"

# oarexec default temporary directory
# This value MUST be the same in all oar.conf on all nodes of the cluster
OAR_RUNTIME_DIRECTORY="/var/lib/oar"

# OAR log file
LOG_FILE="/var/log/oar.log"

# Specify where we are connected with a job of the deploy type
DEPLOY_HOSTNAME="127.0.0.1"

# Specify where we are connected with a job of the cosystem type
COSYSTEM_HOSTNAME="127.0.0.1"

# Specify the database field to use to fill the file on the first node of the
# job in $OAR_NODE_FILE (default is 'network_address'). Only resources with
# type=default are displayed in this file.
#NODE_FILE_DB_FIELD="network_address"

# Specify the database field that will be considered to fill the node file used
# by the user on the first node of the job. For each different value of this
# field then OAR will put 1 line in the node file (default: resource_id).
#NODE_FILE_DB_FIELD_DISTINCT_VALUES="resource_id"

# If you want to free a process per job on the server you can change this tag
# into 1 (you must enable all nodes to connect to SERVER_PORT on the
# SERVER_HOSTNAME)
DETACH_JOB_FROM_SERVER="0"

# Command to use to connect to other nodes (default is "ssh" in the PATH)
OPENSSH_CMD="/usr/bin/ssh -p 6667"

# Set the timeout value for each ssh connection (default is 120)
#OAR_SSH_CONNECTION_TIMEOUT="200"

# When an oardel is requested on a job then OAR will try to kill it and if
# nothing responds in JOBDEL_WALLTIME seconds then the job is EXTERMINATED and
# the resources turned into the Suspected state (default is 300s)
#JOBDEL_WALLTIME="300"

# If you have installed taktuk and want to use it to manage remote
# administration commands then give the full command path
# (with your options except "-m" and "-o").
# You also don't have to give any taktuk command.
# (taktuk version must be >= 3.6)
#TAKTUK_CMD="/usr/bin/taktuk -t 30 -s"

###############################################################################

########################################################################
# Pingchecker options:                                                        #
# How to check if the nodes have a good health or not. This choice is         #
# directly linked to the Suspected state of the resources.                    #
# By default OAR uses only "ping".                                            #
# it requests no configuration but it                                         #
# is not accurate about the state of the nodes and it is slow                 #
###############################################################################
#
# Set the frequency for checking Alive and Suspected resources (0 means never)
FINAUD_FREQUENCY="300"

# Set time after which Suspected resources become Dead (default is 0 and it
# means never)
#DEAD_SWITCH_TIME="600"

# Set to yes if you want to check the nodes that run jobs.
# (no is the default value)
#CHECK_NODES_WITH_RUNNING_JOB='no'

# Set to yes if you want to check the aliveness of nodes just after the end of
# each job.
#ACTIVATE_PINGCHECKER_AT_JOB_END="no"

# Uncomment only one of the following PINGCHECKER configuration

# sentinelle.pl
# If you want to use sentinelle.pl then you must use this tag.
# (sentinelle.pl is like a "for" of ssh but it adds timeout and window to
# avoid overloading the server)
# (sentinelle.pl is provided with OAR in the install directory)
#PINGCHECKER_SENTINELLE_SCRIPT_COMMAND="/usr/share/oar/sentinelle.pl -t 5 -w 20"

# Taktuk
# taktuk may be used to check aliveness of nodes.
# Give the arguments of the taktuk command WITHOUT format outputs
# (DO NOT use "-o" option).
# See TAKTUK_CMD tag in this file to specify the path of the taktuk command
#PINGCHECKER_TAKTUK_ARG_COMMAND="broadcast exec timeout 5 kill 9 [ true ]"
#PINGCHECKER_TAKTUK_ARG_COMMAND="broadcast exec timeout 5 kill 9 [ oarnodecheckquery ]"
#PINGCHECKER_TAKTUK_ARG_COMMAND="broadcast exec timeout 5 kill 9 [ /path/on/nodes/to/my/check/script.sh ]"

# fping
# fping may be used instead of ping to check aliveness of nodes.
# uncomment next line to use fping. Give the complete command path.
#PINGCHECKER_FPING_COMMAND="/usr/bin/fping -q"

# nmap
# nmap may be used instead of ping to check aliveness of nodes.
# uncomment next line to use nmap. Give the complete command path.
# It will test to connect on the ssh port (22)
#PINGCHECKER_NMAP_COMMAND="/usr/bin/nmap -p 22 -n -T5"

# GENERIC command
# a specific script may be used instead of ping to check aliveness of nodes.
# uncomment next line and give the complete command path and its arguments.
# The script must return bad nodes on STDERR (1 line for a bad node and it must
# have exactly the same name that OAR has given in argument of the command)
#PINGCHECKER_GENERIC_COMMAND="/path/to/command arg1 arg2"

###############################################################################

######################
# Mail configuration #
###############################################################################
#
# OAR information may be notified by email to the administrator
# set accordingly to your configuration and uncomment the next lines to
# activate the feature.
# (If this tag is correctly configured then users can use the "--notify" option
# of oarsub to receive mails about their jobs)
#MAIL_SMTP_SERVER="smtp.serveur.com"

# You can specify several recipients with a comma between each email address
#MAIL_RECIPIENT="user@domain.com"
#MAIL_SENDER="oar@domain.com"

###############################################################################

###########
# Scripts #
###############################################################################
#
# Set the timeout for the prologue and epilogue execution on computing nodes
#PROLOGUE_EPILOGUE_TIMEOUT="60"

# Files to execute before and after each job on the first computing node
# (by default nothing is executed)
#PROLOGUE_EXEC_FILE="/path/to/prog"
#EPILOGUE_EXEC_FILE="/path/to/prog"

# Set the timeout for the prologue and epilogue execution on the OAR server
#SERVER_PROLOGUE_EPILOGUE_TIMEOUT="60"

# Files to execute before and after each job on the OAR server (by default
# nothing is executed)
#SERVER_PROLOGUE_EXEC_FILE="/path/to/prog"
#SERVER_EPILOGUE_EXEC_FILE="/path/to/prog"

#
# File to execute just after a group of resources has been suspected.
# It may be used to trigger automatic actions to heal the resources. The
# script is started with the list of resources put in its STDIN: resource_id
# followed by a space and the network_address (one line per resource)
#SUSPECTED_HEALING_EXEC_FILE="/path/to/prog"
#SUSPECTED_HEALING_TIMEOUT="10"

########################
# Scheduler parameters #
###############################################################################
#
# Maximum of seconds used by a scheduler
SCHEDULER_TIMEOUT="10"

# Number of processes to use when performing scheduling calculations
# (default is 1)
#SCHEDULER_NB_PROCESSES=4

# Time to add between each jobs (for example: time for administration tasks or
# time to let computers to reboot)
# minimum time is 1 second
# default time is 60 seconds
SCHEDULER_JOB_SECURITY_TIME="60"

# Minimum time in seconds that can be considered like a hole where a job could
# be scheduled in (if you have performance problems, you can try to increase
# this)
SCHEDULER_GANTT_HOLE_MINIMUM_TIME="300"

# You can add an order preference on resources assigned by the
# system (SQL ORDER syntax)
SCHEDULER_RESOURCE_ORDER="scheduler_priority ASC, suspended_jobs ASC, network_address DESC, resource_id ASC"

# If next line is uncommented then OAR will automatically update the value of
# "scheduler_priority" field corresponding to the besteffort jobs.
# The syntax is field names separated by "/". The value affected to
# "scheduler_priority" depends on the position of the field name.
SCHEDULER_PRIORITY_HIERARCHY_ORDER="network_address/resource_id"

# You can specify a type of resources that will be always assigned for each job
# (for example: enable all jobs to be able to log on the cluster frontales)
# For more information, see the FAQ
#SCHEDULER_RESOURCES_ALWAYS_ASSIGNED_TYPE="frontal"

# This says to the scheduler to treat resources of these types, where there is
# a suspended job, like free ones. So some other jobs can be scheduled on these
# resources.
# (list resource types separated with spaces; Default value is
# nothing so no other job can be scheduled on suspended job resources)
#SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE="default licence VLAN"
SCHEDULER_AVAILABLE_SUSPENDED_RESOURCE_TYPE="default"

# For a debug purpose, scheduler decisions can be logged into the database
# Uncomment the next line in order to activate the logging mechanism
#SCHEDULER_LOG_DECISIONS="yes"

# Time to wait when a reservation has not got all resources that it has reserved
# (some resources may have become Suspected or Absent since the job submission)
# before to launch the job on the remaining resources (default is 300s)
#RESERVATION_WAITING_RESOURCES_TIMEOUT="300"

# Set the granularity of the OAR accounting feature (in seconds)
# Used by the oaraccounting command and the
# oar_sched_gantt_with_timesharing_and_fairsharing to calculate the timesharing
# policy. Default is 1 day (86400s)
#ACCOUNTING_WINDOW="86400"

##############################
# OCAML Scheduler parameters #
###############################################################################
# You can use hierarchy_extractor.rb to generate those variables
#HIERARCHY_LABELS="node,cpu,core"
#node="(1,16,2), (33,8,4)"
#cpu="(1,8,8)"
#core="(1,1,64)"
##############################################

##############################################################
# Parameters available if you are using the                  #
# oar_sched_gantt_with_timesharing_and_fairsharing scheduler #
###############################################################################
#
# Specify the number of job to take care at each time
# Default is 30
#SCHEDULER_FAIRSHARING_MAX_JOB_PER_USER=30

# Number of seconds to consider for the fairsharing
# Default is 30 days
#SCHEDULER_FAIRSHARING_WINDOW_SIZE=2592000

# Specify the target percentages for project names (0 if not specified)
# /!\ the syntax is a perl hash table definition with project names as keys
# AND EVERYTHING MUST BE ON THE SAME LINE
#SCHEDULER_FAIRSHARING_PROJECT_TARGETS="{ first => 75, default => 25 }"

# Specify the target percentages for users (0 if not specified)
# /!\ the syntax is a perl hash table definition with user names as keys
# AND EVERYTHING MUST BE ON THE SAME LINE
#SCHEDULER_FAIRSHARING_USER_TARGETS="{ toto => 75, titi => 10, tutu => 15 }"

# Weight given to each criteria
# By default the job project name is not taken in account
#SCHEDULER_FAIRSHARING_COEF_PROJECT=0
# By default, effective job duration counts twice than the asked one ("asked" =
# walltime given by the user )
#SCHEDULER_FAIRSHARING_COEF_USER=2
#SCHEDULER_FAIRSHARING_COEF_USER_ASK=1
##############################################

###########################################################################
# ENERGY SAVING                                                               #
# (Management of automatic wake-up and shut-down of the nodes when they       #
# are not used)                                                               #
# You have to set up the "available_upto" property of your resources:         #
#  available_upto=0           : to disable the wake-up and shutdown           #
#  available_upto=1           : to disable the wake-up (but not the halt)     #
#  available_upto=2147483647  : to disable the halt (but not the wake-up)     #
#  available_upto=2147483646  : to enable wake-up/halt forever                #
#  available_upto=<timestamp> : to enable the halt, and the wake-up until     #
#                               the date given by <timestamp>                 #
# Energy saving features also consider that you have set up a mechanism       #
# to automatically set the nodes in the Alive state when they are booted.     #
# For this, you can follow the steps described here:                          #
# http://wiki-oar.imag.fr/index.php/Configuration_tips#Start.2Fstop_of_nodes_using_ssh_keys
###############################################################################
#
# Parameter for the scheduler to decide when a node is idle.
# Number of seconds since the last job was terminated on nodes
#SCHEDULER_NODE_MANAGER_IDLE_TIME="600"

# Parameter for the scheduler to decide if a node will have enough time to sleep.
# Number of seconds before the next job
#SCHEDULER_NODE_MANAGER_SLEEP_TIME="600"

# Parameter for the scheduler to know when a node has to be woken up before the
# beginning of the job when a reservation is scheduled on a resource on this node
# Number of seconds for a node to wake up
#SCHEDULER_NODE_MANAGER_WAKEUP_TIME="600"

# When OAR scheduler wants some nodes to wake up then it launches this command
# and puts on its STDIN the list of nodes to wake up (one hostname by line).
# !! This variable is ignored if you set ENERGY_SAVING_INTERNAL to yes. !!
# The scheduler looks at the available_upto field in the resources table to know
# if the node will be started for enough time.
# There's no nodes management with this method: if you want nodes to be suspected
# when they do not wake up in time, then you have to use ENERGY_SAVING_INTERNAL=yes
# and set up ENERGY_SAVING_NODE_MANAGER_WAKE_UP_CMD.
#SCHEDULER_NODE_MANAGER_WAKE_UP_CMD="/etc/oar/wake_up_nodes.sh"

# When OAR considers that some nodes can be shut down, it launches this command
# and puts the node list on its STDIN (one hostname by line).
# !! This variable is ignored if you set ENERGY_SAVING_INTERNAL to yes. !!
# There's no nodes management with this method: if you want some nodes to be kept
# alive to be reactive to small jobs, then you have to use ENERGY_SAVING_INTERNAL=yes
# and set up ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD.
#SCHEDULER_NODE_MANAGER_SLEEP_CMD="/path/to/the/command args"
#SCHEDULER_NODE_MANAGER_SLEEP_CMD="taktuk -s -f - -t 3 b e t 3 k 9 [ oardodo halt ]"
#SCHEDULER_NODE_MANAGER_SLEEP_CMD="/usr/local/oar/sentinelle.pl -f - -t 3 -p 'oardodo halt'"

# Choose whether to use the internal energy saving module or not. If set to yes,
# please, also provide convenient configuration for all the ENERGY_* variables.
# If set to no, then you have to set up SCHEDULER_NODE_MANAGER_WAKE_UP_CMD
# and SCHEDULER_NODE_MANAGER_SLEEP_CMD
# Benefits of this module are:
# - nodes are suspected if they do not wake up before a timeout
# - some nodes can be kept always alive depending on some properties
# - the launching of wakeup/shutdown commands can be windowized to prevent
#   from electric peaks
# Possible values are "yes" and "no"
ENERGY_SAVING_INTERNAL="no"

# Path to the script used by the energy saving module to wake up nodes.
# This command is executed from the oar server host.
# OAR puts the node list on its STDIN (one hostname by line).
# The scheduler looks at the available_upto field in the resources table to know
# if the node will be started for enough time.
#ENERGY_SAVING_NODE_MANAGER_WAKE_UP_CMD="/etc/oar/wake_up_nodes.sh"

# Path to the script used by the energy saving module to shut down nodes.
# This command is executed from the oar server host.
# OAR puts the node list on its STDIN (one hostname by line).
#ENERGY_SAVING_NODE_MANAGER_SLEEP_CMD="/etc/oar/shut_down_nodes.sh"

# Timeout to consider a node broken (suspected) if it has not woken up
#ENERGY_SAVING_NODE_MANAGER_WAKEUP_TIMEOUT="900"

# You can set up a number of nodes that must always be on. You can use the
# syntax in the examples if you want a number of alive nodes of different types
# (corresponding to a specific sql properties requirement).
# Example 1: keep alive 10 nodes on the whole cluster:
#ENERGY_SAVING_NODES_KEEPALIVE="type='default':10"
# Example 2: keep alive 4 nodes on the paradent cluster AND 6 nodes on the
# paraquad cluster AND 2 nodes accepting besteffort
#ENERGY_SAVING_NODES_KEEPALIVE="cluster='paradent':4 & cluster='paraquad':6 & besteffort='YES':2"
# By default, keepalive is disabled:
#ENERGY_SAVING_NODES_KEEPALIVE="type='default':0"

# Parameter for the window launching mechanism embedded in OAR energy saving module
# to know the number of commands that can be executed in parallel.
# This mechanism is used in order to sleep and wake up nodes gradually.
# Window size minimum is 1
#ENERGY_SAVING_WINDOW_SIZE="25"

# Time in second between execution of each window.
# Minimum is 0 to set no delay between each window.
# This value must be smaller than ENERGY_SAVING_NODE_MANAGER_TIMEOUT.
# NOTE(review): no variable with that exact name is defined in this file;
# presumably ENERGY_SAVING_WINDOW_TIMEOUT (see below) is meant -- confirm.
#ENERGY_SAVING_WINDOW_TIME="60"

# Timeout to set the maximum duration for an execution window
# This value must be greater than ENERGY_SAVING_WINDOW_TIME.
#ENERGY_SAVING_WINDOW_TIMEOUT="120"

################################################################################

##############################
# Suspend/Resume job feature #
###############################################################################
#
# Name of the perl script that manages suspend/resume.
# (default is /etc/oar/suspend_resume_manager.pl)
#SUSPEND_RESUME_FILE="/etc/oar/suspend_resume_manager.pl"

# Files to execute just after a job was suspended and just before a job was resumed
#JUST_AFTER_SUSPEND_EXEC_FILE="/path/to/prog"
#JUST_BEFORE_RESUME_EXEC_FILE="/path/to/prog"

# Timeout for the two previous scripts
#SUSPEND_RESUME_SCRIPT_TIMEOUT="60"

###############################################################################

################################
# JOB_RESOURCE_MANAGER feature #
###############################################################################
# Specify the name of the database field that will be passed to the
# JOB_RESOURCE_MANAGER script.
# If this option is set then users must use oarsh instead of ssh to walk on
# the nodes they reserve using oarsub.
# Look at the CPUSET file
# (if defined, this option turns on the execution of the JOB_RESOURCE_MANAGER
# script on each job nodes: initialize cpuset, job keys, clean nodes, ...)
JOB_RESOURCE_MANAGER_PROPERTY_DB_FIELD="cpuset"

# Name of the perl script that manages cpuset.
# (default is /etc/oar/job_resource_manager.pl which handles the linux kernel
# cpuset, job keys, clean processes, ...)
#JOB_RESOURCE_MANAGER_FILE="/etc/oar/job_resource_manager.pl"

# Path of the relative directory where the cpusets will be created on each
# nodes (same value than in /proc/self/cpuset).
# WARNING: Change this value only if you know what you are doing.
# (Note: comment this line to disable cpuset feature on computing nodes. Thus
#        if you only want to initialize job user without the cpuset, you have
#        to set OARSUB_FORCE_JOB_KEY="yes")
CPUSET_PATH="/oar"

# Resource "type" DB field to use if you want to enable the job uid feature.
# (create a unique user id per job on each nodes of the job)
#JOB_RESOURCE_MANAGER_JOB_UID_TYPE="userid"

# Name of the perl script that retrieves monitoring data from compute nodes.
# This is used in oarmonitor command.
#OARMONITOR_SENSOR_FILE="/etc/oar/oarmonitor_sensor.pl"

###############################################################################

#########
# OARSH #
###############################################################################
#
# This variable must be set to enable the use of oarsh from a frontale node
# Otherwise you must not set this variable if you are not on a frontale
#OARSH_OARSTAT_CMD="/usr/bin/oarstat"

# The following variable adds options to ssh (or OPENSSH_CMD if configured).
# If one option is not handled by your ssh version just remove it BUT be
# careful because these options are there for security reasons
OARSH_OPENSSH_DEFAULT_OPTIONS="-oProxyCommand=none -oPermitLocalCommand=no"

# If you set this variable to something different from 0 then oarsh will act
# like a normal ssh **without** CPUSET restriction.
# WARNING: this is a critical functionality (this is only useful if users want
# to have a command to connect on every nodes without taking care of their ssh
# configuration and act like a ssh)
#OARSH_BYPASS_WHOLE_SECURITY="0"

###############################################################################

#DESKTOP_COMPUTING_ALLOW_CREATE_NODE="0"
#DESKTOP_COMPUTING_EXPIRY="300"
#STAGEOUT_DIR="/var/lib/oar/stageouts/"
#STAGEIN_DIR="/var/lib/oar/stageins"
#STAGEIN_CACHE_EXPIRY="144"

############
# OARADMIN #
#############
#
# Uncomment the next line in order to activate the versioning mechanism
# for admission rules and oar conf files
# Repository is in /home/oar/.oaradmin
#   /home/oar/.oaradmin/rp : contains repository
#   /home/oar/.oaradmin/wc : contains working copy
#OARADMIN_VERSIONING="yes"

###############################################################################

###########
# OAR API #
###############################################################################
# Disable this if you are not ok with a simple pidentd "authentication"
# It is safe enough if you fully trust the client hosts (with an appropriate
# ip-based access control into apache for example)
#API_TRUST_IDENT="1"

# Custom header for the html browsable format of the API
#API_HTML_HEADER="/etc/oar/api_html_header.pl"

# Custom form for posting jobs with html to the API
#API_HTML_FORM="/etc/oar/api_html_postform.pl"

# Default data structure variant
# Value can be "oar" or "simple" (the default is "simple")
# This can be overridden with the "?structure=<value>" part of the queries
# The "oar" variant tries to be as near as possible to the data structures
# used by the export options of the oarstat/oarnodes commands.
# The "simple" variant aims to be more simple, using arrays instead of hashes
# when it is possible.
# The first is more human readable; the second is simpler for programming.
#API_DEFAULT_DATA_STRUCTURE="simple"