## quick help on OpenPBS : ## -- Definition : #Job server : receiving, creating, modifying the job, protecting the job against system crashes and running the job (placing it into execution) #Job Executor : places the job into execution. pbs_mom places it when it receives a copy from the server. #Job Scheduler : decides which job is run, and where and when it is run. #Batch System : one job server (pbs_server), one or more Job Schedulers (pbs_sched), and one or more execution servers (pbs_mom). ### IMPORTANT !!! ### Check that your /etc/hosts contains your IP and hostname. Edit /var/spool/pbs/server_priv/nodes to add nodes. Edit your /var/spool/pbs/mom_priv/config to configure it with the name of your server : \$clienthost name_of_PBS_server By default it is the name of your machine. Abbreviations used : set=s queue=q execution=e routing=r server=s create=c -- Example of Configuration # Define your managers (access from other machines) : set server managers=your_name@your_host # restrict access to server : set server acl_hosts=*.your_domain set server acl_host_enable=true # Enable the scheduler : s s scheduling=true # define the default node to use : s s default_node=pbs1 # define resources to use by default : s s resources_default.cput=5:00 # CPU time s s resources_default.mem=32mb # memory to use - Queue execution Configuration : # create a queue : create queue dque queue_type=execution # define the minimum and maximum CPU time : s q dque resources_min.cput=1,resources_max.cput=12:00:00 # define a default time of 30 min : s q dque resources_default.cput=30:00 # start the queue dque : s q dque enabled=true, started=true # define default queue : s s default_queue=dque - Queue Routing Configuration : # create a routing queue : create queue feed queue_type=routing # list the queues where jobs will be sent : set queue feed route_destinations=dque,dque2@pbs2.mdk.com - Record configuration of your server : # subcommand qmgr qmgr -c "print server" > /tmp/serverPBS.con # reuse the configuration : qmgr < /tmp/serverPBS.con # 
http://www.fysik.dtu.dk/CAMP/pbs.html # # Create queues and set their attributes. # Create and define queue verylong create queue verylong set queue verylong queue_type = Execution set queue verylong Priority = 40 set queue verylong max_running = 10 set queue verylong resources_max.cput = 72:00:00 set queue verylong resources_min.cput = 12:00:01 set queue verylong resources_default.cput = 72:00:00 set queue verylong enabled = True set queue verylong started = True # Create and define queue long create queue long set queue long queue_type = Execution set queue long Priority = 60 set queue long max_running = 10 set queue long resources_max.cput = 12:00:00 set queue long resources_min.cput = 02:00:01 set queue long resources_default.cput = 12:00:00 set queue long enabled = True set queue long started = True # Create and define queue medium create queue medium set queue medium queue_type = Execution set queue medium Priority = 80 set queue medium max_running = 10 set queue medium resources_max.cput = 02:00:00 set queue medium resources_min.cput = 00:20:01 set queue medium resources_default.cput = 02:00:00 set queue medium enabled = True set queue medium started = True # Create and define queue small create queue small set queue small queue_type = Execution set queue small Priority = 100 set queue small max_running = 10 set queue small resources_max.cput = 00:20:00 set queue small resources_default.cput = 00:20:00 set queue small enabled = True set queue small started = True # Create and define queue default create queue default set queue default queue_type = Route set queue default max_running = 10 set queue default route_destinations = small set queue default route_destinations += medium set queue default route_destinations += long set queue default route_destinations += verylong set queue default enabled = True set queue default started = True # Set server attributes. 
set server scheduling = True set server max_user_run = 6 set server acl_host_enable = True set server acl_hosts = *.mandrakesoft.com set server default_queue = default set server log_events = 63 set server mail_from = adm set server query_other_jobs = True set server resources_default.cput = 01:00:00 set server resources_default.neednodes = 1 set server resources_default.nodect = 1 set server resources_default.nodes = 1 set server scheduler_iteration = 60 set server default_node = 1#shared # start all queues : qstart default small medium long verylong qenable default small medium long verylong #Batch job scripts # #!/bin/sh ### Job name #PBS -N test ### Declare job non-rerunnable #PBS -r n ### Output files #PBS -e test.err #PBS -o test.log ### Mail to user #PBS -m ae ### Queue name (small, medium, long, verylong) #PBS -q long ### Number of nodes (node property ev67 wanted) #PBS -l nodes=8:ev67 # This job's working directory echo Working directory is $PBS_O_WORKDIR cd $PBS_O_WORKDIR echo Running on host `hostname` echo Time is `date` echo Directory is `pwd` echo This jobs runs on the following processors: echo `cat $PBS_NODEFILE` # Define number of processors NPROCS=`wc -l < $PBS_NODEFILE` echo This job has allocated $NPROCS nodes # Run the parallel MPI executable "a.out" mpirun -v -machinefile $PBS_NODEFILE -np $NPROCS a.out If you specify #PBS -l nodes=1 in the script, you will be running a non-parallel (or serial) batch job: #!/bin/sh ### Job name #PBS -N test ### Declare job non-rerunnable #PBS -r n ### Output files #PBS -e test.err #PBS -o test.log ### Mail to user #PBS -m ae ### Queue name (small, medium, long, verylong) #PBS -q long ### Number of nodes (node property ev6 wanted) #PBS -l nodes=1:ev6 # This job's working directory echo Working directory is $PBS_O_WORKDIR cd $PBS_O_WORKDIR echo Running on host `hostname` echo Time is `date` echo Directory is `pwd` # Run your executable a.out