87 lines
2.3 KiB
Bash
87 lines
2.3 KiB
Bash
|
|
#!/bin/sh
|
||
|
|
|
||
|
|
# Common part: limits that apply to every machine configuration.
# NOTE(review): PPN presumably stands for "processes per node" - confirm.
MAX_CUDA_SHARING_FACTOR=16
MAX_CPU_SHARING_FACTOR=4
MAX_PPN=60
|
||
|
|
|
||
|
|
# Default: values used unless a machine-specific specialization overrides them.
HAS_RES_MANAGER=0     # 1 when the machine has a resource manager (task queue)
INTERACTIVE=1         # 1 when tasks are launched interactively
MAX_NODES_PER_TASK=1
NODE_COUNT=1
|
||
|
|
|
||
|
|
# Specializations
|
||
|
|
if [ `hostname` = "k100" ]; then
|
||
|
|
NODE_COUNT=64
|
||
|
|
MAX_NODES_PER_TASK=8
|
||
|
|
INTERACTIVE=0
|
||
|
|
# Since launch isn't interactive - one must provide is_launched, is_finished, get_elapsed_time, stdout_fn, stderr_fn calls
|
||
|
|
# Resolve the directory that belongs to a task.
# Arguments: $1 - base path of the task directory
# Outputs:   "$1.N" for the smallest N in 1..9 for which that directory
#            exists, otherwise "$1" unchanged (no trailing newline).
get_task_dir() {
    local suffix=1
    while [ "$suffix" -le 9 ]; do
        if [ -d "$1.$suffix" ]; then
            printf %s "$1.$suffix"
            return
        fi
        suffix=$(( suffix + 1 ))
    done
    # No numbered directory found: fall back to the base path itself.
    printf %s "$1"
}
|
||
|
|
# Report whether a task has been launched.
# Arguments: $1, $2 - stored in locals but not inspected yet
#            (presumably the task's stdout and stderr file names - confirm).
# Outputs:   always "1".
is_launched() {
    local STDOUT_FN="$1"
    local STDERR_FN="$2"
    # TODO: add handling for refusals from SUPPZ
    printf '%s\n' 1
}
|
||
|
|
# Report whether a task has finished.
# Arguments: $1 - base path of the task directory (resolved by get_task_dir)
# Outputs:   "1" when the last line of the task's manager.log is exactly
#            "Exiting...", otherwise "0".
is_finished() {
    local log_file
    log_file="$(get_task_dir "$1")/manager.log"
    # The path is quoted so directories containing whitespace are read
    # correctly. A log that does not exist yet just means "not finished",
    # so tail's complaint about it is intentionally dropped.
    if [ "$(tail -n 1 -- "$log_file" 2>/dev/null)" = "Exiting..." ]; then
        echo 1
    else
        echo 0
    fi
}
|
||
|
|
# Print, as seconds since the epoch, the timestamp found on the first line of
# a log file that contains a given marker.
# Arguments: $1 - log file
#            $2 - marker text to look for (fixed string)
#            $3 - number of the whitespace-separated field holding the date
#                 as DD.MM.YYYY; the next field holds the time of day
# Returns:   non-zero when the file cannot be read, no line matches, or the
#            timestamp cannot be converted.
_manager_log_epoch() {
    local stamp
    stamp="$(awk -v marker="$2" -v col="$3" '
        index($0, marker) {
            split($col, d, "[.]")
            printf "%s-%s-%s %s", d[3], d[2], d[1], $(col + 1)
            exit
        }' "$1")" || return 1
    [ -n "$stamp" ] || return 1
    # Relies on "date -d", exactly as the surrounding configuration always has.
    date -d "$stamp" +%s
}

# Print the run time of a task in whole seconds.
# Arguments: $1 - base path of the task directory (resolved by get_task_dir)
# Outputs:   the difference between the "done at" and "started at" timestamps
#            recorded in the task's manager.log.
# Returns:   non-zero, with a message on stderr and nothing on stdout, when
#            either timestamp is missing or unparsable.
get_elapsed_time() {
    local log_file
    local started
    local finished
    log_file="$(get_task_dir "$1")/manager.log"
    # On the "started at" line the date is field 5; the "done at" line has one
    # more leading word, so its date is field 6.
    if ! started="$(_manager_log_epoch "$log_file" "started at" 5)"; then
        printf '%s\n' "get_elapsed_time: no usable start time in $log_file" >&2
        return 1
    fi
    if ! finished="$(_manager_log_epoch "$log_file" "done at" 6)"; then
        printf '%s\n' "get_elapsed_time: no usable finish time in $log_file" >&2
        return 1
    fi
    echo $(( finished - started ))
}
|
||
|
|
# Print the path of a task's standard-output file.
# Arguments: $1 - base path of the task directory (resolved by get_task_dir)
# Outputs:   "<task dir>/output"
stdout_fn() {
    local task_dir
    task_dir="$(get_task_dir "$1")"
    echo "$task_dir/output"
}
|
||
|
|
# Print the path of a task's standard-error file.
# Arguments: $1 - base path of the task directory (resolved by get_task_dir)
# Outputs:   "<task dir>/errors"
stderr_fn() {
    local task_dir
    task_dir="$(get_task_dir "$1")"
    echo "$task_dir/errors"
}
|
||
|
|
HAS_RES_MANAGER=1
# Because this machine has a resource manager (task queue), the configuration
# must provide the can_launch call.

# Report whether another task may be submitted right now.
# Outputs: "1" when the mps output, with its first two lines skipped, has
#          fewer than 6 lines; otherwise "0".
# NOTE(review): the two skipped lines are presumably a header and 6 the queue
# limit - confirm against the mps tool.
can_launch() {
    local queued
    # Errors from mps are discarded; a failing mps therefore counts as 0 lines.
    queued=$(mps 2>/dev/null | tail -n +3 | wc -l)
    if [ "$(( queued ))" -ge 6 ]; then
        echo 0
        return
    fi
    echo 1
}
|
||
|
|
fi
|