#!/bin/bash
# Bash is required due to usage of 'disown' command
#
# Usage: <this script> RESULTS_DIR <task-specs
#
# Task specifications (one line of shell assignments per task) are read from
# standard input by launcher(); result lines are appended to files below
# RESULTS_DIR by analyzer().

# Directory the script was started from. Optional per-run overrides of the
# configuration files are looked up here.
SAVE_DIR=$(pwd)

# Directory that contains this script. BASH_SOURCE gives the path bash
# actually opened; `command -v` is only a fallback for the case where that
# name is not a file relative to the current directory (started by bare name
# through PATH).
SELF_PATH=${BASH_SOURCE[0]:-$0}
if [ ! -e "$SELF_PATH" ]; then
  SELF_PATH=$(command -v -- "$SELF_PATH" 2>/dev/null)
fi
MY_DIR=$(cd "$(dirname -- "$SELF_PATH")" && pwd)
unset SELF_PATH

# Where result files are written. Made absolute because later code changes
# the working directory before writing results.
RESULTS_DIR="$1"
case "$RESULTS_DIR" in
  '' | /*) ;;
  *) RESULTS_DIR="$SAVE_DIR/$RESULTS_DIR" ;;
esac

# Configuration shipped with the script first, then the optional override
# from the start directory, so that the override wins.
. "$MY_DIR/machine-config.sh"
if [ -f "$SAVE_DIR/machine-config.sh" ]; then
  . "$SAVE_DIR/machine-config.sh"
fi

. "$MY_DIR/configure-run.sh"
if [ -f "$SAVE_DIR/configure-run.sh" ]; then
  . "$SAVE_DIR/configure-run.sh"
fi

# Helper functions used below (proc_killer, can_launch, is_launched,
# is_finished, get_elapsed_time, ...) are not defined in this file;
# presumably they come from here -- TODO confirm.
. "$MY_DIR/test-utils.sh"

# In interactive mode interactive_launcher() redirects the task's output
# itself, into the files named by these two functions: "<name>.stdout" and
# "<name>.stderr". analyzer() calls stdout_fn/stderr_fn in both modes, so for
# non-interactive runs they must already be defined elsewhere (presumably by
# test-utils.sh -- TODO confirm).
# An unset INTERACTIVE is treated as 0 instead of producing a `[` syntax
# error.
if [ "${INTERACTIVE:-0}" -ne 0 ]; then
  # $1 - launch name; prints the file that receives the task's stdout.
  stdout_fn() {
    printf '%s\n' "$1.stdout"
  }

  # $1 - launch name; prints the file that receives the task's stderr.
  stderr_fn() {
    printf '%s\n' "$1.stderr"
  }
fi

# Without an external resource manager the script keeps its own account of
# free CPUs/GPUs: finished tasks drop small report files into this private
# directory (see resources_freed) and launcher() collects them.
# A failed mktemp is fatal: with an empty RES_MAN_DIR the reports would be
# moved into "/".
if [ "$HAS_RES_MANAGER" -eq 0 ]; then
  RES_MAN_DIR=$(mktemp -d) || {
    echo "Cannot create the resource bookkeeping directory" >&2
    exit 1
  }
fi

#######################################
# Reports the resources released by a finished task by dropping a small
# shell fragment ("FREED_CPUS=..." / "FREED_CUDAS=...") into RES_MAN_DIR,
# where launcher() sources and deletes it.
# Globals read:    SHARE_RESOURCES, CPUS_PER_NODE, CUDAS_PER_NODE,
#                  MAX_CPU_SHARING_FACTOR, MAX_CUDA_SHARING_FACTOR,
#                  totalProcs, CPUS_PER_PROC, CUDAS_PER_PROC, RES_MAN_DIR
# Globals written: FREED_CPUS, FREED_CUDAS
# Returns: non-zero if the report file could not be created or moved.
#######################################
resources_freed() {
  local report_file
  report_file=$(mktemp) || return 1

  if [ "$SHARE_RESOURCES" -eq 0 ]; then
    # Exclusive mode: the task owned the whole node.
    FREED_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
    FREED_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
  else
    # Shared mode: only what the task's processes asked for.
    FREED_CPUS=$(( totalProcs * CPUS_PER_PROC ))
    FREED_CUDAS=$(( totalProcs * CUDAS_PER_PROC ))
  fi

  {
    echo "FREED_CPUS=$FREED_CPUS"
    echo "FREED_CUDAS=$FREED_CUDAS"
  } >>"$report_file"

  # The report is written elsewhere and only then moved into place, so the
  # launcher does not pick up a half-written file (a rename is atomic when
  # both locations are on the same filesystem).
  mv -- "$report_file" "$RES_MAN_DIR/"
}

#######################################
# Runs one task directly, waits for it and records the outcome.
# Meant to be started in the background by launcher(), one instance per task.
# Globals read: LAUNCH_DIR, LAUNCH_NAME, TASK_EXE, PROC_GRID, LAUNCH_PPN,
#               CPUS_PER_PROC, CUDAS_PER_PROC, TEST_MAX_TIME, HAS_RES_MANAGER
# Files written (in LAUNCH_DIR):
#   the files named by stdout_fn/stderr_fn - the task's output
#   "$TASK_EXE.finished" - "<exit status> <wall-clock seconds>", the signal
#                          analyzer() waits for
#######################################
interactive_launcher() {
  local STDOUT_FN STDERR_FN START_T END_T LAUNCH_PID KILLER_PID START_RES
  local CALC_TIME

  cd "$LAUNCH_DIR"
  STDOUT_FN=$(stdout_fn "$LAUNCH_NAME")
  STDERR_FN=$(stderr_fn "$LAUNCH_NAME")
  : >"$STDOUT_FN"
  : >"$STDERR_FN"

  # Job control: each background job gets its own process group. The
  # watchdog below is handed "-PID"; presumably it signals that whole group
  # -- TODO confirm against proc_killer.
  set -m

  START_T=$(date +%s)
  if [ -f "run.sh" ]; then
    # A test-specific run script takes precedence over the generic runner.
    PATH="$LAUNCH_DIR:$PATH" PROC_GRID="$PROC_GRID" DVMH_PPN="$LAUNCH_PPN" \
      DVMH_NUM_THREADS="$CPUS_PER_PROC" DVMH_NUM_CUDAS="$CUDAS_PER_PROC" \
      ./run.sh </dev/null >"$STDOUT_FN" 2>"$STDERR_FN" &
  else
    # $PROC_GRID is intentionally unquoted: each grid dimension is passed as
    # a separate argument.
    DVMH_PPN="$LAUNCH_PPN" DVMH_NUM_THREADS="$CPUS_PER_PROC" \
      DVMH_NUM_CUDAS="$CUDAS_PER_PROC" \
      ./dvm run $PROC_GRID "$TASK_EXE" </dev/null >"$STDOUT_FN" 2>"$STDERR_FN" &
  fi
  LAUNCH_PID=$!

  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # Watchdog for the time limit. It is disowned so that the shell does not
    # track it as a job once it is killed below.
    proc_killer "-$LAUNCH_PID" "$TEST_MAX_TIME" </dev/null >/dev/null 2>&1 &
    KILLER_PID=$!
    disown
  fi

  wait "$LAUNCH_PID"
  START_RES=$?
  END_T=$(date +%s)
  CALC_TIME=$(( END_T - START_T ))

  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # The task is done; stop the watchdog with increasingly forceful signals.
    kill -2 "$KILLER_PID" >/dev/null 2>&1
    kill -15 "$KILLER_PID" >/dev/null 2>&1
    kill -9 "$KILLER_PID" >/dev/null 2>&1
  fi

  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    resources_freed
  fi

  echo "$START_RES $CALC_TIME" >"$TASK_EXE.finished"
}

#######################################
# Submits one task through the run command, then polls until the task has
# finished and records the outcome.
# Meant to be started in the background by launcher(), one instance per task.
# Globals read: LAUNCH_DIR, LAUNCH_NAME, TASK_EXE, PROC_GRID, LAUNCH_PPN,
#               CPUS_PER_PROC, CUDAS_PER_PROC, TEST_MAX_TIME, HAS_RES_MANAGER
# Files written (in LAUNCH_DIR):
#   "$TASK_EXE.committed" - empty marker: the run command has returned;
#                           launcher() waits for it when a resource manager
#                           is present
#   "$TASK_EXE.finished"  - "<run command status> <elapsed time>"; the time
#                           is whatever CALC_TIME already holds (normally
#                           nothing) when the task was not started
#######################################
non_interactive_launcher() {
  local STDOUT_FN STDERR_FN START_RES IS_LAUNCHED

  cd "$LAUNCH_DIR"
  STDOUT_FN=$(mktemp)
  STDERR_FN=$(mktemp)

  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # Time limit for the launched command, as ceil(TEST_MAX_TIME / 60).
    export maxtime=$(( (TEST_MAX_TIME + 59) / 60 ))
  fi

  if [ -f "run.sh" ]; then
    # A test-specific run script takes precedence over the generic runner.
    PATH="$LAUNCH_DIR:$PATH" PROC_GRID="$PROC_GRID" DVMH_PPN="$LAUNCH_PPN" \
      DVMH_NUM_THREADS="$CPUS_PER_PROC" DVMH_NUM_CUDAS="$CUDAS_PER_PROC" \
      ./run.sh >"$STDOUT_FN" 2>"$STDERR_FN"
  else
    # $PROC_GRID is intentionally unquoted: each grid dimension is passed as
    # a separate argument.
    DVMH_PPN="$LAUNCH_PPN" DVMH_NUM_THREADS="$CPUS_PER_PROC" \
      DVMH_NUM_CUDAS="$CUDAS_PER_PROC" \
      ./dvm run $PROC_GRID "$TASK_EXE" >"$STDOUT_FN" 2>"$STDERR_FN"
  fi
  # Status of whichever command ran in the branch above.
  START_RES=$?
  unset maxtime

  : >"$TASK_EXE.committed"

  # The captured output is only needed to decide whether the task was
  # actually started.
  IS_LAUNCHED=$(is_launched "$STDOUT_FN" "$STDERR_FN")
  rm -- "$STDOUT_FN" "$STDERR_FN"

  if [ "$START_RES" -eq 0 ] && [ "$IS_LAUNCHED" -ne 0 ]; then
    while [ "$(is_finished "$LAUNCH_NAME")" -eq 0 ]; do
      sleep 1
    done
    CALC_TIME=$(get_elapsed_time "$LAUNCH_NAME")
  fi

  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    resources_freed
  fi

  echo "$START_RES $CALC_TIME" >"$TASK_EXE.finished"
}

#######################################
# Tells whether the result file of the current test already holds a line
# for the current configuration.
# A line counts when it contains every one of these fields as a complete,
# space-delimited token (same spelling as print_result_line writes them):
#   PLATFORM="..." NOH_FLAG=.. AUTOTFM_FLAG=.. PROC_GRID="..."
#   CPUS_PER_PROC=.. CUDAS_PER_PROC=..
# Values are compared literally and as whole tokens, so CPUS_PER_PROC=1 does
# not match a recorded CPUS_PER_PROC=12 and characters such as "." in a
# platform name are not treated as wildcards.
# Globals read: RESULTS_DIR, TEST_SHORT_PATH, TEST_PLATFORM, TASK_NOH_FLAG,
#               TASK_AUTOTFM_FLAG, PROC_GRID, CPUS_PER_PROC, CUDAS_PER_PROC
# Outputs: "1" if such a line exists, "0" otherwise (also when the result
#          file does not exist).
#######################################
already_analyzed() {
  local result_file="$RESULTS_DIR/$TEST_SHORT_PATH.result"
  local res=0
  local line token all_present
  local -a wanted=(
    "PLATFORM=\"$TEST_PLATFORM\""
    "NOH_FLAG=$TASK_NOH_FLAG"
    "AUTOTFM_FLAG=$TASK_AUTOTFM_FLAG"
    "PROC_GRID=\"$PROC_GRID\""
    "CPUS_PER_PROC=$CPUS_PER_PROC"
    "CUDAS_PER_PROC=$CUDAS_PER_PROC"
  )

  if [ -f "$result_file" ]; then
    while IFS= read -r line || [ -n "$line" ]; do
      all_present=1
      for token in "${wanted[@]}"; do
        # Padding the line with spaces lets the first and the last field be
        # matched with the same " token " pattern as the inner ones.
        case " $line " in
          *" $token "*) ;;
          *)
            all_present=0
            break
            ;;
        esac
      done
      if [ "$all_present" -eq 1 ]; then
        res=1
        break
      fi
    done <"$result_file"
  fi

  echo "$res"
}

# Succeeds while a pause is requested: a file "dvm-tester.pause" whose whole
# content is "Immediate" exists in the start directory or next to the script.
_launcher_pause_requested() {
  local dir
  for dir in "$SAVE_DIR" "$MY_DIR"; do
    if [ -f "$dir/dvm-tester.pause" ] &&
      [ "$(cat "$dir/dvm-tester.pause")" = "Immediate" ]; then
      return 0
    fi
  done
  return 1
}

# Built-in resource accounting (no external resource manager): blocks until
# NEED_CPUS/NEED_CUDAS fit into FREE_CPUS/FREE_CUDAS, then subtracts them.
# Free resources grow by collecting the reports that finished tasks drop
# into RES_MAN_DIR (see resources_freed). The reports are addressed through
# RES_MAN_DIR directly, so the working directory is left untouched and only
# files from that directory are ever sourced.
_launcher_reserve_local_resources() {
  local report collected

  if [ "$SHARE_RESOURCES" -eq 0 ]; then
    # Exclusive mode: every task reserves the whole node.
    NEED_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
    NEED_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
  fi

  while [ "$FREE_CPUS" -lt "$NEED_CPUS" ] || [ "$FREE_CUDAS" -lt "$NEED_CUDAS" ]; do
    collected=0
    for report in "$RES_MAN_DIR"/*; do
      [ -f "$report" ] || continue
      FREED_CPUS=
      FREED_CUDAS=
      . "$report"
      # Only a report that defines both values is consumed; anything else is
      # left in place and looked at again on the next pass.
      if [ -n "$FREED_CPUS" ] && [ -n "$FREED_CUDAS" ]; then
        collected=1
        FREE_CPUS=$(( FREE_CPUS + FREED_CPUS ))
        FREE_CUDAS=$(( FREE_CUDAS + FREED_CUDAS ))
        rm -- "$report"
      fi
    done
    if [ "$collected" -eq 0 ]; then
      sleep 1
    fi
  done

  FREE_CPUS=$(( FREE_CPUS - NEED_CPUS ))
  FREE_CUDAS=$(( FREE_CUDAS - NEED_CUDAS ))
}

#######################################
# Reads task specifications from standard input (one line of shell
# assignments per task), starts every runnable task in the background and
# forwards the specifications of accepted tasks to the analyzer.
# Arguments:
#   $1 - path opened for writing on fd 4 (the FIFO that analyzer() reads)
# Per task:
#   TASK_TYPE=1 and not yet analyzed: the task is sized, copied into a fresh
#     launch directory and started once resources allow; its specification,
#     extended with LAUNCH_DIR and LAUNCH_NAME, is forwarded.
#   TASK_TYPE=0: nothing is started; the specification is forwarded as is.
#   anything else (including already analyzed tasks): discarded with a message.
# After the last task a single ":" line is written as end-of-stream marker.
# Outputs: progress messages and "Total tasks launched: N" on stdout.
#######################################
launcher() {
  counter=0

  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    if [ "$MAX_NODES_PER_TASK" -gt 1 ]; then
      echo "Can manage resources only for one-node system"
      MAX_NODES_PER_TASK=1
    fi
    FREE_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
    FREE_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
  fi

  exec 4>"$1"

  while IFS= read -r TASK_SPEC; do
    # Defaults for fields a specification may omit.
    TEST_PLATFORM=Unknown
    TASK_NOH_FLAG=0
    TASK_AUTOTFM_FLAG=0
    PROC_GRID=0
    CPUS_PER_PROC=0
    CUDAS_PER_PROC=0
    # NOTE(review): the specification is executed as shell code, so the
    # producer of the task list must be trusted. It is quoted so that it is
    # evaluated exactly as written (no word splitting or globbing first).
    eval "$TASK_SPEC"

    LAUNCHED_FLAG=0
    ALREADY_ANALYZED=$(already_analyzed)

    if [ "$TASK_TYPE" -eq 1 ] && [ "$ALREADY_ANALYZED" -eq 0 ]; then
      # Resources one node can offer to this task.
      CAN_CPUS=$CPUS_PER_NODE
      CAN_CUDAS=$CUDAS_PER_NODE
      if [ "$SHARE_RESOURCES" -ne 0 ]; then
        CAN_CPUS=$(( CAN_CPUS * MAX_CPU_SHARING_FACTOR ))
        CAN_CUDAS=$(( CAN_CUDAS * MAX_CUDA_SHARING_FACTOR ))
      fi

      # LAUNCH_PPN = min(MAX_PPN, what the CPUs allow, what the GPUs allow);
      # a limit only applies when the task asks for that kind of resource.
      LAUNCH_PPN=$MAX_PPN
      CUR_PPN=$LAUNCH_PPN
      if [ "$CPUS_PER_PROC" -gt 0 ]; then
        CUR_PPN=$(( CAN_CPUS / CPUS_PER_PROC ))
      fi
      if [ "$CUR_PPN" -lt "$LAUNCH_PPN" ]; then
        LAUNCH_PPN=$CUR_PPN
      fi
      if [ "$CUDAS_PER_PROC" -gt 0 ]; then
        CUR_PPN=$(( CAN_CUDAS / CUDAS_PER_PROC ))
      fi
      if [ "$CUR_PPN" -lt "$LAUNCH_PPN" ]; then
        LAUNCH_PPN=$CUR_PPN
      fi

      # Total process count is the product of the grid dimensions
      # ($PROC_GRID is split on whitespace on purpose).
      totalProcs=1
      for proc in $PROC_GRID; do
        totalProcs=$(( totalProcs * proc ))
      done

      if [ "$LAUNCH_PPN" -gt 0 ]; then
        # ceil(totalProcs / LAUNCH_PPN)
        USE_NODES=$(( ( totalProcs + LAUNCH_PPN - 1 ) / LAUNCH_PPN ))
      else
        # Not even one process fits on a node: force the "too big" branch.
        LAUNCH_PPN=1
        USE_NODES=$(( MAX_NODES_PER_TASK + 1 ))
      fi

      NEED_CPUS=$(( totalProcs * CPUS_PER_PROC ))
      NEED_CUDAS=$(( totalProcs * CUDAS_PER_PROC ))

      if [ "$USE_NODES" -le "$MAX_NODES_PER_TASK" ]; then
        # Launch. Every launch gets a private copy of the task directory.
        LAUNCH_DIR=$(mktemp -d) || {
          echo "Cannot create a launch directory, discarding task: $TASK_SPEC" >&2
          continue
        }
        counter=$(( counter + 1 ))
        cp -r "$TASK_DIR"/* "$LAUNCH_DIR"/
        TASK_SPEC="$TASK_SPEC LAUNCH_DIR=\"$LAUNCH_DIR\""

        if [ "$HAS_RES_MANAGER" -eq 0 ]; then
          LAUNCH_NAME="$LAUNCH_DIR/$TASK_EXE"
        else
          LAUNCH_NAME="$LAUNCH_DIR/$TASK_EXE.$totalProcs.1"
        fi
        TASK_SPEC="$TASK_SPEC LAUNCH_NAME=\"$LAUNCH_NAME\""

        while _launcher_pause_requested; do
          sleep 60
        done

        if [ "$HAS_RES_MANAGER" -ne 0 ]; then
          while [ "$(can_launch)" -eq 0 ]; do
            sleep 1
          done
        else
          _launcher_reserve_local_resources
        fi

        # Actually launch
        if [ "$INTERACTIVE" -ne 0 ]; then
          interactive_launcher &
        else
          non_interactive_launcher &
          if [ "$HAS_RES_MANAGER" -ne 0 ]; then
            # Do not go on to the next task before the run command of this
            # one has returned.
            while [ ! -f "$LAUNCH_DIR/$TASK_EXE.committed" ]; do
              sleep 1
            done
          fi
        fi
        LAUNCHED_FLAG=1
      else
        # Can not launch such big task
        echo "Discarding too big task: $TASK_SPEC"
      fi
    elif [ "$TASK_TYPE" -eq 0 ]; then
      LAUNCHED_FLAG=1
    else
      echo "Discarding task: $TASK_SPEC"
    fi

    if [ "$LAUNCHED_FLAG" -ne 0 ]; then
      printf '%s\n' "$TASK_SPEC" >&4
    fi
  done

  echo ":" >&4
  exec 4>&-
  echo "Total tasks launched: $counter"
}

#######################################
# Prints one result record for the current test as a single line of
# KEY=value fields (PLATFORM, PROC_GRID and RESULT_COMMENT are wrapped in
# double quotes).
# The record is produced by one printf, i.e. one write, so records appended
# to the same file are not assembled from many small writes; values are
# emitted verbatim.
# Globals read: TEST_PLATFORM, TASK_NOH_FLAG, TASK_AUTOTFM_FLAG, PROC_GRID,
#               CPUS_PER_PROC, CUDAS_PER_PROC, TASK_CALC_TIME, TEST_PASSED,
#               RESULT_COMMENT, ERROR_LEVEL
# Outputs: the record, terminated by a newline, on stdout.
#######################################
print_result_line() {
  printf 'PLATFORM="%s" NOH_FLAG=%s AUTOTFM_FLAG=%s PROC_GRID="%s" CPUS_PER_PROC=%s CUDAS_PER_PROC=%s CALC_TIME=%s TEST_PASSED=%s RESULT_COMMENT="%s" ERROR_LEVEL=%s\n' \
    "$TEST_PLATFORM" \
    "$TASK_NOH_FLAG" \
    "$TASK_AUTOTFM_FLAG" \
    "$PROC_GRID" \
    "$CPUS_PER_PROC" \
    "$CUDAS_PER_PROC" \
    "$TASK_CALC_TIME" \
    "$TEST_PASSED" \
    "$RESULT_COMMENT" \
    "$ERROR_LEVEL"
}

# Creates the sub-directory of RESULTS_DIR that will hold the result file of
# TEST_SHORT_PATH. Nothing is created when TEST_SHORT_PATH equals its own
# basename (no directory part).
_analyzer_ensure_result_dir() {
  if [ "$(basename "$TEST_SHORT_PATH")" != "$TEST_SHORT_PATH" ]; then
    mkdir -p "$RESULTS_DIR/$(dirname "$TEST_SHORT_PATH")"
  fi
}

#######################################
# Consumes the task specifications forwarded by launcher() and writes the
# result files.
# Arguments:
#   $1 - path to read specifications from (the FIFO written by launcher());
#        reading stops at a line consisting of ":" or at end of input
# Per specification:
#   TASK_TYPE=0: if "$TASK_DIR/$TASK_EXE" does not exist, a "Compilation
#     error" record is appended to the test's result file; TASK_DIR is then
#     removed in either case.
#   otherwise: waits for "$TASK_EXE.finished" in LAUNCH_DIR, sources
#     $TEST_ANALYZER to evaluate the run, appends the record (plus one record
#     per subtest when SUBTEST_COUNT > 0) and removes LAUNCH_DIR.
# Outputs: "Total tasks analyzed: N" on stdout.
#######################################
analyzer() {
  counter=0
  FIFO_NAME="$1"

  while IFS= read -r TASK_SPEC; do
    if [ "$TASK_SPEC" = ":" ]; then
      break
    fi

    # Defaults for fields a specification may omit.
    CPUS_PER_PROC=0
    CUDAS_PER_PROC=0
    TASK_NOH_FLAG=0
    TASK_AUTOTFM_FLAG=0
    # NOTE(review): the specification is executed as shell code, so the
    # producer of the task list must be trusted. It is quoted so that it is
    # evaluated exactly as written (no word splitting or globbing first).
    eval "$TASK_SPEC"

    if [ "$TASK_TYPE" -eq 0 ]; then
      if [ ! -f "$TASK_DIR/$TASK_EXE" ]; then
        # Report compilation error
        _analyzer_ensure_result_dir
        PROC_GRID=
        CPUS_PER_PROC=
        CUDAS_PER_PROC=
        TASK_CALC_TIME=
        TEST_PASSED=0
        RESULT_COMMENT="Compilation error"
        ERROR_LEVEL=255
        print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH.result"
      fi
      # Cleanup all the test's stuff
      rm -rf "$TASK_DIR"
    else
      counter=$(( counter + 1 ))
      cd "$LAUNCH_DIR"

      # Wait until the launcher has written the outcome. Testing for a
      # non-empty file (-s) rather than mere existence avoids reading the
      # file in the instant between its creation and the write.
      while [ ! -s "$TASK_EXE.finished" ]; do
        sleep 1
      done
      read -r LAUNCH_EXIT_CODE TASK_CALC_TIME <"$TASK_EXE.finished"

      STDOUT_FN=$(stdout_fn "$LAUNCH_NAME")
      STDERR_FN=$(stderr_fn "$LAUNCH_NAME")
      SUBTEST_COUNT=0

      # The test's analyzer script is sourced so that it can set the result
      # variables (and SUBTEST_COUNT / analyze_subtest) in this shell.
      # NOTE(review): left unquoted as in the original, in case the value
      # carries arguments after the script path -- confirm and quote.
      . $TEST_ANALYZER

      _analyzer_ensure_result_dir
      print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH.result"

      if [ "$SUBTEST_COUNT" -gt 0 ]; then
        mkdir -p "$RESULTS_DIR/$TEST_SHORT_PATH"
        for i in $(seq "$SUBTEST_COUNT"); do
          # Default name is the subtest number; the file name is taken after
          # analyze_subtest has run, so it may replace the name.
          SUBTEST_NAME=$i
          analyze_subtest "$i"
          print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH/$SUBTEST_NAME.result"
        done
      fi

      rm -rf "$LAUNCH_DIR"
    fi
  done <"$FIFO_NAME"

  echo "Total tasks analyzed: $counter"
}

# Main sequence: launcher and analyzer run concurrently and are connected by
# a named pipe that carries the specifications of accepted tasks.
FIFO_NAME="$(mktemp -u).launch-fifo"
mkfifo "$FIFO_NAME" || {
  echo "Cannot create FIFO $FIFO_NAME" >&2
  exit 1
}

# The analyzer goes to the background first: opening a FIFO blocks until the
# other end is opened as well, so reader and writer must not be started one
# after the other in the same foreground shell.
analyzer "$FIFO_NAME" &
launcher "$FIFO_NAME"

# Wait for the analyzer and for every task still running in the background.
wait

rm -- "$FIFO_NAME"

if [ "$HAS_RES_MANAGER" -eq 0 ]; then
  # Source the resource reports nobody collected (as the original did), then
  # drop the bookkeeping directory.
  for f in "$RES_MAN_DIR"/*; do
    [ -f "$f" ] || continue
    . "$f"
  done
  # Make sure the shell is not left standing inside the directory that is
  # about to be removed.
  cd "$SAVE_DIR"
  rm -rf "$RES_MAN_DIR"
fi