#!/bin/bash
# Bash is required due to usage of 'disown' command
#
# Launches test tasks and analyzes their results.
#
# Usage: <this script> RESULTS_DIR
#
# Task specifications are read from standard input, one per line. Each line is
# a string of shell variable assignments that is eval'ed (TASK_TYPE, TASK_DIR,
# TASK_EXE, TEST_SHORT_PATH, TEST_ANALYZER, PROC_GRID, ...).
#
# Two cooperating parts run concurrently and talk through a FIFO:
#   launcher - reads task specs from stdin, starts the runs, forwards each
#              accepted spec (extended with LAUNCH_DIR/LAUNCH_NAME) to the FIFO;
#   analyzer - reads specs from the FIFO, waits for each run to finish and
#              appends result lines under RESULTS_DIR.
#
# Configuration (INTERACTIVE, HAS_RES_MANAGER, SHARE_RESOURCES, CPUS_PER_NODE,
# MAX_PPN, TEST_MAX_TIME, ...) and helper functions (proc_killer, is_launched,
# is_finished, get_elapsed_time, can_launch, ...) are not defined here; they are
# presumably provided by the sourced machine-config.sh, configure-run.sh and
# test-utils.sh.

SAVE_DIR=$(pwd)
MY_DIR=$(cd "$(dirname "$(which "$0")")" && pwd)
RESULTS_DIR="$1"
# The analyzer changes into per-launch temporary directories (which are later
# removed) before writing results, so a relative RESULTS_DIR must be anchored
# to the invocation directory once, up front.
case "$RESULTS_DIR" in
  '' | /*) ;;
  *) RESULTS_DIR="$SAVE_DIR/$RESULTS_DIR" ;;
esac

# Defaults shipped next to the script first, then optional overrides from the
# invocation directory.
. "$MY_DIR/machine-config.sh"
if [ -f "$SAVE_DIR/machine-config.sh" ]; then
  . "$SAVE_DIR/machine-config.sh"
fi
. "$MY_DIR/configure-run.sh"
if [ -f "$SAVE_DIR/configure-run.sh" ]; then
  . "$SAVE_DIR/configure-run.sh"
fi
. "$MY_DIR/test-utils.sh"

# In interactive mode the output file names are derived locally from the launch
# name. Otherwise stdout_fn/stderr_fn must already be defined elsewhere
# (presumably by the sourced files - confirm).
if [ "$INTERACTIVE" -ne 0 ]; then
  stdout_fn() {
    echo "$1.stdout"
  }
  stderr_fn() {
    echo "$1.stderr"
  }
fi

# Without an external resource manager, finished runs report the resources they
# release by dropping small files into this directory.
if [ "$HAS_RES_MANAGER" -eq 0 ]; then
  RES_MAN_DIR=$(mktemp -d)
fi

#######################################
# Announces that the current run released its resources by placing a file with
# FREED_CPUS/FREED_CUDAS assignments into RES_MAN_DIR; the launcher sources it.
# Globals: SHARE_RESOURCES, CPUS_PER_NODE, CUDAS_PER_NODE,
#          MAX_CPU_SHARING_FACTOR, MAX_CUDA_SHARING_FACTOR, totalProcs,
#          CPUS_PER_PROC, CUDAS_PER_PROC, RES_MAN_DIR (read);
#          FN, FREED_CPUS, FREED_CUDAS (written)
#######################################
resources_freed() {
  FN=$(mktemp)
  if [ "$SHARE_RESOURCES" -eq 0 ]; then
    # Exclusive mode: a run occupies the entire node.
    FREED_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
    FREED_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
  else
    FREED_CPUS=$(( totalProcs * CPUS_PER_PROC ))
    FREED_CUDAS=$(( totalProcs * CUDAS_PER_PROC ))
  fi
  echo "FREED_CPUS=$FREED_CPUS" >>"$FN"
  echo "FREED_CUDAS=$FREED_CUDAS" >>"$FN"
  # echo "rm $FN" >>$FN
  # The file is completed before it is moved into RES_MAN_DIR, so the launcher
  # does not pick up a half-written file (assuming the move is a rename).
  mv "$FN" "$RES_MAN_DIR/"
}

#######################################
# Runs the current task directly, waits for it, and writes
# "<exit status> <elapsed seconds>" to "$TASK_EXE.finished" in LAUNCH_DIR.
# Meant to be started in the background by launcher().
# Globals: LAUNCH_DIR, LAUNCH_NAME, LAUNCH_PPN, PROC_GRID, TASK_EXE,
#          CPUS_PER_PROC, CUDAS_PER_PROC, TEST_MAX_TIME, HAS_RES_MANAGER (read)
#######################################
interactive_launcher() {
  cd "$LAUNCH_DIR"
  STDOUT_FN=$(stdout_fn "$LAUNCH_NAME")
  STDERR_FN=$(stderr_fn "$LAUNCH_NAME")
  : >"$STDOUT_FN"
  : >"$STDERR_FN"
  # Job control puts the launched command into its own process group; the
  # negative PID handed to proc_killer below presumably addresses that group.
  set -m
  # echo ./dvm run $PROC_GRID "$TASK_EXE"
  START_T=$(date +%s)
  # stdout must go to $STDOUT_FN: the analyzer reads the run's output from that
  # file. stdin is detached explicitly so the run can never consume input.
  if [ -f "run.sh" ]; then
    PATH="$LAUNCH_DIR:$PATH" \
      PROC_GRID="$PROC_GRID" \
      DVMH_PPN=$LAUNCH_PPN \
      DVMH_NUM_THREADS=$CPUS_PER_PROC \
      DVMH_NUM_CUDAS=$CUDAS_PER_PROC \
      ./run.sh </dev/null >"$STDOUT_FN" 2>"$STDERR_FN" &
    LAUNCH_PID=$!
  else
    # $PROC_GRID is intentionally unquoted: each grid dimension is a separate
    # argument.
    DVMH_PPN=$LAUNCH_PPN \
      DVMH_NUM_THREADS=$CPUS_PER_PROC \
      DVMH_NUM_CUDAS=$CUDAS_PER_PROC \
      ./dvm run $PROC_GRID "$TASK_EXE" </dev/null >"$STDOUT_FN" 2>"$STDERR_FN" &
    LAUNCH_PID=$!
  fi
  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # echo "Setting proc_killer to process $LAUNCH_PID for $TEST_MAX_TIME"
    # NOTE(review): proc_killer is defined outside this file; it is assumed to
    # take (target, timeout) and to need no terminal I/O - confirm against
    # test-utils.sh.
    proc_killer -$LAUNCH_PID $TEST_MAX_TIME </dev/null >/dev/null 2>&1 &
    KILLER_PID=$!
    disown
  fi
  wait $LAUNCH_PID
  START_RES=$?
  END_T=$(date +%s)
  CALC_TIME=$(( END_T - START_T ))
  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # The run is over; get rid of the watchdog, escalating the signal.
    kill -2 $KILLER_PID >/dev/null 2>&1
    kill -15 $KILLER_PID >/dev/null 2>&1
    kill -9 $KILLER_PID >/dev/null 2>&1
  fi
  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    resources_freed
  fi
  echo "$START_RES $CALC_TIME" >"$TASK_EXE.finished"
}

#######################################
# Submits the current task through ./run.sh or ./dvm, marks the submission with
# "$TASK_EXE.committed", polls until the run is reported finished, and writes
# "<exit status> <elapsed time>" to "$TASK_EXE.finished" in LAUNCH_DIR.
# Meant to be started in the background by launcher().
# Globals: LAUNCH_DIR, LAUNCH_NAME, LAUNCH_PPN, PROC_GRID, TASK_EXE,
#          CPUS_PER_PROC, CUDAS_PER_PROC, TEST_MAX_TIME, HAS_RES_MANAGER (read)
#######################################
non_interactive_launcher() {
  cd "$LAUNCH_DIR"
  # The submit command's own output is kept only long enough for is_launched
  # to inspect it.
  STDOUT_FN=$(mktemp)
  STDERR_FN=$(mktemp)
  # echo ./dvm run $PROC_GRID "$TASK_EXE"
  if [ "$TEST_MAX_TIME" -gt 0 ]; then
    # Seconds rounded up to whole minutes, exported for the submit command.
    export maxtime=$(( (TEST_MAX_TIME + 59) / 60 ))
  fi
  if [ -f "run.sh" ]; then
    PATH="$LAUNCH_DIR:$PATH" \
      PROC_GRID="$PROC_GRID" \
      DVMH_PPN=$LAUNCH_PPN \
      DVMH_NUM_THREADS=$CPUS_PER_PROC \
      DVMH_NUM_CUDAS=$CUDAS_PER_PROC \
      ./run.sh >"$STDOUT_FN" 2>"$STDERR_FN"
    START_RES=$?
  else
    # $PROC_GRID is intentionally unquoted: each grid dimension is a separate
    # argument.
    DVMH_PPN=$LAUNCH_PPN \
      DVMH_NUM_THREADS=$CPUS_PER_PROC \
      DVMH_NUM_CUDAS=$CUDAS_PER_PROC \
      ./dvm run $PROC_GRID "$TASK_EXE" >"$STDOUT_FN" 2>"$STDERR_FN"
    START_RES=$?
  fi
  unset maxtime
  # Tells launcher() that the submission step is over.
  : >"$TASK_EXE.committed"
  IS_LAUNCHED=$(is_launched "$STDOUT_FN" "$STDERR_FN")
  rm "$STDOUT_FN" "$STDERR_FN"
  if [ "$START_RES" -eq 0 ] && [ "$IS_LAUNCHED" -ne 0 ]; then
    while [ "$(is_finished "$LAUNCH_NAME")" -eq 0 ]; do
      sleep 1
    done
    CALC_TIME=$(get_elapsed_time "$LAUNCH_NAME")
  fi
  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    resources_freed
  fi
  echo "$START_RES $CALC_TIME" >"$TASK_EXE.finished"
}

#######################################
# Checks whether the result file of the current test already has a line for
# the current configuration.
# Globals: RESULTS_DIR, TEST_SHORT_PATH, TEST_PLATFORM, TASK_NOH_FLAG,
#          TASK_AUTOTFM_FLAG, PROC_GRID, CPUS_PER_PROC, CUDAS_PER_PROC (read)
# Outputs: 1 if such a line exists, 0 otherwise
#######################################
already_analyzed() {
  # echo -n "PLATFORM=\"$TEST_PLATFORM\""
  # echo -n " NOH_FLAG=$TASK_NOH_FLAG"
  # echo -n " AUTOTFM_FLAG=$TASK_AUTOTFM_FLAG"
  # echo -n " PROC_GRID=\"$PROC_GRID\""
  # echo -n " CPUS_PER_PROC=$CPUS_PER_PROC"
  # echo -n " CUDAS_PER_PROC=$CUDAS_PER_PROC"
  local res
  local result_file
  res=0
  result_file="$RESULTS_DIR/$TEST_SHORT_PATH.result"
  if [ -f "$result_file" ]; then
    # Quoted fields are delimited by their quotes, so a literal match is exact.
    # Unquoted numeric fields must be matched as whole space-separated fields:
    # a plain substring match would take CPUS_PER_PROC=1 for CPUS_PER_PROC=12.
    if grep -F -- "PLATFORM=\"$TEST_PLATFORM\"" "$result_file" \
      | grep -E -- "(^| )NOH_FLAG=$TASK_NOH_FLAG( |\$)" \
      | grep -E -- "(^| )AUTOTFM_FLAG=$TASK_AUTOTFM_FLAG( |\$)" \
      | grep -F -- "PROC_GRID=\"$PROC_GRID\"" \
      | grep -E -- "(^| )CPUS_PER_PROC=$CPUS_PER_PROC( |\$)" \
      | grep -E -- "(^| )CUDAS_PER_PROC=$CUDAS_PER_PROC( |\$)" \
      | grep -q .; then
      res=1
    fi
  fi
  echo $res
}

#######################################
# Reads task specs from stdin, starts every runnable one in the background and
# forwards accepted specs to the FIFO; a single ":" line ends the stream.
# Arguments: $1 - path of the FIFO read by analyzer()
# Outputs:   progress/diagnostic messages on stdout
# NOTE: task specs are eval'ed, so the task list must come from a trusted
#       source.
#######################################
launcher() {
  counter=0
  if [ "$HAS_RES_MANAGER" -eq 0 ]; then
    if [ "$MAX_NODES_PER_TASK" -gt 1 ]; then
      echo "Can manage resources only for one-node system"
      MAX_NODES_PER_TASK=1
    fi
    FREE_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
    FREE_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
  fi
  exec 4>"$1"
  while IFS= read -r TASK_SPEC; do
    # Defaults for fields a spec may omit.
    TEST_PLATFORM=Unknown
    TASK_NOH_FLAG=0
    TASK_AUTOTFM_FLAG=0
    PROC_GRID=0
    CPUS_PER_PROC=0
    CUDAS_PER_PROC=0
    # Quoted so the spec is evaluated verbatim (no word splitting or pathname
    # expansion before eval sees it).
    eval "$TASK_SPEC"
    LAUNCHED_FLAG=0
    ALREADY_ANALYZED=$(already_analyzed)
    if [ "$TASK_TYPE" -eq 1 ] && [ "$ALREADY_ANALYZED" -eq 0 ]; then
      # Processes per node: limited by MAX_PPN and by how many processes the
      # node's CPUs and CUDA devices can host.
      CAN_CPUS=$CPUS_PER_NODE
      CAN_CUDAS=$CUDAS_PER_NODE
      if [ "$SHARE_RESOURCES" -ne 0 ]; then
        CAN_CPUS=$(( CAN_CPUS * MAX_CPU_SHARING_FACTOR ))
        CAN_CUDAS=$(( CAN_CUDAS * MAX_CUDA_SHARING_FACTOR ))
      fi
      LAUNCH_PPN=$MAX_PPN
      CUR_PPN=$LAUNCH_PPN
      if [ "$CPUS_PER_PROC" -gt 0 ]; then
        CUR_PPN=$(( CAN_CPUS / CPUS_PER_PROC ))
      fi
      if [ "$CUR_PPN" -lt "$LAUNCH_PPN" ]; then
        LAUNCH_PPN=$CUR_PPN
      fi
      if [ "$CUDAS_PER_PROC" -gt 0 ]; then
        CUR_PPN=$(( CAN_CUDAS / CUDAS_PER_PROC ))
      fi
      if [ "$CUR_PPN" -lt "$LAUNCH_PPN" ]; then
        LAUNCH_PPN=$CUR_PPN
      fi
      # Total process count is the product of the grid dimensions.
      totalProcs=1
      for proc in $PROC_GRID; do
        totalProcs=$(( totalProcs * proc ))
      done
      if [ "$LAUNCH_PPN" -gt 0 ]; then
        USE_NODES=$(( ( totalProcs + LAUNCH_PPN - 1 ) / LAUNCH_PPN ))
      else
        # Not even one process fits on a node: force the "too big" branch.
        LAUNCH_PPN=1
        USE_NODES=$(( MAX_NODES_PER_TASK + 1 ))
      fi
      NEED_CPUS=$(( totalProcs * CPUS_PER_PROC ))
      NEED_CUDAS=$(( totalProcs * CUDAS_PER_PROC ))
      if [ "$USE_NODES" -le "$MAX_NODES_PER_TASK" ]; then
        # Launch
        counter=$(( counter + 1 ))
        # Every run gets a private copy of the task directory; analyzer()
        # removes it.
        LAUNCH_DIR=$(mktemp -d)
        cp -r "$TASK_DIR"/* "$LAUNCH_DIR/"
        TASK_SPEC="$TASK_SPEC LAUNCH_DIR=\"$LAUNCH_DIR\""
        if [ "$HAS_RES_MANAGER" -eq 0 ]; then
          LAUNCH_NAME="$LAUNCH_DIR/$TASK_EXE"
        else
          LAUNCH_NAME="$LAUNCH_DIR/$TASK_EXE.$totalProcs.1"
        fi
        TASK_SPEC="$TASK_SPEC LAUNCH_NAME=\"$LAUNCH_NAME\""
        # Hold off while a pause file containing "Immediate" exists in the
        # invocation directory or next to the script.
        while true; do
          if [ -f "$SAVE_DIR/dvm-tester.pause" ] \
            && [ "$(cat "$SAVE_DIR/dvm-tester.pause")" = "Immediate" ]; then
            :
          elif [ -f "$MY_DIR/dvm-tester.pause" ] \
            && [ "$(cat "$MY_DIR/dvm-tester.pause")" = "Immediate" ]; then
            :
          else
            break
          fi
          sleep 60
        done
        if [ "$HAS_RES_MANAGER" -ne 0 ]; then
          while [ "$(can_launch)" -eq 0 ]; do
            sleep 1
          done
        else
          if [ "$SHARE_RESOURCES" -eq 0 ]; then
            # Exclusive mode: a run reserves the entire node.
            NEED_CPUS=$(( CPUS_PER_NODE * MAX_CPU_SHARING_FACTOR ))
            NEED_CUDAS=$(( CUDAS_PER_NODE * MAX_CUDA_SHARING_FACTOR ))
          fi
          # Collect release notices (see resources_freed) until enough is free.
          # The files are addressed by absolute path so the launcher's working
          # directory stays untouched and nothing outside RES_MAN_DIR can ever
          # be sourced or removed.
          while [ "$FREE_CPUS" -lt "$NEED_CPUS" ] \
            || [ "$FREE_CUDAS" -lt "$NEED_CUDAS" ]; do
            FOUND_SMTH=0
            for f in "$RES_MAN_DIR"/*; do
              # An unmatched glob stays literal; skip it.
              [ -f "$f" ] || continue
              FREED_CPUS=
              FREED_CUDAS=
              . "$f"
              if [ -n "$FREED_CPUS" ] && [ -n "$FREED_CUDAS" ]; then
                FOUND_SMTH=1
                FREE_CPUS=$(( FREE_CPUS + FREED_CPUS ))
                FREE_CUDAS=$(( FREE_CUDAS + FREED_CUDAS ))
                rm "$f"
              fi
            done
            if [ "$FOUND_SMTH" -eq 0 ]; then
              sleep 1
            fi
          done
          FREE_CPUS=$(( FREE_CPUS - NEED_CPUS ))
          FREE_CUDAS=$(( FREE_CUDAS - NEED_CUDAS ))
        fi
        # Actually launch
        if [ "$INTERACTIVE" -ne 0 ]; then
          interactive_launcher &
        else
          non_interactive_launcher &
          if [ "$HAS_RES_MANAGER" -ne 0 ]; then
            # Do not go on to the next task before this one has been submitted.
            while [ ! -f "$LAUNCH_DIR/$TASK_EXE.committed" ]; do
              sleep 1
            done
          fi
        fi
        LAUNCHED_FLAG=1
      else
        # Can not launch such big task
        echo "Discarding too big task: $TASK_SPEC"
      fi
    elif [ "$TASK_TYPE" -eq 0 ]; then
      # Nothing to run; the spec is only passed on to analyzer().
      LAUNCHED_FLAG=1
    else
      echo "Discarding task: $TASK_SPEC"
    fi
    if [ "$LAUNCHED_FLAG" -ne 0 ]; then
      echo "$TASK_SPEC" >&4
    fi
  done
  # End-of-stream marker for analyzer().
  echo ":" >&4
  exec 4>&-
  echo "Total tasks launched: $counter"
}

#######################################
# Prints one result line describing the current configuration and outcome.
# The whole line is emitted by a single printf so that it reaches an
# append-mode result file in one piece.
# Globals: TEST_PLATFORM, TASK_NOH_FLAG, TASK_AUTOTFM_FLAG, PROC_GRID,
#          CPUS_PER_PROC, CUDAS_PER_PROC, TASK_CALC_TIME, TEST_PASSED,
#          RESULT_COMMENT, ERROR_LEVEL (read)
#######################################
print_result_line() {
  printf 'PLATFORM="%s" NOH_FLAG=%s AUTOTFM_FLAG=%s PROC_GRID="%s" CPUS_PER_PROC=%s CUDAS_PER_PROC=%s CALC_TIME=%s TEST_PASSED=%s RESULT_COMMENT="%s" ERROR_LEVEL=%s\n' \
    "$TEST_PLATFORM" "$TASK_NOH_FLAG" "$TASK_AUTOTFM_FLAG" "$PROC_GRID" \
    "$CPUS_PER_PROC" "$CUDAS_PER_PROC" "$TASK_CALC_TIME" "$TEST_PASSED" \
    "$RESULT_COMMENT" "$ERROR_LEVEL"
}

#######################################
# Consumes task specs from the FIFO until the ":" marker. For each launched
# task it waits for the ".finished" file, sources the test's analyzer script
# and appends result lines under RESULTS_DIR; for TASK_TYPE 0 it reports a
# compilation error if the executable is missing and removes the task directory.
# Arguments: $1 - path of the FIFO written by launcher()
#######################################
analyzer() {
  counter=0
  FIFO_NAME="$1"
  while IFS= read -r TASK_SPEC; do
    if [ "$TASK_SPEC" = ":" ]; then
      break
    fi
    CPUS_PER_PROC=0
    CUDAS_PER_PROC=0
    TASK_NOH_FLAG=0
    TASK_AUTOTFM_FLAG=0
    eval "$TASK_SPEC"
    if [ "$TASK_TYPE" -eq 0 ]; then
      if [ ! -f "$TASK_DIR/$TASK_EXE" ]; then
        # Report compilation error
        if [ "$(basename "$TEST_SHORT_PATH")" != "$TEST_SHORT_PATH" ]; then
          mkdir -p "$RESULTS_DIR/$(dirname "$TEST_SHORT_PATH")"
        fi
        PROC_GRID=
        CPUS_PER_PROC=
        CUDAS_PER_PROC=
        TASK_CALC_TIME=
        TEST_PASSED=0
        RESULT_COMMENT="Compilation error"
        ERROR_LEVEL=255
        print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH.result"
      fi
      # Cleanup all the test's stuff
      rm -rf "$TASK_DIR"
    else
      counter=$(( counter + 1 ))
      cd "$LAUNCH_DIR"
      while [ ! -f "$TASK_EXE.finished" ]; do
        sleep 1
      done
      read -r LAUNCH_EXIT_CODE TASK_CALC_TIME <"$TASK_EXE.finished"
      STDOUT_FN=$(stdout_fn "$LAUNCH_NAME")
      STDERR_FN=$(stderr_fn "$LAUNCH_NAME")
      SUBTEST_COUNT=0
      # The test's analyzer script is sourced in this shell; it is expected to
      # set TEST_PASSED, RESULT_COMMENT and ERROR_LEVEL, and may set
      # SUBTEST_COUNT and define analyze_subtest (not visible here - confirm).
      . "$TEST_ANALYZER"
      if [ "$(basename "$TEST_SHORT_PATH")" != "$TEST_SHORT_PATH" ]; then
        mkdir -p "$RESULTS_DIR/$(dirname "$TEST_SHORT_PATH")"
      fi
      print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH.result"
      if [ "$SUBTEST_COUNT" -gt 0 ]; then
        mkdir -p "$RESULTS_DIR/$TEST_SHORT_PATH"
        for i in $(seq "$SUBTEST_COUNT"); do
          # Default name is the index; analyze_subtest may replace it.
          SUBTEST_NAME=$i
          analyze_subtest $i
          print_result_line >>"$RESULTS_DIR/$TEST_SHORT_PATH/$SUBTEST_NAME.result"
        done
      fi
      # if [ $LAUNCH_EXIT_CODE -ne 0 -o "$RESULT_COMMENT" = "Crash" ]; then
      #   echo "Test's $TEST_SHORT_PATH stdout:"
      #   cat "$STDOUT_FN"
      #   echo "Test's $TEST_SHORT_PATH stderr:"
      #   cat "$STDERR_FN"
      # fi
      rm -rf "$LAUNCH_DIR"
    fi
  done <"$FIFO_NAME"
  echo "Total tasks analyzed: $counter"
}

# Main: analyzer in the background, launcher in the foreground (it consumes the
# script's stdin), then wait for the analyzer and all background runs.
FIFO_NAME="$(mktemp -u).launch-fifo"
mkfifo "$FIFO_NAME"
analyzer "$FIFO_NAME" &
launcher "$FIFO_NAME"
wait
rm "$FIFO_NAME"
if [ "$HAS_RES_MANAGER" -eq 0 ]; then
  # Source whatever release notices are still pending, then drop the directory.
  for f in "$RES_MAN_DIR"/*; do
    [ -f "$f" ] || continue
    . "$f"
  done
  rm -rf "$RES_MAN_DIR"
fi