updated dvm

This commit is contained in:
ALEXks
2025-01-24 17:33:55 +03:00
committed by Dudarenko
parent d2f5e5fcc1
commit 44600a50c1
5 changed files with 36 additions and 19 deletions

View File

@@ -37,7 +37,6 @@ static SgSymbol *s_end[MAX_LOOP_LEVEL], *s_blocksS_k[MAX_LOOP_LEVEL], *s_loopSte
static SgType *type_DvmType, *type_CudaIndexType, *type_with_len_DvmType, *type_FortranDvmType, *CudaIndexType_k;
static int loopIndexCount;
//------ C ----------
static const char *red_kernel_func_names[] = {
NULL,
@@ -55,7 +54,6 @@ static const char *fermiPreprocDir = "CUDA_FERMI_ARCH";
static SgSymbol *s_CudaIndexType, *s_CudaOffsetTypeRef, *s_DvmType;
static SgStatement *end_block, *end_info_block;
int warpSize = 32;
reduction_operation_list *red_struct_list;
symb_list *shared_list, *acc_call_list, *by_value_list;
@@ -10369,7 +10367,7 @@ SgStatement *Assign_To_IndVar2(SgStatement *dost, int i, int nloop)
// ind_i = begin_i + (cur_blocks*blockDim%x + threadIdx%x [- 1]) [ * step_i ]
{
eth = ThreadIdxRefExpr("x");
if (currentLoop && currentLoop->irregularAnalysisIsOn())
if (currentLoop && currentLoop->irregularAnalysisIsOn())
es = &((*new SgVarRefExp(s_cur_blocks) * *new SgRecordRefExp(*s_blockdim, "x") + *eth) / *new SgVarRefExp(s_warpsize));
else
es = &(*new SgVarRefExp(s_cur_blocks) * *new SgRecordRefExp(*s_blockdim, "x") + *eth);
@@ -14174,7 +14172,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_overallBlocks), *new SgArrayRefExp(*s_blocksS, *new SgValueExp(0))));
st_end->insertStmtBefore(*stmt, *st_hedr);
if (currentLoop && currentLoop->irregularAnalysisIsOn())
if (currentLoop && currentLoop->irregularAnalysisIsOn())
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_restBlocks), *new SgVarRefExp(*s_overallBlocks) * *new SgVarRefExp(s_warpsize)));
else
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_restBlocks), *new SgVarRefExp(*s_overallBlocks)));
@@ -14194,7 +14192,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
/* ------ block for prepare reductions ----*/
if (red_list)
{
{
InsertAssignForReduction(st_end, s_num_of_red_blocks, s_fill_flag, s_overallBlocks, s_threads, s_loop_ref);
if(!options.isOn(C_CUDA))
InsertDoWhileForRedCount_C(st_end, s_threads, s_red_count);
@@ -14237,7 +14235,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, st_end, st_hedr, new SgVarRefExp(s_total_threads));
}
if (currentLoop && currentLoop->irregularAnalysisIsOn())
{
{
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *new SgVarRefExp(*s_max_blocks) / *GetWarpSize(s_loop_ref) * *GetWarpSize(s_loop_ref)));
st_end->insertStmtBefore(*stmt, *st_hedr);
}
@@ -14751,7 +14749,7 @@ void InsertDoWhileForRedCount_C(SgStatement *cp, SgSymbol *s_threads, SgSymbol *
// !!!!!!!!!!!!! END OF DEPRECATED !!!!!!!!!!!!!!!!!!!!!!
*/
}
void InsertAssignForReduction(SgStatement *st_where, SgSymbol *s_num_of_red_blocks, SgSymbol *s_fill_flag, SgSymbol *s_overallBlocks, SgSymbol *s_threads, SgSymbol* s_loop_ref)
{
// inserting before statement 'st_where' the block of assignments:
@@ -14762,7 +14760,7 @@ void InsertAssignForReduction(SgStatement *st_where, SgSymbol *s_num_of_red_bloc
// num_of_red_blocks = overallBlocks;
SgExpression *re = new SgVarRefExp(*s_overallBlocks);
if(options.isOn(C_CUDA))
if(options.isOn(C_CUDA))
re = &(*re * (*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref)));
ass = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(s_num_of_red_blocks), *re));
st_where->insertStmtBefore(*ass, *st_where->controlParent());

View File

@@ -26,7 +26,6 @@ extern SgExpression *CudaReplicate(SgSymbol *, SgSymbol *, SgSymbol *, SgSymbol
extern SgStatement *IncludeLine(char*);
extern void optimizeLoopBodyForOne(vector<newInfo> &allNewInfo);
extern void searchIdxs(vector<acrossInfo> &allInfo, SgExpression *st);
extern int warpSize;
// local functions
vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol*, SgSymbol*, const int, const int, const int, const vector<SageSymbols>&, const vector<SageSymbols>&);
@@ -2904,7 +2903,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
e = &SgAssignOp(*new SgVarRefExp(*red_blocks),
(*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") *
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z"))
/ *new SgValueExp(warpSize));
/ *GetWarpSize(s_loop_ref));
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
}
@@ -3217,7 +3216,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgVarRefExp(q) / *new SgVarRefExp(nums[0]) +
SgNeqOp(*new SgVarRefExp(q) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) *
*new SgRecordRefExp(*s_blocks, "y") *
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize));
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref));
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
}
@@ -3226,7 +3225,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgVarRefExp(q) / *new SgVarRefExp(nums[0]) +
SgNeqOp(*new SgVarRefExp(q) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) *
*new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") *
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize));
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref));
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
}
@@ -3672,7 +3671,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
SgNeqOp(*new SgVarRefExp(Emin) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) *
(*f_m2 / *new SgVarRefExp(nums[1]) + SgNeqOp(*f_m2 % *new SgVarRefExp(nums[1]), *new SgValueExp(0)))
* *new SgRecordRefExp(*s_blocks, (char*)s_cuda_var[2]) *
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize));
*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref));
stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr);
}

View File

@@ -27,6 +27,7 @@ int do_stmtfn = 0;
int gcount = 0;
int has_generic_interface = 0;
int in_region = 0;
int in_routine = 0;
//-----------------------------------------------------------------------------------------
graph_node *GraphNode(SgSymbol *s, SgStatement *header_st, int flag_new);
graph_node *NodeForSymbInGraph(SgSymbol *s, SgStatement *stheader);
@@ -1136,6 +1137,7 @@ SgStatement *Subprogram(SgStatement *func)
DECL(func->symbol()) = 1;
HEDR(func->symbol()) = func->thebif;
cur_func = func;
in_routine = 0;
//if( func->variant() == PROG_HEDR)
// PROGRAM_HEADER(func->symbol()) = func->thebif;
@@ -1175,7 +1177,9 @@ SgStatement *Subprogram(SgStatement *func)
case PAUSE_NODE:
case GOTO_NODE: // GO TO
break;
case ACC_ROUTINE_DIR:
in_routine = 1;
break;
case VAR_DECL:
case SWITCH_NODE: // SELECT CASE ...
case ARITHIF_NODE: // Arithmetical IF
@@ -1240,9 +1244,8 @@ END_:
// for debugging
if (deb_reg > 1)
PrintGraphNode(cur_node);
in_routine = 0;
return(last);
}
void FunctionCallSearch(SgExpression *e)
@@ -1605,6 +1608,7 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e)
{
graph_node * gnode, *node_by_attr = NULL;
SgSymbol *s_new = s;
SgStatement *interface_st = NULL;
//printf("\n%s id= %d \n", s->identifier(), s->id());
if (!do_dummy && isDummyArgument(s)) return;
if (!do_stmtfn && isStatementFunction(s)) return;
@@ -1614,7 +1618,7 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e)
if(s->variant() == INTERFACE_NAME && in_region)
{
//printf("INTERFACE_NAME %s\n",s->identifier());
SgStatement *interface_st = getGenericInterface(s, stat ? stat->expr(0) : e->lhs());
interface_st = getGenericInterface(s, stat ? stat->expr(0) : e->lhs());
SgSymbol *s_gen = s;
if(!interface_st)
{
@@ -1649,6 +1653,11 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e)
}
if (gnode->st_header)
MarkAsUserProcedure(s_new);
else if (in_routine && (interface_st || (interface_st = getInterface(s_new))))
{
SaveInterface(s_new, interface_st);
MarkAsUserProcedure(s_new);
}
//printf(" call site on line %d: %d %s: %d %d\n", stat ? stat->lineNumber() : 0, ATTR_NODE(s_new) ? GRAPHNODE(s_new)->id : -1, s_new->identifier(), s_new->id(), s->id());
}

View File

@@ -4987,3 +4987,13 @@ SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array)
fe->addArg(*new SgVarRefExp(s_array));
return(fe);
}
SgExpression* GetWarpSize(SgSymbol* s_loop_ref)
{
    // Builds the expression for the call
    //     int dvmh_get_warp_size(DvmType *InDvmhLoop)
    // A new function-name symbol of integer type is created in the scope of
    // 's_loop_ref' on every invocation; a reference to 's_loop_ref' is passed
    // as the single actual argument of the call.
    SgSymbol* warpSizeFunc = new SgSymbol(FUNCTION_NAME, "dvmh_get_warp_size", SgTypeInt(), s_loop_ref->scope());
    SgFunctionCallExp* callExpr = new SgFunctionCallExp(*warpSizeFunc);

    SgVarRefExp* loopRefArg = new SgVarRefExp(s_loop_ref);
    callExpr->addArg(*loopRefArg);

    return callExpr;
}

View File

@@ -1366,7 +1366,7 @@ SgStatement *Assign_To_cur_blocks(int i, int nloop);
SgStatement *Assign_To_cur_blocks(int i, int nloop);
SgStatement *Assign_To_rest_blocks(int i);
SgStatement *Assign_To_IndVar2(SgStatement *dost, int i, int nloop);
SgExpression *KernelCondition2(SgStatement *dost, int level);
SgExpression *KernelCondition2(SgStatement *dost, int level);
void InsertAssignForReduction(SgStatement *st_where,SgSymbol *s_num_of_red_blocks,SgSymbol *s_fill_flag,SgSymbol *s_overallBlocks, SgSymbol *s_threads, SgSymbol *s_loop_ref);
void InsertPrepareReductionCalls(SgStatement *st_where,SgSymbol *s_loop_ref,SgSymbol *s_num_of_red_blocks,SgSymbol *s_fill_flag,SgSymbol *s_red_num);
void InsertFinishReductionCalls(SgStatement *st_where,SgSymbol *s_loop_ref,SgSymbol *s_red_num);
@@ -1925,6 +1925,7 @@ SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep);
SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep);
SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes);
SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes);
SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array);
SgExpression* GetWarpSize(SgSymbol* s_loop_ref);
/* io.cpp */