From 8879eb2fbf33d01c7093355783a36410257d17e6 Mon Sep 17 00:00:00 2001 From: ALEXks Date: Fri, 24 Jan 2025 17:33:55 +0300 Subject: [PATCH] updated dvm --- dvm/fdvm/trunk/fdvm/acc.cpp | 14 ++++++-------- dvm/fdvm/trunk/fdvm/acc_across.cpp | 9 ++++----- dvm/fdvm/trunk/fdvm/calls.cpp | 19 ++++++++++++++----- dvm/fdvm/trunk/fdvm/funcall.cpp | 10 ++++++++++ dvm/fdvm/trunk/include/dvm.h | 3 ++- 5 files changed, 36 insertions(+), 19 deletions(-) diff --git a/dvm/fdvm/trunk/fdvm/acc.cpp b/dvm/fdvm/trunk/fdvm/acc.cpp index 26a145e..8f6ff5f 100644 --- a/dvm/fdvm/trunk/fdvm/acc.cpp +++ b/dvm/fdvm/trunk/fdvm/acc.cpp @@ -37,7 +37,6 @@ static SgSymbol *s_end[MAX_LOOP_LEVEL], *s_blocksS_k[MAX_LOOP_LEVEL], *s_loopSte static SgType *type_DvmType, *type_CudaIndexType, *type_with_len_DvmType, *type_FortranDvmType, *CudaIndexType_k; static int loopIndexCount; - //------ C ---------- static const char *red_kernel_func_names[] = { NULL, @@ -55,7 +54,6 @@ static const char *fermiPreprocDir = "CUDA_FERMI_ARCH"; static SgSymbol *s_CudaIndexType, *s_CudaOffsetTypeRef, *s_DvmType; static SgStatement *end_block, *end_info_block; -int warpSize = 32; reduction_operation_list *red_struct_list; symb_list *shared_list, *acc_call_list, *by_value_list; @@ -10369,7 +10367,7 @@ SgStatement *Assign_To_IndVar2(SgStatement *dost, int i, int nloop) { eth = ThreadIdxRefExpr("x"); if (currentLoop && currentLoop->irregularAnalysisIsOn()) - es = &((*new SgVarRefExp(s_cur_blocks) * *new SgRecordRefExp(*s_blockdim, "x") + *eth) / *new SgValueExp(warpSize)); + es = &((*new SgVarRefExp(s_cur_blocks) * *new SgRecordRefExp(*s_blockdim, "x") + *eth) / *new SgVarRefExp(s_warpsize)); else es = &(*new SgVarRefExp(s_cur_blocks) * *new SgRecordRefExp(*s_blockdim, "x") + *eth); es = step_e == NULL ? es : &(*es * *step_e); @@ -14174,7 +14172,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter) stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_overallBlocks), *new SgArrayRefExp(*s_blocksS, *new SgValueExp(0)))); st_end->insertStmtBefore(*stmt, *st_hedr); if (currentLoop && currentLoop->irregularAnalysisIsOn()) - stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_restBlocks), *new SgVarRefExp(*s_overallBlocks) * *new SgValueExp(warpSize))); + stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_restBlocks), *new SgVarRefExp(*s_overallBlocks) * *new SgVarRefExp(s_warpsize))); else stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_restBlocks), *new SgVarRefExp(*s_overallBlocks))); st_end->insertStmtBefore(*stmt, *st_hedr); @@ -14194,7 +14192,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter) /* ------ block for prepare reductions ----*/ if (red_list) { - InsertAssignForReduction(st_end, s_num_of_red_blocks, s_fill_flag, s_overallBlocks, s_threads); + InsertAssignForReduction(st_end, s_num_of_red_blocks, s_fill_flag, s_overallBlocks, s_threads, s_loop_ref); if(!options.isOn(C_CUDA)) InsertDoWhileForRedCount_C(st_end, s_threads, s_red_count); InsertPrepareReductionCalls(st_end, s_loop_ref, s_num_of_red_blocks, s_fill_flag, s_red_num); @@ -14237,7 +14235,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter) } if (currentLoop && currentLoop->irregularAnalysisIsOn()) { - stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *new SgVarRefExp(*s_max_blocks) / *new SgValueExp(warpSize) * *new SgValueExp(warpSize))); + stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *new SgVarRefExp(*s_max_blocks) / *GetWarpSize(s_loop_ref) * *GetWarpSize(s_loop_ref))); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -14751,7 +14749,7 @@ void InsertDoWhileForRedCount_C(SgStatement *cp, SgSymbol *s_threads, SgSymbol * */ } -void InsertAssignForReduction(SgStatement *st_where, SgSymbol *s_num_of_red_blocks, SgSymbol *s_fill_flag, SgSymbol *s_overallBlocks, SgSymbol *s_threads) +void InsertAssignForReduction(SgStatement *st_where, SgSymbol *s_num_of_red_blocks, SgSymbol *s_fill_flag, SgSymbol *s_overallBlocks, SgSymbol *s_threads, SgSymbol* s_loop_ref) { // inserting before statement 'st_where' the block of assignments: SgStatement *ass; @@ -14762,7 +14760,7 @@ void InsertAssignForReduction(SgStatement *st_where, SgSymbol *s_num_of_red_bloc SgExpression *re = new SgVarRefExp(*s_overallBlocks); if(options.isOn(C_CUDA)) - re = &(*re * (*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize))); + re = &(*re * (*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref))); ass = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(s_num_of_red_blocks), *re)); st_where->insertStmtBefore(*ass, *st_where->controlParent()); ass->addComment("// Prepare reduction"); diff --git a/dvm/fdvm/trunk/fdvm/acc_across.cpp b/dvm/fdvm/trunk/fdvm/acc_across.cpp index 6ab4e28..d4bc926 100644 --- a/dvm/fdvm/trunk/fdvm/acc_across.cpp +++ b/dvm/fdvm/trunk/fdvm/acc_across.cpp @@ -26,7 +26,6 @@ extern SgExpression *CudaReplicate(SgSymbol *, SgSymbol *, SgSymbol *, SgSymbol extern SgStatement *IncludeLine(char*); extern void optimizeLoopBodyForOne(vector &allNewInfo); extern void searchIdxs(vector &allInfo, SgExpression *st); -extern int warpSize; // local functions vector Create_C_Adapter_Function_Across_variants(SgSymbol*, SgSymbol*, const int, const int, const int, const vector&, const vector&); @@ -2904,7 +2903,7 @@ vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z")) - / *new SgValueExp(warpSize)); + / *GetWarpSize(s_loop_ref)); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -3217,7 +3216,7 @@ vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgVarRefExp(q) / *new SgVarRefExp(nums[0]) + SgNeqOp(*new SgVarRefExp(q) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) * *new SgRecordRefExp(*s_blocks, "y") * - *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize)); + *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref)); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -3226,7 +3225,7 @@ vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt e = &SgAssignOp(*new SgVarRefExp(*red_blocks), (*new SgVarRefExp(q) / *new SgVarRefExp(nums[0]) + SgNeqOp(*new SgVarRefExp(q) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * - *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize)); + *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref)); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } @@ -3672,7 +3671,7 @@ vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt SgNeqOp(*new SgVarRefExp(Emin) % *new SgVarRefExp(nums[0]), *new SgValueExp(0))) * (*f_m2 / *new SgVarRefExp(nums[1]) + SgNeqOp(*f_m2 % *new SgVarRefExp(nums[1]), *new SgValueExp(0))) * *new SgRecordRefExp(*s_blocks, (char*)s_cuda_var[2]) * - *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *new SgValueExp(warpSize)); + *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z") / *GetWarpSize(s_loop_ref)); stmt = new SgCExpStmt(*e); st_end->insertStmtBefore(*stmt, *st_hedr); } diff --git a/dvm/fdvm/trunk/fdvm/calls.cpp b/dvm/fdvm/trunk/fdvm/calls.cpp index 996d03b..7a08ba3 100644 --- a/dvm/fdvm/trunk/fdvm/calls.cpp +++ b/dvm/fdvm/trunk/fdvm/calls.cpp @@ -27,6 +27,7 @@ int do_stmtfn = 0; int gcount = 0; int has_generic_interface = 0; int in_region = 0; +int in_routine = 0; //----------------------------------------------------------------------------------------- graph_node *GraphNode(SgSymbol *s, SgStatement *header_st, int flag_new); graph_node *NodeForSymbInGraph(SgSymbol *s, SgStatement *stheader); @@ -1136,6 +1137,7 @@ SgStatement *Subprogram(SgStatement *func) DECL(func->symbol()) = 1; HEDR(func->symbol()) = func->thebif; cur_func = func; + in_routine = 0; //if( func->variant() == PROG_HEDR) // PROGRAM_HEADER(func->symbol()) = func->thebif; @@ -1175,7 +1177,9 @@ SgStatement *Subprogram(SgStatement *func) case PAUSE_NODE: case GOTO_NODE: // GO TO break; - + case ACC_ROUTINE_DIR: + in_routine = 1; + break; case VAR_DECL: case SWITCH_NODE: // SELECT CASE ... case ARITHIF_NODE: // Arithmetical IF @@ -1240,9 +1244,8 @@ END_: // for debugging if (deb_reg > 1) PrintGraphNode(cur_node); - + in_routine = 0; return(last); - } void FunctionCallSearch(SgExpression *e) @@ -1605,6 +1608,7 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e) { graph_node * gnode, *node_by_attr = NULL; SgSymbol *s_new = s; + SgStatement *interface_st = NULL; //printf("\n%s id= %d \n", s->identifier(), s->id()); if (!do_dummy && isDummyArgument(s)) return; if (!do_stmtfn && isStatementFunction(s)) return; @@ -1614,7 +1618,7 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e) if(s->variant() == INTERFACE_NAME && in_region) { //printf("INTERFACE_NAME %s\n",s->identifier()); - SgStatement *interface_st = getGenericInterface(s, stat ? stat->expr(0) : e->lhs()); + interface_st = getGenericInterface(s, stat ? stat->expr(0) : e->lhs()); SgSymbol *s_gen = s; if(!interface_st) { @@ -1648,7 +1652,12 @@ void Call_Site(SgSymbol *s, int inlined, SgStatement *stat, SgExpression *e) s_new->addAttribute(GRAPH_NODE, (void*)pnode, sizeof(graph_node *)); } if (gnode->st_header) - MarkAsUserProcedure(s_new); + MarkAsUserProcedure(s_new); + else if (in_routine && (interface_st || (interface_st = getInterface(s_new)))) + { + SaveInterface(s_new, interface_st); + MarkAsUserProcedure(s_new); + } //printf(" call site on line %d: %d %s: %d %d\n", stat ? stat->lineNumber() : 0, ATTR_NODE(s_new) ? GRAPHNODE(s_new)->id : -1, s_new->identifier(), s_new->id(), s->id()); } diff --git a/dvm/fdvm/trunk/fdvm/funcall.cpp b/dvm/fdvm/trunk/fdvm/funcall.cpp index 2a96aa2..fcef8cb 100644 --- a/dvm/fdvm/trunk/fdvm/funcall.cpp +++ b/dvm/fdvm/trunk/fdvm/funcall.cpp @@ -4987,3 +4987,13 @@ SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array) fe->addArg(*new SgVarRefExp(s_array)); return(fe); } + +SgExpression* GetWarpSize(SgSymbol* s_loop_ref) +{// generating function call: + // int dvmh_get_warp_size(DvmType *InDvmhLoop) + + SgFunctionCallExp* fe = new SgFunctionCallExp(*new SgSymbol(FUNCTION_NAME, "dvmh_get_warp_size", SgTypeInt(), s_loop_ref->scope())); + + fe->addArg(*new SgVarRefExp(s_loop_ref)); + return(fe); +} diff --git a/dvm/fdvm/trunk/include/dvm.h b/dvm/fdvm/trunk/include/dvm.h index f743488..3d3be5c 100644 --- a/dvm/fdvm/trunk/include/dvm.h +++ b/dvm/fdvm/trunk/include/dvm.h @@ -1366,7 +1366,7 @@ SgStatement *Assign_To_cur_blocks(int i, int nloop); SgStatement *Assign_To_rest_blocks(int i); SgStatement *Assign_To_IndVar2(SgStatement *dost, int i, int nloop); SgExpression *KernelCondition2(SgStatement *dost, int level); -void InsertAssignForReduction(SgStatement *st_where,SgSymbol *s_num_of_red_blocks,SgSymbol *s_fill_flag,SgSymbol *s_overallBlocks, SgSymbol *s_threads); +void InsertAssignForReduction(SgStatement *st_where,SgSymbol *s_num_of_red_blocks,SgSymbol *s_fill_flag,SgSymbol *s_overallBlocks, SgSymbol *s_threads, SgSymbol *s_loop_ref); void InsertPrepareReductionCalls(SgStatement *st_where,SgSymbol *s_loop_ref,SgSymbol *s_num_of_red_blocks,SgSymbol *s_fill_flag,SgSymbol *s_red_num); void InsertFinishReductionCalls(SgStatement *st_where,SgSymbol *s_loop_ref,SgSymbol *s_red_num); SgStatement *IfForHeader(SgSymbol *s_restBlocks, SgSymbol *s_blocks, SgSymbol *s_max_blocks); @@ -1925,6 +1925,7 @@ SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep); SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes); SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes); SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array); +SgExpression* GetWarpSize(SgSymbol* s_loop_ref); /* io.cpp */ void IO_ThroughBuffer(SgSymbol *ar, SgStatement *stmt, SgExpression *eiostat);