diff --git a/dvm/fdvm/trunk/fdvm/acc.cpp b/dvm/fdvm/trunk/fdvm/acc.cpp index 02e9cbd..5758e8e 100644 --- a/dvm/fdvm/trunk/fdvm/acc.cpp +++ b/dvm/fdvm/trunk/fdvm/acc.cpp @@ -5338,6 +5338,18 @@ SgStatement *doIfThenConstrForLoop_GPU(SgExpression *ref, SgStatement *endhost, return(ifst); } +SgExpression *ReductionPrivateVariables() +{ + reduction_operation_list *rl; + SgExpression *red_vars=NULL; + for (rl = red_struct_list; rl; rl = rl->next) + { + red_vars = AddListToList(red_vars, new SgExprListExp(*new SgVarRefExp(rl->redvar))); + if (rl->locvar) + red_vars = AddListToList(red_vars, new SgExprListExp(*new SgVarRefExp(rl->locvar))); + } + return red_vars; +} SgExpression * TranslateReductionToOpenmp(SgExpression *reduction_clause) /* OpenMP */ { @@ -6003,20 +6015,20 @@ SgStatement *Create_Host_Loop_Subroutine_Main (SgSymbol *sHostProc) SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) { SgStatement *stmt = NULL, *st_end = NULL, *st_hedr = NULL, *cur = NULL, *last_decl = NULL, *ass = NULL; - SgStatement *alloc = NULL; + SgStatement *alloc = NULL, *red_init_first = NULL; SgStatement *paralleldo = NULL; SgStatement *firstdopar = NULL; SgExprListExp *parallellist = NULL; SgExprListExp *omp_dolist = NULL; SgExprListExp *omp_perflist = NULL; - SgExpression *ae, *arg_list = NULL, *el = NULL, *de = NULL, *tail = NULL, *baseMem_list = NULL; + SgExpression *ae, *arg_list = NULL, *el = NULL, *de = NULL, *tail = NULL, *baseMem_list = NULL, *omp_red_vars=NULL; SgSymbol *s_loop_ref = NULL, *sarg = NULL, *h_first = NULL, *hl = NULL; SgSymbol *s_lgsc = NULL; /* OpenMP */ SgVarRefExp *v_lgsc = NULL; /* OpenMP */ SgSymbol *s = NULL, *s_low_bound = NULL, *s_high_bound = NULL, *s_step = NULL; symb_list *sl = NULL; SgType *tdvm = NULL; - int ln, lrank, addopenmp; + int ln, lrank, addopenmp, number_of_reductions = 0; char *name; tail = NULL; addopenmp = 1; /* OpenMP */ @@ -6185,7 +6197,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) // create reduction variables declarations and // generate 'loop_red_init' and 'loop_red_post' function calls - + //looking through the reduction list if (red_list) { @@ -6193,10 +6205,9 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) SgExpression *ev, *ered, *er, *red; SgSymbol *loc_var; reduction_operation_list *rl; - - red = TranslateReductionToOpenmp(&red_list->copy()); /* OpenMP */ - if (red != NULL) parallellist->append(*red); /* OpenMP */ - else addopenmp = 0; /* OpenMP */ + red = TranslateReductionToOpenmp(&red_list->copy()); /* OpenMP */ + if(red != NULL) parallellist->append(*red); /* OpenMP */ + else omp_red_vars = ReductionPrivateVariables(); /*MAXLOC/MINLOC*/ /* OpenMP */ for (rl = red_struct_list,nr = 1; rl; rl = rl->next, nr++) { if (rl->locvar) @@ -6209,10 +6220,12 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) stmt = LoopRedInit_HH(s_loop_ref, nr, sred, rl->locvar); cur->insertStmtAfter(*stmt, *st_hedr); cur = stmt; + if (nr == 1) red_init_first = stmt; stmt = LoopRedPost_HH(s_loop_ref, nr, sred, rl->locvar); st_end->insertStmtBefore(*stmt, *st_hedr); } + number_of_reductions = nr; /* OpenMP */ } // create local variables and it's declarations: @@ -6251,7 +6264,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) DeclareArrayCoefficients(st_hedr); // - if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, new SgExprListExp(*private_list), NULL, NULL)); /* OpenMP */ + if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, &(private_list->copy()), NULL, NULL)); /* OpenMP */ for (el = private_list; el; el = el->rhs()) { SgSymbol *sp = el->lhs()->symbol(); @@ -6273,7 +6286,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) } /* OpenMP */ } - if ((addopenmp == 1) && (indexes != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, indexes, NULL, NULL)); /* OpenMP */ + if ((addopenmp == 1) && (indexes != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, AddListToList(indexes,omp_red_vars), NULL, NULL)); /* OpenMP */ // create dummy argument declarations @@ -6324,35 +6337,37 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) else firstdopar = stmt = first_do_par->copyPtr(); cur->insertStmtAfter(*stmt, *st_hedr); - if (addopenmp == 1) { /* OpenMP */ - SgCallStmt *stDvmhstat = NULL; - SgStatement *omp_do = new SgStatement(OMP_DO_DIR); /* OpenMP */ - SgStatement *omp_parallel = new SgStatement(OMP_PARALLEL_DIR); /* OpenMP */ - SgStatement *omp_endparallel = new SgStatement(OMP_END_PARALLEL_DIR); /* OpenMP */ - SgStatement *omp_enddo = new SgStatement(OMP_END_DO_DIR); /* OpenMP */ - SgForStmt *stdo = isSgForStmt(firstdopar); /* OpenMP */ - SgStatement *lastdo=LastStatementOfDoNest(stdo); - cur->insertStmtAfter(*omp_parallel, *st_hedr); /* OpenMP */ - if (omp_perf) {/* OpenMP */ - stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BP],*omp_perflist);/* OpenMP */ - stDvmhstat->setlineNumber(-1);/* OpenMP */ - cur->insertStmtAfter(*stDvmhstat, *st_hedr); /* OpenMP */ - } - lastdo->insertStmtAfter(*omp_endparallel); /* OpenMP */ - if (omp_perf) {/* OpenMP */ - stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AL],*omp_perflist);/* OpenMP */ - stDvmhstat->setlineNumber(-1);/* OpenMP */ - lastdo->insertStmtAfter(*stDvmhstat);/* OpenMP */ - }/* OpenMP */ - omp_parallel->setExpression(0, *parallellist);/* OpenMP */ - omp_do->setExpression(0, *omp_dolist);/* OpenMP */ - omp_enddo->setExpression(0, *new SgExprListExp(*new SgExpression(OMP_NOWAIT))); /* OpenMP */ - ass = new SgAssignStmt(*v_lgsc, *LoopGetSlotCount_HH(s_loop_ref)); /* OpenMP */ + SgCallStmt *stDvmhstat = NULL; + SgStatement *omp_do = new SgStatement(OMP_DO_DIR); /* OpenMP */ + SgStatement *omp_parallel = new SgStatement(OMP_PARALLEL_DIR); /* OpenMP */ + SgStatement *omp_endparallel = new SgStatement(OMP_END_PARALLEL_DIR); /* OpenMP */ + SgStatement *omp_enddo = new SgStatement(OMP_END_DO_DIR); /* OpenMP */ + SgForStmt *stdo = isSgForStmt(firstdopar); /* OpenMP */ + SgStatement *lastdo=LastStatementOfDoNest(stdo); + cur->insertStmtAfter(*omp_parallel, *st_hedr); /* OpenMP */ + if (omp_perf) {/* OpenMP */ + stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BP],*omp_perflist);/* OpenMP */ + stDvmhstat->setlineNumber(-1);/* OpenMP */ + cur->insertStmtAfter(*stDvmhstat, *st_hedr); /* OpenMP */ + } + if (omp_red_vars) /* MINLOC/MAXLOC */ /* OpenMP */ + st_end->insertStmtBefore(*omp_endparallel,*st_hedr); /* OpenMP */ + else + lastdo->insertStmtAfter(*omp_endparallel,*st_hedr); /* OpenMP */ + if (omp_perf) {/* OpenMP */ + stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AL],*omp_perflist);/* OpenMP */ + stDvmhstat->setlineNumber(-1);/* OpenMP */ + lastdo->insertStmtAfter(*stDvmhstat);/* OpenMP */ + }/* OpenMP */ + omp_parallel->setExpression(0, *parallellist);/* OpenMP */ + omp_do->setExpression(0, *omp_dolist);/* OpenMP */ + omp_enddo->setExpression(0, *new SgExprListExp(*new SgExpression(OMP_NOWAIT))); /* OpenMP */ + ass = new SgAssignStmt(*v_lgsc, *LoopGetSlotCount_HH(s_loop_ref)); /* OpenMP */ if (!dependency) { - omp_parallel->insertStmtAfter(*omp_do); /* OpenMP */ - lastdo->insertStmtAfter(*omp_enddo); /* OpenMP */ + omp_parallel->insertStmtAfter(*omp_do); /* OpenMP */ + lastdo->insertStmtAfter(*omp_enddo); /* OpenMP */ } else if (isSgForStmt(firstdopar->lexNext())) { /* OpenMP */ int step = 1; /* OpenMP */ SgSymbol *s_iam = NULL; /* OpenMP */ @@ -6515,6 +6530,17 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) ass->setlineNumber(-1); /* OpenMP */ } /* OpenMP */ cur->insertStmtAfter(*ass, *st_hedr); /* OpenMP */ + if (omp_red_vars) { /* OpenMP */ + //transfer of reduction initialization statements in case of maxloc/minloc + int i; /* OpenMP */ + SgStatement *from = red_init_first->lexPrev(); /* OpenMP */ + cur = omp_parallel; /* OpenMP */ + for (i=number_of_reductions-1; i; i--) { /* OpenMP */ + stmt = from->lexNext()->extractStmt(); /* OpenMP */ + cur->insertStmtAfter(*stmt); /* OpenMP */ + cur = stmt; /* OpenMP */ + } /* OpenMP */ + } /* OpenMP */ if (omp_perf) {/* OpenMP */ stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BL],*omp_perflist);/* OpenMP */ stDvmhstat->setlineNumber(-1);/* OpenMP */ @@ -6523,6 +6549,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) stDvmhstat->setlineNumber(-1);/* OpenMP */ omp_endparallel->insertStmtAfter(*stDvmhstat);/* OpenMP */ }/* OpenMP */ + } /* OpenMP */ @@ -7943,6 +7970,21 @@ SgSymbol *RedVariableSymbolInKernel(SgSymbol *s, SgExpression *dimSizeArgs, SgEx return(soff); } +SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl) +{ + SgType *declT; + + if (isSgArrayType(rsl->locvar->type())) + { + SgArrayType *arrT = new SgArrayType(*C_Type(rsl->locvar->type())); + arrT->addDimension(new SgValueExp(rsl->number)); + declT = arrT; + } + else + declT = C_Type(rsl->locvar->type()); + return (new SgVariableSymb(rsl->locvar->identifier(), *declT, *kernel_st)); +} + SgSymbol *SymbolInKernel(SgSymbol *s) { char *name; @@ -8399,7 +8441,7 @@ void MakeDeclarationsForKernelGpuO1(SgSymbol *red_count_symb, SgType *idxTypeInK DeclareDoVars(); // declare private(local in kernel) variables - DeclarePrivateVars(); + DeclarePrivateVars(idxTypeInKernel); // declare dummy arguments: // declare reduction dummy arguments @@ -9539,7 +9581,7 @@ void MakeDeclarationsForKernel(SgSymbol *red_count_symb, SgType *idxTypeInKernel DeclareDoVars(); // declare private(local in kernel) variables - DeclarePrivateVars(); + DeclarePrivateVars(idxTypeInKernel); // declare dummy arguments: eatr = new SgExprListExp(*new SgExpression(ACC_VALUE_OP)); @@ -9601,7 +9643,7 @@ void MakeDeclarationsForKernel_On_C(SgType *idxTypeInKernel) DeclareDoVars(idxTypeInKernel); // declare private(local in kernel) variables - DeclarePrivateVars(); + DeclarePrivateVars(idxTypeInKernel); // declare variables, used in loop and passed by reference: // & = *p_; @@ -9791,6 +9833,11 @@ void DeclareInternalPrivateVars() } void DeclarePrivateVars() +{ + DeclarePrivateVars(C_UnsignedLongLongType()); +} + +void DeclarePrivateVars(SgType *idxTypeInKernel) { SgStatement *st = NULL, *st_first=NULL; SgExpression *var = NULL, *e; @@ -9825,7 +9872,7 @@ void DeclarePrivateVars() { char *name = new char[strlen(s->identifier())+7]; sprintf(name, "_%s_dims", s->identifier()); - s_dims = ArraySymbol(name, C_UnsignedLongLongType(), new SgValueExp(Rank(s)-1), kernel_st); + s_dims = ArraySymbol(name, idxTypeInKernel, new SgValueExp(Rank(s)-1), kernel_st); SgExpression *einit = new SgExpression(INIT_LIST); SgExpression *elist = NULL; if (!TestArrayShape(s)) @@ -9841,7 +9888,7 @@ void DeclarePrivateVars() else { for (int i=Rank(s)-1; i; i--) - elist = AddListToList(elist, Calculate(ArrayDimSize(s,i))); + elist = AddListToList(elist, new SgExprListExp(*Calculate(ArrayDimSize(s,i)))); } einit->setLhs(elist); SgStatement *st_dims = makeSymbolDeclarationWithInit(s_dims, einit);//Declaration_Statement(s_dims); @@ -10587,18 +10634,7 @@ void ReductionBlockInKernel_On_C_Cuda(SgStatement *stat, SgSymbol *i_var, SgExpr } else if (rsl->locvar) // maxloc/minloc reduction scalar { - SgType *decl; - int rank = rsl->number; - - if (rank > 1) - { - SgArrayType *arrT = new SgArrayType(*C_Type(rsl->locvar->type())); - arrT->addDimension(new SgValueExp(rank)); - decl = arrT; - } - else - decl = C_Type(rsl->locvar->type()); - newst = Declaration_Statement(new SgVariableSymb(rsl->locvar->identifier(), *decl, *kernel_st)); //declare location variable + newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable kernel_st->insertStmtAfter(*newst, *kernel_st); // __dvmh_blockReduceLoc(, ) @@ -10615,20 +10651,30 @@ void ReductionBlockInKernel_On_C_Cuda(SgStatement *stat, SgSymbol *i_var, SgExpr stat->insertStmtBefore(*new SgCExpStmt(*fun_ref), *stat->controlParent()); - newst = AssignStatement(new SgArrayRefExp(*rsl->red_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(rsl->redvar)); + if (across) + newst = AssignStatement(new SgArrayRefExp(*rsl->red_grid, *ex), new SgVarRefExp(rsl->redvar)); + else + newst = AssignStatement(new SgArrayRefExp(*rsl->red_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(rsl->redvar)); + if_st->insertStmtAfter(*newst); if (rsl->number > 1) { for (int i = 0; i < rsl->number; ++i) { - newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * (*BlockIdxRefExpr("x") * *ex1 + *ex) + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i))); + if (across) + newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * *ex + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i))); + else + newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * (*BlockIdxRefExpr("x") * *ex1 + *ex) + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i))); if_st->lastExecutable()->insertStmtAfter(*newst); } } else { - newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(*rsl->locvar)); + if (across) + newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *ex), new SgVarRefExp(*rsl->locvar)); + else + newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(*rsl->locvar)); if_st->lastExecutable()->insertStmtAfter(*newst); } diff --git a/dvm/fdvm/trunk/fdvm/acc_across.cpp b/dvm/fdvm/trunk/fdvm/acc_across.cpp index 7351ac2..95f7ec1 100644 --- a/dvm/fdvm/trunk/fdvm/acc_across.cpp +++ b/dvm/fdvm/trunk/fdvm/acc_across.cpp @@ -1588,6 +1588,7 @@ vector Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap for (int i = NumberOfCoeffs(sg); i>0; i--) funcCall->addArg(*new SgArrayRefExp(*sg, *new SgValueExp(i))); } + if (red_list) { reduction_operation_list *rsl; @@ -1613,6 +1614,7 @@ vector Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap funcCall->addArg(*new SgArrayRefExp(*s, *new SgValueExp(i))); } s = s->next(); + if (options.isOn(C_CUDA)) funcCall->addArg(*new SgVarRefExp(reduction_ptr[i])); else @@ -1738,8 +1740,8 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu SgFunctionCallExp *funcCallKernel, SgSymbol* numBlocks, int &has_red_array) { reduction_operation_list *rsl; - SgSymbol *s; - SgExpression *e; + SgSymbol *s = NULL; + SgExpression *e = NULL; for (rsl = red_struct_list, s = red_first; rsl; rsl = rsl->next) //s!=s_blocks_info { @@ -1776,13 +1778,16 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu else funcCallKernel->addArg(*new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(reduction_ptr[i]))); - //TODO!! if (rsl->locvar) //MAXLOC,MINLOC { for (int k = 0; k < rsl->number; ++k) funcCallKernel->addArg(*new SgArrayRefExp(*reduction_loc_symb[i], *new SgValueExp(k))); s = s->next(); - e = new SgCastExp(*C_PointerType(options.isOn(C_CUDA) ? C_Type(rsl->locvar->type()) : new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s)); + + if (options.isOn(C_CUDA)) + e = new SgCastExp(*C_PointerType(C_Type(rsl->locvar->type())), *new SgVarRefExp(reduction_loc_ptr[i])); + else + e = new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s));// TODO it like in C_Cuda funcCallKernel->addArg(*e); s = s->next(); } @@ -2442,6 +2447,7 @@ vector Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt stmt = makeSymbolDeclarationWithInit(s, new SgValueExp(0)); st_hedr->insertStmtAfter(*stmt, *st_hedr); } + // create indxs for (int i = 0; i < acrossV; ++i) { @@ -4216,7 +4222,7 @@ void MakeDeclarationsForKernel_On_C_Across(SgType *indexType) DeclareDoVars(indexType); // declare private(local in kernel) variables - DeclarePrivateVars(); + DeclarePrivateVars(indexType); // declare variables, used in loop and passed by reference: // & = *p_; @@ -4233,7 +4239,7 @@ void MakeDeclarationsForKernelAcross(SgType *indexType) DeclareDoVars(); // declare private(local in kernel) variables - DeclarePrivateVars(); + DeclarePrivateVars(indexType); // declare dummy arguments: @@ -5829,6 +5835,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i flag_func_call = 0; // maxloc else if (num == 10) flag_func_call = 0; // minloc + if (flag_func_call == 1) { SgFunctionCallExp *funcCall = new SgFunctionCallExp(*createNewFunctionSymbol(str_operation)); @@ -5923,9 +5930,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i locGrid->setType(*new SgArrayType(*tmp_list->loc_grid->type())); if (options.isOn(C_CUDA)) - st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i))); + st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(loc_el_num) * *e1 + *new SgValueExp(i)), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i))); else - st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1))); + st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1)));//TODO it like in C_Cuda ifSt->insertStmtAfter(*st); } } @@ -6153,13 +6160,13 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope if (rsl->locvar) { - newst = Declaration_Statement(rsl->locvar); //declare location variable + newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable kernel_st->insertStmtAfter(*newst, *kernel_st); } if (rsl->redvar_size > 0) { - newst = Declaration_Statement(rsl->redvar); //declare reduction variable + newst = Declaration_Statement(RedVariableSymbolInKernel(rsl->redvar,NULL,NULL)); //declare reduction variable kernel_st->insertStmtAfter(*newst, *kernel_st); } else if (rsl->redvar_size < 0) @@ -6168,26 +6175,22 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope newst = Declaration_Statement(red_var_k); //declare reduction variable kernel_st->insertStmtAfter(*newst, *kernel_st); } - rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type(); - s_block = RedBlockSymbolInKernelAcross(red_var, rtype); - - newst = Declaration_Statement(s_block); - - if (options.isOn(C_CUDA)) - newst->addDeclSpec(BIT_CUDA_SHARED | BIT_EXTERN); - else + //XXX: shared memory doesnt use in ACROSS by C_Cuda + if (!options.isOn(C_CUDA)) { + rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type(); + s_block = RedBlockSymbolInKernelAcross(red_var, rtype); + newst = Declaration_Statement(s_block); eatr = new SgExprListExp(*new SgExpression(ACC_SHARED_OP)); - newst->setExpression(2, *eatr); - } + newst->setExpression(2, *eatr); + kernel_st->insertStmtAfter(*newst, *kernel_st); - kernel_st->insertStmtAfter(*newst, *kernel_st); - - if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC - { - typedecl = MakeStructDecl(rtype->symbol()); - kernel_st->insertStmtAfter(*typedecl, *kernel_st); + if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC + { + typedecl = MakeStructDecl(rtype->symbol()); + kernel_st->insertStmtAfter(*typedecl, *kernel_st); + } } } diff --git a/dvm/fdvm/trunk/fdvm/acc_across_analyzer.cpp b/dvm/fdvm/trunk/fdvm/acc_across_analyzer.cpp index eb1d6bf..2c680ca 100644 --- a/dvm/fdvm/trunk/fdvm/acc_across_analyzer.cpp +++ b/dvm/fdvm/trunk/fdvm/acc_across_analyzer.cpp @@ -5,10 +5,12 @@ using namespace std; // special storages to avoid recomputing -map lhs; -map rhs; -map unparsedLhs; -map unparsedRhs; +static map lhs; +static map rhs; +static map unparsedLhs; +static map unparsedRhs; + +extern reduction_operation_list* red_struct_list; template static inline OutIt difference(InIt1 first1, InIt1 last1, InIt2 first2, InIt2 last2, OutIt dest) @@ -1220,7 +1222,14 @@ void Loop::analyzeInderectAccess() Loop::Loop(SgStatement* loop_body, bool enable_opt, bool irreg_access) : irregular_acc_opt(irreg_access), enable_opt(enable_opt), loop_body(loop_body), dimension(0), acrossType(0), acrossDims(NULL), do_irreg_opt(false) -{ +{ + reduction_operation_list* rsl; + for (rsl = red_struct_list; rsl; rsl = rsl->next) + { + if (rsl->locvar) //MAXLOC,MINLOC + redArrays.insert(rsl->locvar); + } + lhs.clear(); rhs.clear(); unparsedLhs.clear(); @@ -1420,7 +1429,7 @@ void Loop::analyzeAssignments(SgExpression* ex, const int blockIndex) else { SgSymbol* symbol = ex->symbol(); - if (isSgArrayType(symbol->type()) != NULL) + if (isSgArrayType(symbol->type()) != NULL && redArrays.find(symbol) == redArrays.end()) { SgExpression* subscripts = ((SgArrayRefExp*)(ex))->subscripts(); if (!subscripts) @@ -1949,7 +1958,14 @@ void Loop::buildCFG() } Loop::Loop(SgStatement* stmt) : do_irreg_opt(false) -{ +{ + reduction_operation_list* rsl; + for (rsl = red_struct_list; rsl; rsl = rsl->next) + { + if (rsl->locvar) //MAXLOC,MINLOC + redArrays.insert(rsl->locvar); + } + lhs.clear(); rhs.clear(); unparsedLhs.clear(); unparsedRhs.clear(); buildCFG(); } diff --git a/dvm/fdvm/trunk/include/acc_across_analyzer.h b/dvm/fdvm/trunk/include/acc_across_analyzer.h index b2a1f1f..77be4ac 100644 --- a/dvm/fdvm/trunk/include/acc_across_analyzer.h +++ b/dvm/fdvm/trunk/include/acc_across_analyzer.h @@ -104,6 +104,7 @@ private: SgStatement* loop_body; int dimension; std::map arrays; + std::set redArrays; int* acrossDims; int acrossType; std::vector symbols; diff --git a/dvm/fdvm/trunk/include/dvm.h b/dvm/fdvm/trunk/include/dvm.h index db50a5f..31e51aa 100644 --- a/dvm/fdvm/trunk/include/dvm.h +++ b/dvm/fdvm/trunk/include/dvm.h @@ -1377,6 +1377,7 @@ int TestOneGroupStatement(SgStatement *stmt); void DeclareUsedVars(); void DeclareInternalPrivateVars(); void DeclarePrivateVars(); +void DeclarePrivateVars(SgType *idxTypeInKernel); void DeclareArrayBases(); void DeclareArrayCoeffsInKernel(SgType*); void DeclareLocalPartVars(); @@ -1451,6 +1452,7 @@ SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr); SgExpression *CreatePrivateDummyList(); char *PointerNameForPrivateArray(SgSymbol *symb); void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads); +SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl); /* acc_analyzer.cpp */ //void Private_Vars_Analyzer(SgStatement *firstSt, SgStatement *lastSt);