7 Commits

5 changed files with 157 additions and 89 deletions

View File

@@ -5338,6 +5338,18 @@ SgStatement *doIfThenConstrForLoop_GPU(SgExpression *ref, SgStatement *endhost,
return(ifst); return(ifst);
} }
// Builds one expression list holding a variable reference for every entry of
// red_struct_list: the reduction variable of each entry, immediately followed
// by the entry's location variable when it has one.
// Returns NULL when red_struct_list is empty.
SgExpression *ReductionPrivateVariables()
{
    SgExpression *collected = NULL;
    reduction_operation_list *entry = red_struct_list;

    while (entry)
    {
        // the reduction variable itself is always added
        SgExprListExp *redItem = new SgExprListExp(*new SgVarRefExp(entry->redvar));
        collected = AddListToList(collected, redItem);

        // an entry that carries a location variable contributes it as well
        if (entry->locvar)
        {
            SgExprListExp *locItem = new SgExprListExp(*new SgVarRefExp(entry->locvar));
            collected = AddListToList(collected, locItem);
        }

        entry = entry->next;
    }

    return collected;
}
SgExpression * TranslateReductionToOpenmp(SgExpression *reduction_clause) /* OpenMP */ SgExpression * TranslateReductionToOpenmp(SgExpression *reduction_clause) /* OpenMP */
{ {
@@ -6003,20 +6015,20 @@ SgStatement *Create_Host_Loop_Subroutine_Main (SgSymbol *sHostProc)
SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency) SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
{ {
SgStatement *stmt = NULL, *st_end = NULL, *st_hedr = NULL, *cur = NULL, *last_decl = NULL, *ass = NULL; SgStatement *stmt = NULL, *st_end = NULL, *st_hedr = NULL, *cur = NULL, *last_decl = NULL, *ass = NULL;
SgStatement *alloc = NULL, *red_init_first = NULL; SgStatement *alloc = NULL, *red_init_first = NULL;
SgStatement *paralleldo = NULL; SgStatement *paralleldo = NULL;
SgStatement *firstdopar = NULL; SgStatement *firstdopar = NULL;
SgExprListExp *parallellist = NULL; SgExprListExp *parallellist = NULL;
SgExprListExp *omp_dolist = NULL; SgExprListExp *omp_dolist = NULL;
SgExprListExp *omp_perflist = NULL; SgExprListExp *omp_perflist = NULL;
SgExpression *ae, *arg_list = NULL, *el = NULL, *de = NULL, *tail = NULL, *baseMem_list = NULL, *omp_red_vars=NULL; SgExpression *ae, *arg_list = NULL, *el = NULL, *de = NULL, *tail = NULL, *baseMem_list = NULL, *omp_red_vars=NULL;
SgSymbol *s_loop_ref = NULL, *sarg = NULL, *h_first = NULL, *hl = NULL; SgSymbol *s_loop_ref = NULL, *sarg = NULL, *h_first = NULL, *hl = NULL;
SgSymbol *s_lgsc = NULL; /* OpenMP */ SgSymbol *s_lgsc = NULL; /* OpenMP */
SgVarRefExp *v_lgsc = NULL; /* OpenMP */ SgVarRefExp *v_lgsc = NULL; /* OpenMP */
SgSymbol *s = NULL, *s_low_bound = NULL, *s_high_bound = NULL, *s_step = NULL; SgSymbol *s = NULL, *s_low_bound = NULL, *s_high_bound = NULL, *s_step = NULL;
symb_list *sl = NULL; symb_list *sl = NULL;
SgType *tdvm = NULL; SgType *tdvm = NULL;
int ln, lrank, addopenmp, number_of_reductions = 0; int ln, lrank, addopenmp, number_of_reductions = 0;
char *name; char *name;
tail = NULL; tail = NULL;
@@ -6193,10 +6205,9 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
int nr; int nr;
SgExpression *ev, *ered, *er, *red; SgExpression *ev, *ered, *er, *red;
SgSymbol *loc_var; SgSymbol *loc_var;
reduction_operation_list *rl; reduction_operation_list *rl;
red = TranslateReductionToOpenmp(&red_list->copy()); /* OpenMP */
red = TranslateReductionToOpenmp(&red_list->copy()); /* OpenMP */ if(red != NULL) parallellist->append(*red); /* OpenMP */
if (red != NULL) parallellist->append(*red); /* OpenMP */
else omp_red_vars = ReductionPrivateVariables(); /*MAXLOC/MINLOC*/ /* OpenMP */ else omp_red_vars = ReductionPrivateVariables(); /*MAXLOC/MINLOC*/ /* OpenMP */
for (rl = red_struct_list,nr = 1; rl; rl = rl->next, nr++) for (rl = red_struct_list,nr = 1; rl; rl = rl->next, nr++)
{ {
@@ -6209,10 +6220,12 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
// generate loop_red_init and loop_red_post function calls // generate loop_red_init and loop_red_post function calls
stmt = LoopRedInit_HH(s_loop_ref, nr, sred, rl->locvar); stmt = LoopRedInit_HH(s_loop_ref, nr, sred, rl->locvar);
cur->insertStmtAfter(*stmt, *st_hedr); cur->insertStmtAfter(*stmt, *st_hedr);
cur = stmt;
if (nr == 1) red_init_first = stmt; if (nr == 1) red_init_first = stmt;
stmt = LoopRedPost_HH(s_loop_ref, nr, sred, rl->locvar); stmt = LoopRedPost_HH(s_loop_ref, nr, sred, rl->locvar);
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
}
number_of_reductions = nr; /* OpenMP */ number_of_reductions = nr; /* OpenMP */
} }
@@ -6251,7 +6264,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
if (!options.isOn(O_HOST)) if (!options.isOn(O_HOST))
DeclareArrayCoefficients(st_hedr); DeclareArrayCoefficients(st_hedr);
// <private_variables> // <private_variables>
if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, &(private_list->copy()), NULL, NULL)); /* OpenMP */ if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, &(private_list->copy()), NULL, NULL)); /* OpenMP */
for (el = private_list; el; el = el->rhs()) for (el = private_list; el; el = el->rhs())
{ {
@@ -6273,7 +6286,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
else indexes = new SgExprListExp(*el->lhs()); /* OpenMP */ else indexes = new SgExprListExp(*el->lhs()); /* OpenMP */
} /* OpenMP */ } /* OpenMP */
} }
if ((addopenmp == 1) && (indexes != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, AddListToList(indexes,omp_red_vars), NULL, NULL)); /* OpenMP */ if ((addopenmp == 1) && (indexes != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, AddListToList(indexes,omp_red_vars), NULL, NULL)); /* OpenMP */
// create dummy argument declarations // create dummy argument declarations
@@ -6325,34 +6338,36 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
else firstdopar = stmt = first_do_par->copyPtr(); else firstdopar = stmt = first_do_par->copyPtr();
cur->insertStmtAfter(*stmt, *st_hedr); cur->insertStmtAfter(*stmt, *st_hedr);
if (addopenmp == 1) { /* OpenMP */ if (addopenmp == 1) { /* OpenMP */
SgCallStmt *stDvmhstat = NULL; SgCallStmt *stDvmhstat = NULL;
SgStatement *omp_do = new SgStatement(OMP_DO_DIR); /* OpenMP */ SgStatement *omp_do = new SgStatement(OMP_DO_DIR); /* OpenMP */
SgStatement *omp_parallel = new SgStatement(OMP_PARALLEL_DIR); /* OpenMP */ SgStatement *omp_parallel = new SgStatement(OMP_PARALLEL_DIR); /* OpenMP */
SgStatement *omp_endparallel = new SgStatement(OMP_END_PARALLEL_DIR); /* OpenMP */ SgStatement *omp_endparallel = new SgStatement(OMP_END_PARALLEL_DIR); /* OpenMP */
SgStatement *omp_enddo = new SgStatement(OMP_END_DO_DIR); /* OpenMP */ SgStatement *omp_enddo = new SgStatement(OMP_END_DO_DIR); /* OpenMP */
SgForStmt *stdo = isSgForStmt(firstdopar); /* OpenMP */ SgForStmt *stdo = isSgForStmt(firstdopar); /* OpenMP */
SgStatement *lastdo=LastStatementOfDoNest(stdo); SgStatement *lastdo=LastStatementOfDoNest(stdo);
cur->insertStmtAfter(*omp_parallel, *st_hedr); /* OpenMP */ cur->insertStmtAfter(*omp_parallel, *st_hedr); /* OpenMP */
if (omp_perf) {/* OpenMP */ if (omp_perf) {/* OpenMP */
stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BP],*omp_perflist);/* OpenMP */ stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BP],*omp_perflist);/* OpenMP */
stDvmhstat->setlineNumber(-1);/* OpenMP */ stDvmhstat->setlineNumber(-1);/* OpenMP */
cur->insertStmtAfter(*stDvmhstat, *st_hedr); /* OpenMP */ cur->insertStmtAfter(*stDvmhstat, *st_hedr); /* OpenMP */
} }
lastdo->insertStmtAfter(*omp_endparallel); /* OpenMP */ if (omp_red_vars) /* MINLOC/MAXLOC */ /* OpenMP */
if (omp_perf) {/* OpenMP */ st_end->insertStmtBefore(*omp_endparallel,*st_hedr); /* OpenMP */
stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AL],*omp_perflist);/* OpenMP */ else
stDvmhstat->setlineNumber(-1);/* OpenMP */ lastdo->insertStmtAfter(*omp_endparallel,*st_hedr); /* OpenMP */
lastdo->insertStmtAfter(*stDvmhstat);/* OpenMP */ if (omp_perf) {/* OpenMP */
}/* OpenMP */ stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AL],*omp_perflist);/* OpenMP */
omp_parallel->setExpression(0, *parallellist);/* OpenMP */ stDvmhstat->setlineNumber(-1);/* OpenMP */
omp_do->setExpression(0, *omp_dolist);/* OpenMP */ lastdo->insertStmtAfter(*stDvmhstat);/* OpenMP */
omp_enddo->setExpression(0, *new SgExprListExp(*new SgExpression(OMP_NOWAIT))); /* OpenMP */ }/* OpenMP */
omp_parallel->setExpression(0, *parallellist);/* OpenMP */
omp_do->setExpression(0, *omp_dolist);/* OpenMP */
omp_enddo->setExpression(0, *new SgExprListExp(*new SgExpression(OMP_NOWAIT))); /* OpenMP */
ass = new SgAssignStmt(*v_lgsc, *LoopGetSlotCount_HH(s_loop_ref)); /* OpenMP */ ass = new SgAssignStmt(*v_lgsc, *LoopGetSlotCount_HH(s_loop_ref)); /* OpenMP */
if (!dependency) { if (!dependency) {
omp_parallel->insertStmtAfter(*omp_do); /* OpenMP */ omp_parallel->insertStmtAfter(*omp_do); /* OpenMP */
lastdo->insertStmtAfter(*omp_enddo); /* OpenMP */ lastdo->insertStmtAfter(*omp_enddo); /* OpenMP */
} else if (isSgForStmt(firstdopar->lexNext())) { /* OpenMP */ } else if (isSgForStmt(firstdopar->lexNext())) { /* OpenMP */
int step = 1; /* OpenMP */ int step = 1; /* OpenMP */
@@ -6515,6 +6530,17 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
if (alloc != NULL) cur->insertStmtAfter(*alloc, *st_hedr); /* OpenMP */ if (alloc != NULL) cur->insertStmtAfter(*alloc, *st_hedr); /* OpenMP */
ass->setlineNumber(-1); /* OpenMP */ ass->setlineNumber(-1); /* OpenMP */
} /* OpenMP */ } /* OpenMP */
cur->insertStmtAfter(*ass, *st_hedr); /* OpenMP */
if (omp_red_vars) { /* OpenMP */
//transfer of reduction initialization statements in case of maxloc/minloc
int i; /* OpenMP */
SgStatement *from = red_init_first->lexPrev(); /* OpenMP */
cur = omp_parallel; /* OpenMP */
for (i=number_of_reductions-1; i; i--) { /* OpenMP */
stmt = from->lexNext()->extractStmt(); /* OpenMP */
cur->insertStmtAfter(*stmt); /* OpenMP */
cur = stmt; /* OpenMP */
} /* OpenMP */
} /* OpenMP */ } /* OpenMP */
if (omp_perf) {/* OpenMP */ if (omp_perf) {/* OpenMP */
stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BL],*omp_perflist);/* OpenMP */ stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_BL],*omp_perflist);/* OpenMP */
@@ -6523,6 +6549,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AP],*omp_perflist);/* OpenMP */ stDvmhstat = new SgCallStmt(*fdvm[OMP_STAT_AP],*omp_perflist);/* OpenMP */
stDvmhstat->setlineNumber(-1);/* OpenMP */ stDvmhstat->setlineNumber(-1);/* OpenMP */
omp_endparallel->insertStmtAfter(*stDvmhstat);/* OpenMP */ omp_endparallel->insertStmtAfter(*stDvmhstat);/* OpenMP */
}/* OpenMP */
} /* OpenMP */ } /* OpenMP */
@@ -7943,6 +7970,21 @@ SgSymbol *RedVariableSymbolInKernel(SgSymbol *s, SgExpression *dimSizeArgs, SgEx
return(soff); return(soff);
} }
// Creates a new variable symbol, scoped to kernel_st, that carries the same
// identifier as the location variable of the given reduction entry.
//   rsl - reduction entry; rsl->locvar must be non-NULL (it is dereferenced
//         unconditionally).
// The symbol's type is C_Type() of the location variable's type; when that
// original type is an array type, the converted type is wrapped in an array
// type with a single dimension of rsl->number.
SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl)
{
    SgSymbol *locSym = rsl->locvar;
    const bool locIsArray = isSgArrayType(locSym->type()) != NULL;

    // scalar case: the converted type is used as is
    SgType *kernelType = C_Type(locSym->type());

    if (locIsArray)
    {
        // array case: one dimension of rsl->number over the converted type
        SgArrayType *wrapped = new SgArrayType(*kernelType);
        wrapped->addDimension(new SgValueExp(rsl->number));
        kernelType = wrapped;
    }

    return new SgVariableSymb(locSym->identifier(), *kernelType, *kernel_st);
}
SgSymbol *SymbolInKernel(SgSymbol *s) SgSymbol *SymbolInKernel(SgSymbol *s)
{ {
@@ -8399,7 +8441,7 @@ void MakeDeclarationsForKernelGpuO1(SgSymbol *red_count_symb, SgType *idxTypeInK
// declare do_variables // declare do_variables
DeclareDoVars(); DeclareDoVars();
// declare private(local in kernel) variables // declare private(local in kernel) variables
DeclarePrivateVars(idxTypeInKernel); DeclarePrivateVars(idxTypeInKernel);
// declare dummy arguments: // declare dummy arguments:
@@ -9539,7 +9581,7 @@ void MakeDeclarationsForKernel(SgSymbol *red_count_symb, SgType *idxTypeInKernel
// declare do_variables // declare do_variables
DeclareDoVars(); DeclareDoVars();
// declare private(local in kernel) variables // declare private(local in kernel) variables
DeclarePrivateVars(idxTypeInKernel); DeclarePrivateVars(idxTypeInKernel);
// declare dummy arguments: // declare dummy arguments:
@@ -9601,7 +9643,7 @@ void MakeDeclarationsForKernel_On_C(SgType *idxTypeInKernel)
// declare do_variables // declare do_variables
DeclareDoVars(idxTypeInKernel); DeclareDoVars(idxTypeInKernel);
// declare private(local in kernel) variables // declare private(local in kernel) variables
DeclarePrivateVars(idxTypeInKernel); DeclarePrivateVars(idxTypeInKernel);
// declare variables, used in loop and passed by reference: // declare variables, used in loop and passed by reference:
@@ -9791,6 +9833,11 @@ void DeclareInternalPrivateVars()
} }
} }
void DeclarePrivateVars()
{
DeclarePrivateVars(C_UnsignedLongLongType());
}
void DeclarePrivateVars(SgType *idxTypeInKernel) void DeclarePrivateVars(SgType *idxTypeInKernel)
{ {
SgStatement *st = NULL, *st_first=NULL; SgStatement *st = NULL, *st_first=NULL;
@@ -9825,7 +9872,7 @@ void DeclarePrivateVars()
if (Rank(s)>1) if (Rank(s)>1)
{ {
char *name = new char[strlen(s->identifier())+7]; char *name = new char[strlen(s->identifier())+7];
sprintf(name, "_%s_dims", s->identifier()); sprintf(name, "_%s_dims", s->identifier());
s_dims = ArraySymbol(name, idxTypeInKernel, new SgValueExp(Rank(s)-1), kernel_st); s_dims = ArraySymbol(name, idxTypeInKernel, new SgValueExp(Rank(s)-1), kernel_st);
SgExpression *einit = new SgExpression(INIT_LIST); SgExpression *einit = new SgExpression(INIT_LIST);
SgExpression *elist = NULL; SgExpression *elist = NULL;
@@ -9841,7 +9888,7 @@ void DeclarePrivateVars()
} }
else else
{ {
for (int i=Rank(s)-1; i; i--) for (int i=Rank(s)-1; i; i--)
elist = AddListToList(elist, new SgExprListExp(*Calculate(ArrayDimSize(s,i)))); elist = AddListToList(elist, new SgExprListExp(*Calculate(ArrayDimSize(s,i))));
} }
einit->setLhs(elist); einit->setLhs(elist);
@@ -10587,18 +10634,7 @@ void ReductionBlockInKernel_On_C_Cuda(SgStatement *stat, SgSymbol *i_var, SgExpr
} }
} }
else if (rsl->locvar) // maxloc/minloc reduction scalar else if (rsl->locvar) // maxloc/minloc reduction scalar
{ {
SgType *decl;
int rank = rsl->number;
if (rank > 1)
{
SgArrayType *arrT = new SgArrayType(*C_Type(rsl->locvar->type()));
arrT->addDimension(new SgValueExp(rank));
decl = arrT;
}
else
decl = C_Type(rsl->locvar->type());
newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable
kernel_st->insertStmtAfter(*newst, *kernel_st); kernel_st->insertStmtAfter(*newst, *kernel_st);
@@ -10615,20 +10651,30 @@ void ReductionBlockInKernel_On_C_Cuda(SgStatement *stat, SgSymbol *i_var, SgExpr
fun_ref->setRhs(tmplArgs); fun_ref->setRhs(tmplArgs);
stat->insertStmtBefore(*new SgCExpStmt(*fun_ref), *stat->controlParent()); stat->insertStmtBefore(*new SgCExpStmt(*fun_ref), *stat->controlParent());
if (across)
newst = AssignStatement(new SgArrayRefExp(*rsl->red_grid, *ex), new SgVarRefExp(rsl->redvar));
else
newst = AssignStatement(new SgArrayRefExp(*rsl->red_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(rsl->redvar));
if_st->insertStmtAfter(*newst); if_st->insertStmtAfter(*newst);
if (rsl->number > 1) if (rsl->number > 1)
{ {
for (int i = 0; i < rsl->number; ++i) for (int i = 0; i < rsl->number; ++i)
{ {
if (across)
newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * *ex + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i)));
else
newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * (*BlockIdxRefExpr("x") * *ex1 + *ex) + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i))); newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *new SgValueExp(rsl->number) * (*BlockIdxRefExpr("x") * *ex1 + *ex) + *new SgValueExp(i)), new SgArrayRefExp(*rsl->locvar, *new SgValueExp(i)));
if_st->lastExecutable()->insertStmtAfter(*newst); if_st->lastExecutable()->insertStmtAfter(*newst);
} }
} }
else else
{ {
if (across)
newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *ex), new SgVarRefExp(*rsl->locvar));
else
newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(*rsl->locvar)); newst = AssignStatement(new SgArrayRefExp(*rsl->loc_grid, *BlockIdxRefExpr("x") * *ex1 + *ex), new SgVarRefExp(*rsl->locvar));
if_st->lastExecutable()->insertStmtAfter(*newst); if_st->lastExecutable()->insertStmtAfter(*newst);
} }

View File

@@ -1588,6 +1588,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
for (int i = NumberOfCoeffs(sg); i>0; i--) for (int i = NumberOfCoeffs(sg); i>0; i--)
funcCall->addArg(*new SgArrayRefExp(*sg, *new SgValueExp(i))); funcCall->addArg(*new SgArrayRefExp(*sg, *new SgValueExp(i)));
} }
if (red_list) if (red_list)
{ {
reduction_operation_list *rsl; reduction_operation_list *rsl;
@@ -1613,6 +1614,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
funcCall->addArg(*new SgArrayRefExp(*s, *new SgValueExp(i))); funcCall->addArg(*new SgArrayRefExp(*s, *new SgValueExp(i)));
} }
s = s->next(); s = s->next();
if (options.isOn(C_CUDA)) if (options.isOn(C_CUDA))
funcCall->addArg(*new SgVarRefExp(reduction_ptr[i])); funcCall->addArg(*new SgVarRefExp(reduction_ptr[i]));
else else
@@ -1738,8 +1740,8 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu
SgFunctionCallExp *funcCallKernel, SgSymbol* numBlocks, int &has_red_array) SgFunctionCallExp *funcCallKernel, SgSymbol* numBlocks, int &has_red_array)
{ {
reduction_operation_list *rsl; reduction_operation_list *rsl;
SgSymbol *s; SgSymbol *s = NULL;
SgExpression *e; SgExpression *e = NULL;
for (rsl = red_struct_list, s = red_first; rsl; rsl = rsl->next) //s!=s_blocks_info for (rsl = red_struct_list, s = red_first; rsl; rsl = rsl->next) //s!=s_blocks_info
{ {
@@ -1776,13 +1778,16 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu
else else
funcCallKernel->addArg(*new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(reduction_ptr[i]))); funcCallKernel->addArg(*new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(reduction_ptr[i])));
//TODO!!
if (rsl->locvar) //MAXLOC,MINLOC if (rsl->locvar) //MAXLOC,MINLOC
{ {
for (int k = 0; k < rsl->number; ++k) for (int k = 0; k < rsl->number; ++k)
funcCallKernel->addArg(*new SgArrayRefExp(*reduction_loc_symb[i], *new SgValueExp(k))); funcCallKernel->addArg(*new SgArrayRefExp(*reduction_loc_symb[i], *new SgValueExp(k)));
s = s->next(); s = s->next();
e = new SgCastExp(*C_PointerType(options.isOn(C_CUDA) ? C_Type(rsl->locvar->type()) : new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s));
if (options.isOn(C_CUDA))
e = new SgCastExp(*C_PointerType(C_Type(rsl->locvar->type())), *new SgVarRefExp(reduction_loc_ptr[i]));
else
e = new SgCastExp(*C_PointerType(new SgDescriptType(*SgTypeChar(), BIT_SIGNED)), *new SgVarRefExp(s));// TODO it like in C_Cuda
funcCallKernel->addArg(*e); funcCallKernel->addArg(*e);
s = s->next(); s = s->next();
} }
@@ -2442,6 +2447,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
stmt = makeSymbolDeclarationWithInit(s, new SgValueExp(0)); stmt = makeSymbolDeclarationWithInit(s, new SgValueExp(0));
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
} }
// create indxs // create indxs
for (int i = 0; i < acrossV; ++i) for (int i = 0; i < acrossV; ++i)
{ {
@@ -4216,7 +4222,7 @@ void MakeDeclarationsForKernel_On_C_Across(SgType *indexType)
DeclareDoVars(indexType); DeclareDoVars(indexType);
// declare private(local in kernel) variables // declare private(local in kernel) variables
DeclarePrivateVars(); DeclarePrivateVars(indexType);
// declare variables, used in loop and passed by reference: // declare variables, used in loop and passed by reference:
// <type> &<name> = *p_<name>; // <type> &<name> = *p_<name>;
@@ -4233,7 +4239,7 @@ void MakeDeclarationsForKernelAcross(SgType *indexType)
DeclareDoVars(); DeclareDoVars();
// declare private(local in kernel) variables // declare private(local in kernel) variables
DeclarePrivateVars(); DeclarePrivateVars(indexType);
// declare dummy arguments: // declare dummy arguments:
@@ -5829,6 +5835,7 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i
flag_func_call = 0; // maxloc flag_func_call = 0; // maxloc
else if (num == 10) else if (num == 10)
flag_func_call = 0; // minloc flag_func_call = 0; // minloc
if (flag_func_call == 1) if (flag_func_call == 1)
{ {
SgFunctionCallExp *funcCall = new SgFunctionCallExp(*createNewFunctionSymbol(str_operation)); SgFunctionCallExp *funcCall = new SgFunctionCallExp(*createNewFunctionSymbol(str_operation));
@@ -5923,9 +5930,9 @@ SgStatement *CreateLoopKernelAcross(SgSymbol *skernel, ArgsForKernel* argsKer, i
locGrid->setType(*new SgArrayType(*tmp_list->loc_grid->type())); locGrid->setType(*new SgArrayType(*tmp_list->loc_grid->type()));
if (options.isOn(C_CUDA)) if (options.isOn(C_CUDA))
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i))); st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(loc_el_num) * *e1 + *new SgValueExp(i)), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i)));
else else
st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1))); st = AssignStatement(*new SgArrayRefExp(*locGrid, *new SgValueExp(i + 1), *e1), *new SgArrayRefExp(*loc_var_ref->symbol(), *new SgValueExp(i + 1)));//TODO it like in C_Cuda
ifSt->insertStmtAfter(*st); ifSt->insertStmtAfter(*st);
} }
} }
@@ -6153,13 +6160,13 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope
if (rsl->locvar) if (rsl->locvar)
{ {
newst = Declaration_Statement(rsl->locvar); //declare location variable newst = Declaration_Statement(LocRedVariableSymbolInKernel(rsl)); //declare location variable
kernel_st->insertStmtAfter(*newst, *kernel_st); kernel_st->insertStmtAfter(*newst, *kernel_st);
} }
if (rsl->redvar_size > 0) if (rsl->redvar_size > 0)
{ {
newst = Declaration_Statement(rsl->redvar); //declare reduction variable newst = Declaration_Statement(RedVariableSymbolInKernel(rsl->redvar,NULL,NULL)); //declare reduction variable
kernel_st->insertStmtAfter(*newst, *kernel_st); kernel_st->insertStmtAfter(*newst, *kernel_st);
} }
else if (rsl->redvar_size < 0) else if (rsl->redvar_size < 0)
@@ -6168,26 +6175,22 @@ void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_ope
newst = Declaration_Statement(red_var_k); //declare reduction variable newst = Declaration_Statement(red_var_k); //declare reduction variable
kernel_st->insertStmtAfter(*newst, *kernel_st); kernel_st->insertStmtAfter(*newst, *kernel_st);
} }
rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type();
s_block = RedBlockSymbolInKernelAcross(red_var, rtype); //XXX: shared memory doesnt use in ACROSS by C_Cuda
if (!options.isOn(C_CUDA))
newst = Declaration_Statement(s_block);
if (options.isOn(C_CUDA))
newst->addDeclSpec(BIT_CUDA_SHARED | BIT_EXTERN);
else
{ {
rtype = (rsl->redvar_size >= 0) ? TypeOfRedBlockSymbol(ered) : red_var_k->type();
s_block = RedBlockSymbolInKernelAcross(red_var, rtype);
newst = Declaration_Statement(s_block);
eatr = new SgExprListExp(*new SgExpression(ACC_SHARED_OP)); eatr = new SgExprListExp(*new SgExpression(ACC_SHARED_OP));
newst->setExpression(2, *eatr); newst->setExpression(2, *eatr);
} kernel_st->insertStmtAfter(*newst, *kernel_st);
kernel_st->insertStmtAfter(*newst, *kernel_st); if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC
{
if (isSgExprListExp(ered->rhs())) //MAXLOC,MINLOC typedecl = MakeStructDecl(rtype->symbol());
{ kernel_st->insertStmtAfter(*typedecl, *kernel_st);
typedecl = MakeStructDecl(rtype->symbol()); }
kernel_st->insertStmtAfter(*typedecl, *kernel_st);
} }
} }

View File

@@ -5,10 +5,12 @@
using namespace std; using namespace std;
// special storages to avoid recomputing // special storages to avoid recomputing
map<string, SgExpression*> lhs; static map<string, SgExpression*> lhs;
map<string, SgExpression*> rhs; static map<string, SgExpression*> rhs;
map<SgExpression*, string> unparsedLhs; static map<SgExpression*, string> unparsedLhs;
map<SgExpression*, string> unparsedRhs; static map<SgExpression*, string> unparsedRhs;
extern reduction_operation_list* red_struct_list;
template<typename InIt1, typename InIt2, typename OutIt> template<typename InIt1, typename InIt2, typename OutIt>
static inline OutIt difference(InIt1 first1, InIt1 last1, InIt2 first2, InIt2 last2, OutIt dest) static inline OutIt difference(InIt1 first1, InIt1 last1, InIt2 first2, InIt2 last2, OutIt dest)
@@ -1221,6 +1223,13 @@ Loop::Loop(SgStatement* loop_body, bool enable_opt, bool irreg_access) :
irregular_acc_opt(irreg_access), enable_opt(enable_opt), loop_body(loop_body), irregular_acc_opt(irreg_access), enable_opt(enable_opt), loop_body(loop_body),
dimension(0), acrossType(0), acrossDims(NULL), do_irreg_opt(false) dimension(0), acrossType(0), acrossDims(NULL), do_irreg_opt(false)
{ {
reduction_operation_list* rsl;
for (rsl = red_struct_list; rsl; rsl = rsl->next)
{
if (rsl->locvar) //MAXLOC,MINLOC
redArrays.insert(rsl->locvar);
}
lhs.clear(); lhs.clear();
rhs.clear(); rhs.clear();
unparsedLhs.clear(); unparsedLhs.clear();
@@ -1420,7 +1429,7 @@ void Loop::analyzeAssignments(SgExpression* ex, const int blockIndex)
else else
{ {
SgSymbol* symbol = ex->symbol(); SgSymbol* symbol = ex->symbol();
if (isSgArrayType(symbol->type()) != NULL) if (isSgArrayType(symbol->type()) != NULL && redArrays.find(symbol) == redArrays.end())
{ {
SgExpression* subscripts = ((SgArrayRefExp*)(ex))->subscripts(); SgExpression* subscripts = ((SgArrayRefExp*)(ex))->subscripts();
if (!subscripts) if (!subscripts)
@@ -1950,6 +1959,13 @@ void Loop::buildCFG()
Loop::Loop(SgStatement* stmt) : do_irreg_opt(false) Loop::Loop(SgStatement* stmt) : do_irreg_opt(false)
{ {
reduction_operation_list* rsl;
for (rsl = red_struct_list; rsl; rsl = rsl->next)
{
if (rsl->locvar) //MAXLOC,MINLOC
redArrays.insert(rsl->locvar);
}
lhs.clear(); rhs.clear(); unparsedLhs.clear(); unparsedRhs.clear(); lhs.clear(); rhs.clear(); unparsedLhs.clear(); unparsedRhs.clear();
buildCFG(); buildCFG();
} }

View File

@@ -104,6 +104,7 @@ private:
SgStatement* loop_body; SgStatement* loop_body;
int dimension; int dimension;
std::map<SgSymbol*, Array*> arrays; std::map<SgSymbol*, Array*> arrays;
std::set<SgSymbol*> redArrays;
int* acrossDims; int* acrossDims;
int acrossType; int acrossType;
std::vector<SgSymbol*> symbols; std::vector<SgSymbol*> symbols;

View File

@@ -1377,6 +1377,7 @@ int TestOneGroupStatement(SgStatement *stmt);
int TestOneGroupStatement(SgStatement *stmt); int TestOneGroupStatement(SgStatement *stmt);
void DeclareUsedVars(); void DeclareUsedVars();
void DeclareInternalPrivateVars(); void DeclareInternalPrivateVars();
void DeclarePrivateVars();
void DeclarePrivateVars(SgType *idxTypeInKernel); void DeclarePrivateVars(SgType *idxTypeInKernel);
void DeclareArrayBases(); void DeclareArrayBases();
void DeclareArrayCoeffsInKernel(SgType*); void DeclareArrayCoeffsInKernel(SgType*);
@@ -1451,6 +1452,7 @@ SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr);
SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr); SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr);
SgExpression *CreatePrivateDummyList(); SgExpression *CreatePrivateDummyList();
char *PointerNameForPrivateArray(SgSymbol *symb); char *PointerNameForPrivateArray(SgSymbol *symb);
void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads);
SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl); SgSymbol *LocRedVariableSymbolInKernel(reduction_operation_list *rsl);
/* acc_analyzer.cpp */ /* acc_analyzer.cpp */