added new implementation of private arrays

This commit is contained in:
ALEXks
2024-10-29 11:53:41 +03:00
parent 1a1705d2e4
commit 5e60b5cd5c
12 changed files with 945 additions and 113 deletions

View File

@@ -236,6 +236,7 @@
#define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */ #define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */
#define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */ #define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */
#define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */ #define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */
#define PRIVATE_AR_DECL 642 /* DVM-F */
/***************** variant tags for low level nodes ********************/ /***************** variant tags for low level nodes ********************/

View File

@@ -238,6 +238,7 @@ script using "tag". Run make tag.h to regenerate this file */
tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR"; tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR";
tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR"; tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR";
tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR"; tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR";
tag [ PRIVATE_AR_DECL ] = "PRIVATE_AR_DECL";
/***************** variant tags for low level nodes ********************/ /***************** variant tags for low level nodes ********************/

View File

@@ -139,6 +139,8 @@ DEFNODECODE(CONT_STAT, "%CMNT%PUTTABcontinue;%NL",
's',0,BIFNODE) 's',0,BIFNODE)
DEFNODECODE(VAR_DECL, "%CMNT%SETFLAG(VARDECL)%IF (%CHECKFLAG(ENUM) == %NULL)%IF (%CHECKFLAG(CLASSDECL) != %NULL)%PROTECTION%ENDIF%PUTTAB%DECLSPEC%TYPE %ENDIF%LL1%IF (%CHECKFLAG(ENUM) == %NULL);%ENDIF%UNSETFLAG(VARDECL)%NL", DEFNODECODE(VAR_DECL, "%CMNT%SETFLAG(VARDECL)%IF (%CHECKFLAG(ENUM) == %NULL)%IF (%CHECKFLAG(CLASSDECL) != %NULL)%PROTECTION%ENDIF%PUTTAB%DECLSPEC%TYPE %ENDIF%LL1%IF (%CHECKFLAG(ENUM) == %NULL);%ENDIF%UNSETFLAG(VARDECL)%NL",
's',0,BIFNODE) 's',0,BIFNODE)
DEFNODECODE(PRIVATE_AR_DECL, "%CMNT%PUTTABPrivateArray<%LL1,%LL2> %LL3;%NL",
's',0,BIFNODE)
DEFNODECODE(PARAM_DECL, "%ERROR", DEFNODECODE(PARAM_DECL, "%ERROR",
's',0,BIFNODE) 's',0,BIFNODE)
DEFNODECODE(COMM_STAT, "%ERROR", DEFNODECODE(COMM_STAT, "%ERROR",

View File

@@ -736,6 +736,12 @@ SgSymbol *RedCountSymbol(SgStatement *scope)
} }
/*
 * Builds the string "_<identifier>_p" from the identifier of 'symb'.
 *
 * symb    - symbol whose identifier is embedded in the generated name.
 * returns - a buffer allocated with new[]; this function never releases it,
 *           so ownership passes to the caller.
 */
char *PointerNameForPrivateArray(SgSymbol *symb)
{
    // room for the leading '_', the trailing "_p" and the terminating '\0'
    const size_t decoration = 4;
    char *result = new char[strlen(symb->identifier()) + decoration];

    sprintf(result, "_%s_p", symb->identifier());
    return result;
}
SgSymbol *OverallBlocksSymbol() SgSymbol *OverallBlocksSymbol()
{ {
@@ -1224,7 +1230,7 @@ int TestLocal(SgExpression *list)
return (0); return (0);
} }
int is_deleted_module_symbol(SgSymbol *s) int is_deleted_module_symbol(SgSymbol *s) // deleted because it was renamed (parser/sym.c: function delete_symbol())
{ {
if (!strcmp("***", s->identifier())) if (!strcmp("***", s->identifier()))
return 1; return 1;
@@ -2688,6 +2694,7 @@ void ACC_CreateParallelLoop(int ipl, SgStatement *first_do, int nloop, SgStateme
// creating private_list // creating private_list
private_list = clause[PRIVATE_] ? clause[PRIVATE_]->lhs() : NULL; private_list = clause[PRIVATE_] ? clause[PRIVATE_]->lhs() : NULL;
dost = InnerMostLoop(first_do, nloop); dost = InnerMostLoop(first_do, nloop);
// error checking // error checking
@@ -2700,6 +2707,7 @@ void ACC_CreateParallelLoop(int ipl, SgStatement *first_do, int nloop, SgStateme
for_shadow_compute = clause[SHADOW_COMPUTE_] ? 1 : 0; // for optimization of shadow_compute for_shadow_compute = clause[SHADOW_COMPUTE_] ? 1 : 0; // for optimization of shadow_compute
uses_list = UsesList(dost->lexNext(), lastStmtOfDo(dost)); uses_list = UsesList(dost->lexNext(), lastStmtOfDo(dost));
RefInExpr(IsRedBlack(nloop), _READ_); // add to uses_list variables used in start-expression of redblack loop RefInExpr(IsRedBlack(nloop), _READ_); // add to uses_list variables used in start-expression of redblack loop
if (!options.isOn(C_CUDA))
UsesInPrivateArrayDeclarations(private_list); // add to uses_list variables used in private array declarations UsesInPrivateArrayDeclarations(private_list); // add to uses_list variables used in private array declarations
if(USE_STATEMENTS_ARE_REQUIRED) // || !IN_COMPUTE_REGION) if(USE_STATEMENTS_ARE_REQUIRED) // || !IN_COMPUTE_REGION)
CorrectUsesList(); CorrectUsesList();
@@ -2934,8 +2942,8 @@ int CreateLoopForSequence(SgStatement *first)
} }
void doStatementsToPerformByHandler(int ilh, SgSymbol *adapter_symb, SgSymbol *hostproc_symb,int is_parloop,int interface) void doStatementsToPerformByHandler(int ilh, SgSymbol *adapter_symb, SgSymbol *hostproc_symb,int is_parloop,int interface)
{ SgExpression *arg_list, *base_list, *copy_uses_list, *copy_arg_list, *red_dim_list, *red_bound_list; { SgExpression *arg_list, *base_list, *copy_uses_list, *copy_arg_list, *red_dim_list, *red_bound_list, *private_dim_list=NULL, *private_bound_list=NULL;
int numb, numb_r, numb_b; int numb=0, numb_r=0, numb_b=0, numb_p_dim=0, numb_p_bound=0;
SgStatement *st_register; SgStatement *st_register;
copy_uses_list = uses_list ? &(uses_list->copy()) : NULL; //!!! copy_uses_list = uses_list ? &(uses_list->copy()) : NULL; //!!!
@@ -2944,9 +2952,16 @@ void doStatementsToPerformByHandler(int ilh, SgSymbol *adapter_symb, SgSymbol *
arg_list = AddListToList(arg_list, ArrayArgumentList()); arg_list = AddListToList(arg_list, ArrayArgumentList());
copy_arg_list = arg_list ? &(arg_list->copy()) : NULL; copy_arg_list = arg_list ? &(arg_list->copy()) : NULL;
red_dim_list = DimSizeListOfReductionArrays(); red_dim_list = DimSizeListOfReductionArrays();
red_bound_list = BoundListOfReductionArrays();
numb_b = ListElemNumber(red_bound_list);
numb_r = ListElemNumber(red_dim_list); numb_r = ListElemNumber(red_dim_list);
red_bound_list = BoundListOfReductionArrays(); // !!! to change
numb_b = ListElemNumber(red_bound_list);
private_bound_list = BoundListOfPrivateArrays();
numb_p_bound = ListElemNumber(private_bound_list);
if (options.isOn(C_CUDA))
{
private_dim_list = DimSizeListOfPrivateArrays();
numb_p_dim = ListElemNumber(private_dim_list);
}
numb = ListElemNumber(arg_list) + ListElemNumber(uses_list); numb = ListElemNumber(arg_list) + ListElemNumber(uses_list);
// register CUDA-handler // register CUDA-handler
@@ -2955,13 +2970,14 @@ void doStatementsToPerformByHandler(int ilh, SgSymbol *adapter_symb, SgSymbol *
arg_list = AddListToList(arg_list, copy_uses_list); arg_list = AddListToList(arg_list, copy_uses_list);
arg_list = AddListToList(arg_list, red_dim_list); arg_list = AddListToList(arg_list, red_dim_list);
arg_list = AddListToList(arg_list, private_dim_list);
if(interface == 1) if(interface == 1)
{ {
InsertNewStatementAfter(RegisterHandler_H(ilh, DeviceTypeConst(CUDA), ConstRef(0), adapter_symb->next(), 0, numb + numb_r), cur_st, cur_st->controlParent()); /* OpenMP */ InsertNewStatementAfter(RegisterHandler_H(ilh, DeviceTypeConst(CUDA), ConstRef(0), adapter_symb->next(), 0, numb + numb_r + numb_p_dim), cur_st, cur_st->controlParent()); /* OpenMP */
AddListToList(cur_st->expr(0), arg_list); AddListToList(cur_st->expr(0), arg_list);
} else } else
{ {
SgExpression *efun = HandlerFunc(adapter_symb->next(), numb + numb_r, arg_list); SgExpression *efun = HandlerFunc(adapter_symb->next(), numb + numb_r + numb_p_dim, arg_list);
InsertNewStatementAfter(RegisterHandler_H2(ilh, DeviceTypeConst(CUDA), ConstRef(0), efun), cur_st, cur_st->controlParent()); /* OpenMP */ InsertNewStatementAfter(RegisterHandler_H2(ilh, DeviceTypeConst(CUDA), ConstRef(0), efun), cur_st, cur_st->controlParent()); /* OpenMP */
} }
} }
@@ -2974,15 +2990,15 @@ void doStatementsToPerformByHandler(int ilh, SgSymbol *adapter_symb, SgSymbol *
copy_uses_list = uses_list ? &(uses_list->copy()) : NULL; copy_uses_list = uses_list ? &(uses_list->copy()) : NULL;
copy_arg_list = AddListToList(copy_arg_list, copy_uses_list); copy_arg_list = AddListToList(copy_arg_list, copy_uses_list);
copy_arg_list = AddListToList(copy_arg_list, red_bound_list); copy_arg_list = AddListToList(copy_arg_list, red_bound_list);
copy_arg_list = AddListToList(copy_arg_list, private_bound_list);
if(interface == 1) if(interface == 1)
{ {
InsertNewStatementAfter(RegisterHandler_H(ilh, DeviceTypeConst(HOST), DVM000(iht), hostproc_symb, 0, numb+numb_b), cur_st, cur_st->controlParent()); /* OpenMP */ InsertNewStatementAfter(RegisterHandler_H(ilh, DeviceTypeConst(HOST), DVM000(iht), hostproc_symb, 0, numb+numb_b+numb_p_bound), cur_st, cur_st->controlParent()); /* OpenMP */
AddListToList(cur_st->expr(0), copy_arg_list); AddListToList(cur_st->expr(0), copy_arg_list);
} else } else
{ {
SgExpression *efun = HandlerFunc(hostproc_symb, numb+numb_b, copy_arg_list); SgExpression *efun = HandlerFunc(hostproc_symb, numb+numb_b+numb_p_bound, copy_arg_list);
InsertNewStatementAfter(RegisterHandler_H2(ilh, DeviceTypeConst(HOST), DVM000(iht), efun), cur_st, cur_st->controlParent()); /* OpenMP */ InsertNewStatementAfter(RegisterHandler_H2(ilh, DeviceTypeConst(HOST), DVM000(iht), efun), cur_st, cur_st->controlParent()); /* OpenMP */
} }
cur_st->addComment(OpenMpComment_HandlerType(iht)); cur_st->addComment(OpenMpComment_HandlerType(iht));
@@ -3015,7 +3031,7 @@ SgExpression *DimSizeListOfReductionArrays()
//arg = SizeFunction(rsl->redvar,idim); //arg = SizeFunction(rsl->redvar,idim);
Error("Assumed-size array: %s", rsl->redvar->identifier(), 162, dvm_parallel_dir); Error("Assumed-size array: %s", rsl->redvar->identifier(), 162, dvm_parallel_dir);
else else
arg = SizeFunctionWithKind(rsl->redvar, idim, len_DvmType); arg = DvmType_Ref(SizeFunctionWithKind(rsl->redvar, idim, len_DvmType));
ell = new SgExprListExp(*arg); ell = new SgExprListExp(*arg);
ell->setRhs(el); ell->setRhs(el);
el = ell; el = ell;
@@ -3036,6 +3052,27 @@ SgExpression *DimSizeListOfReductionArrays()
return(arg_list); return(arg_list);
} }
/*
 * Walks private_list and, for every array for which TestArrayShape()
 * returns 0, appends to the result:
 *   - one SizeFunctionWithKind() expression per dimension, then
 *   - one LBOUNDFunction() expression per dimension,
 * each passed through DvmType_Ref() and wrapped in its own list node.
 *
 * returns - the accumulated expression list, or NULL when private_list is
 *           empty or no element qualified.
 */
SgExpression *DimSizeListOfPrivateArrays()
{
    SgExpression *result = NULL;

    for (SgExpression *item = private_list; item; item = item->rhs())
    {
        SgSymbol *sym = item->lhs()->symbol();

        // non-arrays and arrays accepted by TestArrayShape() contribute nothing
        if (!isSgArrayType(sym->type()) || TestArrayShape(sym))
            continue;

        // sizes of all dimensions first (dimension numbers are 1-based) ...
        for (int dim = 1; dim <= Rank(sym); dim++)
            result = AddListToList(result,
                                   new SgExprListExp(*DvmType_Ref(SizeFunctionWithKind(sym, dim, len_DvmType))));

        // ... followed by the lower bounds in the same dimension order
        for (int dim = 1; dim <= Rank(sym); dim++)
            result = AddListToList(result,
                                   new SgExprListExp(*DvmType_Ref(LBOUNDFunction(sym, dim))));
    }
    return result;
}
SgExpression *isConstantBound(SgSymbol *rv, int i, int isLower) SgExpression *isConstantBound(SgSymbol *rv, int i, int isLower)
{ {
SgExpression *bound; SgExpression *bound;
@@ -3057,10 +3094,10 @@ SgExpression *CreateBoundListOfArray(SgSymbol *ar)
for(i=0;i<Rank(ar); i++) for(i=0;i<Rank(ar); i++)
{ {
if(!isConstantBound(ar,i,1)) if(!isConstantBound(ar,i,1))
sl = AddListToList( sl, new SgExprListExp(LowerBound(ar,i)->copy()) ); sl = AddListToList( sl, new SgExprListExp(*LBOUNDFunction(ar,i+1)) );
if(!isConstantBound(ar,i,0)) if(!isConstantBound(ar,i,0))
sl = AddListToList( sl, new SgExprListExp(UpperBound(ar,i)->copy()) ); sl = AddListToList( sl, new SgExprListExp(*UBOUNDFunction(ar,i+1)) );
} }
return(sl); return(sl);
} }
@@ -3079,6 +3116,19 @@ SgExpression * BoundListOfReductionArrays()
return bound_list; return bound_list;
} }
/*
 * Concatenates the lists produced by CreateBoundListOfArray() for every
 * array-typed symbol found in private_list, in list order.
 *
 * returns - the combined list, or NULL if private_list is empty or nothing
 *           was appended.
 */
SgExpression * BoundListOfPrivateArrays()
{
    SgExpression *result = NULL;
    SgExpression *item = private_list;

    while (item)
    {
        SgSymbol *sym = item->lhs()->symbol();

        // only array-typed entries are handed to CreateBoundListOfArray()
        if (isSgArrayType(sym->type()))
            result = AddListToList(result, CreateBoundListOfArray(sym));

        item = item->rhs();
    }
    return result;
}
void ReplaceCaseStatement(SgStatement *first) void ReplaceCaseStatement(SgStatement *first)
{ {
SgStatement *stmt, *last_st; SgStatement *stmt, *last_st;
@@ -5676,6 +5726,15 @@ SgStatement *Create_Host_Across_Loop_Subroutine(SgSymbol *sHostProc)
if(!tail) if(!tail)
tail = red_bound_list; tail = red_bound_list;
} }
// add dummy arguments for private arrays
if(private_list)
{
SgExpression * private_dummy_list;
AddListToList(arg_list, private_dummy_list = DummyListForPrivateArrays(st_hedr));
if(!tail)
tail = private_dummy_list;
}
// create get_dependency_mask function declaration // create get_dependency_mask function declaration
stmt = fdvm[GET_DEP_MASK_F]->makeVarDeclStmt(); stmt = fdvm[GET_DEP_MASK_F]->makeVarDeclStmt();
@@ -5868,6 +5927,7 @@ SgStatement *Create_Host_Loop_Subroutine_Main (SgSymbol *sHostProc)
AddListToList(arg_list, copy_uses_list = &(uses_list->copy())); AddListToList(arg_list, copy_uses_list = &(uses_list->copy()));
if (!tail) if (!tail)
tail = copy_uses_list; tail = copy_uses_list;
}
// add dummy arguments for reductions // add dummy arguments for reductions
if(red_list) if(red_list)
{ {
@@ -5876,6 +5936,15 @@ SgStatement *Create_Host_Loop_Subroutine_Main (SgSymbol *sHostProc)
if(!tail) if(!tail)
tail = red_bound_list; tail = red_bound_list;
} }
// add dummy arguments for private arrays
if(private_list)
{
SgExpression * private_dummy_list;
AddListToList(arg_list, private_dummy_list = DummyListForPrivateArrays(st_hedr));
if(!tail)
tail = private_dummy_list;
}
// create external statement // create external statement
stmt = new SgStatement(EXTERN_STAT); stmt = new SgStatement(EXTERN_STAT);
@@ -6006,6 +6075,7 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
AddListToList(arg_list, copy_uses_list = &(uses_list->copy())); AddListToList(arg_list, copy_uses_list = &(uses_list->copy()));
if (!tail) if (!tail)
tail = copy_uses_list; tail = copy_uses_list;
}
// add dummy arguments for reductions // add dummy arguments for reductions
if(red_list) if(red_list)
{ {
@@ -6013,6 +6083,14 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
AddListToList(arg_list, red_bound_list = DummyListForReductionArrays(st_hedr)); AddListToList(arg_list, red_bound_list = DummyListForReductionArrays(st_hedr));
if(!tail) if(!tail)
tail = red_bound_list; tail = red_bound_list;
}
// add dummy arguments for private arrays
if(private_list)
{
SgExpression * private_dummy_list;
AddListToList(arg_list, private_dummy_list = DummyListForPrivateArrays(st_hedr));
if(!tail)
tail = private_dummy_list;
} }
// create external statement // create external statement
@@ -6177,9 +6255,8 @@ SgStatement *Create_Host_Loop_Subroutine(SgSymbol *sHostProc, int dependency)
if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, new SgExprListExp(*private_list), NULL, NULL)); /* OpenMP */ if ((addopenmp == 1) && (private_list != NULL)) parallellist->append(*new SgExpression(OMP_PRIVATE, new SgExprListExp(*private_list), NULL, NULL)); /* OpenMP */
for (el = private_list; el; el = el->rhs()) for (el = private_list; el; el = el->rhs())
{ {
SgSymbol *sp = el->lhs()->symbol(); SgSymbol *sp = el->lhs()->symbol();
//if(HEADER(sp)) // dvm-array is declared as dummy argument SgSymbol *sph = isSgArrayType(sp->type()) ? *(SgSymbol **)(el->lhs()->attributeValue(0, PRIVATE_ARRAY)) : sp;
// continue;
DeclareSymbolInHostHandler(sp, st_hedr, sph); DeclareSymbolInHostHandler(sp, st_hedr, sph);
} }
// <loop_index_variables> // <loop_index_variables>
@@ -6918,6 +6995,22 @@ int ExplicitShape(SgExpression *eShape)
} }
return 1; return 1;
} }
/*
 * Checks every dimension of array 'ar': the size expression obtained from
 * ArrayDimSize() is passed through ReplaceParameter() and must exist and
 * satisfy isInteger().
 *
 * ar      - array symbol to examine.
 * returns - 1 if all dimensions pass the check, 0 as soon as one does not.
 *           No diagnostic is issued here; the caller only gets the flag.
 */
int TestArrayShape(SgSymbol *ar)
{
    int dim = 1;    // dimensions are numbered from 1

    while (dim <= Rank(ar))
    {
        SgExpression *size = ReplaceParameter(ArrayDimSize(ar, dim));

        if (size == NULL)
            return 0;
        if (!size->isInteger())
            return 0;

        dim++;
    }
    return 1;
}
SgSymbol *ArraySymbolInHostHandler(SgSymbol *ar, SgStatement *scope) SgSymbol *ArraySymbolInHostHandler(SgSymbol *ar, SgStatement *scope)
{ {
@@ -6926,7 +7019,7 @@ SgSymbol *ArraySymbolInHostHandler(SgSymbol *ar, SgStatement *scope)
int rank, i; int rank, i;
rank = Rank(ar); rank = Rank(ar);
soff = ArraySymbol(ar->identifier(), ar->type()->baseType(), NULL, scope); soff = ArraySymbol(ar->identifier(), ar->type()->baseType(), NULL, scope);
if (!options.isOn(C_CUDA) && !ExplicitShape(isSgArrayType(ar->type())->getDimList())) if (!options.isOn(C_CUDA) && !ExplicitShape(isSgArrayType(ar->type())->getDimList()))
Error("Illegal array bound of private array %s", ar->identifier(), 442, dvm_parallel_dir); Error("Illegal array bound of private array %s", ar->identifier(), 442, dvm_parallel_dir);
@@ -7363,6 +7456,26 @@ SgExpression * DummyListForReductionArrays(SgStatement *st_hedr)
} }
return dummy_list; return dummy_list;
} }
/*
 * For every array-typed entry of private_list:
 *   - creates, in scope 'st_hedr', an array symbol with the same identifier
 *     and base type (via ArraySymbol()),
 *   - appends the result of CreateDummyBoundListOfArray() to the returned list,
 *   - attaches a PRIVATE_ARRAY attribute to the list entry; the attribute
 *     value is a heap-allocated SgSymbol* pointing at the new symbol.
 *
 * st_hedr - statement used as the scope for the created symbols.
 * returns - the combined dummy list, or NULL if nothing was appended.
 */
SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr)
{
    SgExpression *result = NULL;

    for (SgExpression *item = private_list; item; item = item->rhs())
    {
        SgSymbol *orig_ar = item->lhs()->symbol();
        if (!isSgArrayType(orig_ar->type()))
            continue;

        SgType *elem_type = orig_ar->type()->baseType();
        SgSymbol *handler_ar = ArraySymbol(orig_ar->identifier(), elem_type, NULL, st_hedr);

        result = AddListToList(result, CreateDummyBoundListOfArray(orig_ar, handler_ar, st_hedr));

        // the attribute stores a pointer to this slot, so it must outlive the call
        SgSymbol **slot = new SgSymbol *;
        *slot = handler_ar;
        item->lhs()->addAttribute(PRIVATE_ARRAY, (void *)slot, sizeof(SgSymbol *));
    }
    return result;
}
/***************************************************************************************/ /***************************************************************************************/
/*ACC*/ /*ACC*/
@@ -8253,7 +8366,8 @@ SgExpression *CreateKernelDummyList(SgSymbol *s_red_count_k, std::vector<SgSymbo
arg_list = AddListToList(arg_list, ae); arg_list = AddListToList(arg_list, ae);
} }
if (uses_list) if (uses_list)
arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ] arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ]
if (private_list)
arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ] arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ]
for (size_t i = 0; i < lowI.size(); ++i) for (size_t i = 0; i < lowI.size(); ++i)
{ {
@@ -9058,6 +9172,8 @@ SgExpression *CreateKernelDummyList(SgSymbol *s_red_count_k, SgType *idxTypeInKe
arg_list = AddListToList(arg_list, ae); arg_list = AddListToList(arg_list, ae);
} }
if (uses_list) if (uses_list)
arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ]
if (private_list)
arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ] arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ]
return arg_list; return arg_list;
@@ -9219,6 +9335,40 @@ SgExpression *CreateUsesDummyList()
} }
return(arg_list); return(arg_list);
} }
/*
 * Builds the list of dummy arguments generated for private arrays.
 *
 * For each array entry of private_list the function
 *   - creates an array symbol in scope kernel_st, named by
 *     PointerNameForPrivateArray() and typed with C_Type() of the base type,
 *   - appends an array reference to that symbol to the result,
 *   - attaches a PRIVATE_POINTER attribute (a heap-allocated SgSymbol*)
 *     to the list entry,
 *   - and, when TestArrayShape() returns 0, appends one variable reference
 *     per element of the entry's DIM_SIZES and then L_BOUNDS attribute lists.
 *
 * Returns NULL when the C_CUDA option is off or sizeOfPrivateArraysInBytes()
 * yields a null result; otherwise returns the accumulated list (possibly NULL
 * if private_list holds no arrays).
 *
 * NOTE(review): the DIM_SIZES / L_BOUNDS attribute values are dereferenced
 * without a NULL check, so this presumably relies on those attributes having
 * been attached before this function runs - confirm against the call order.
 */
SgExpression *CreatePrivateDummyList()
{
SgSymbol *s_dummy, *s;
SgExpression *el, *ae;
SgExpression *arg_list = NULL;
// nothing to generate unless C_CUDA is on and the private-array size is non-null
if (!options.isOn(C_CUDA) || !sizeOfPrivateArraysInBytes())
return NULL;
for (el = private_list; el; el = el->rhs())
{
s = el->lhs()->symbol();
// only arrays get a dummy argument
if (!IS_ARRAY(s))
continue;
s_dummy = ArraySymbol(PointerNameForPrivateArray(s), C_Type(s->type()->baseType()), NULL, kernel_st);
// array reference with an empty subscript list, typed like the new symbol
ae = new SgArrayRefExp(*s_dummy, *new SgExprListExp());
ae->setType(s_dummy->type());
arg_list = AddListToList(arg_list, new SgExprListExp(*ae));
// remember the created symbol on the list entry under PRIVATE_POINTER
SgSymbol **satr = new (SgSymbol *);
*satr = s_dummy;
el->lhs()->addAttribute(PRIVATE_POINTER, (void *)satr, sizeof(SgSymbol *) );
if (!TestArrayShape(s))
{
// pass the dimension sizes: one variable reference per DIM_SIZES element
// (each element is presumably a wrapper node around a variable reference,
// hence lhs()->lhs() - confirm where DIM_SIZES is attached)
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela=ela->rhs())
arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(ela->lhs()->lhs()->symbol()))); //AddListToList(arg_list, &(ela->copy()));
// then the lower bounds, taken the same way from L_BOUNDS
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela=ela->rhs())
arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(ela->lhs()->lhs()->symbol()))); //AddListToList(arg_list, &(ela->copy()));
}
}
return(arg_list);
} }
SgExpression *CreateRedDummyList() SgExpression *CreateRedDummyList()
@@ -9642,23 +9792,85 @@ void DeclareInternalPrivateVars()
} }
void DeclarePrivateVars() void DeclarePrivateVars()
{ {
SgStatement *st = NULL; SgStatement *st = NULL, *st_first=NULL;
SgExpression *var = NULL; SgExpression *var = NULL, *e;
SgSymbol *s;
SgExpression *e_all_private_size = sizeOfPrivateArraysInBytes();
// declare private variables // declare private variables
for (var = private_list; var; var = var->rhs()) for (var = private_list; var; var = var->rhs())
{ {
s = var->lhs()->symbol();
if (isParDoIndexVar(s)) continue; // declared as index variable of parallel loop if (isParDoIndexVar(s)) continue; // declared as index variable of parallel loop
//if (HEADER(var->lhs()->symbol())) continue; // dvm-array declared as dummy argument //if (HEADER(var->lhs()->symbol())) continue; // dvm-array declared as dummy argument
if (!options.isOn(C_CUDA) || !IS_ARRAY(s) || !e_all_private_size )
{
st = Declaration_Statement(SymbolInKernel(s)); st = Declaration_Statement(SymbolInKernel(s));
kernel_st->insertStmtAfter(*st);
st_first = st; st_first = st;
} }
else
{
SgSymbol *s_dims=NULL;
st = new SgStatement(PRIVATE_AR_DECL);
kernel_st->insertStmtAfter(*st);
st_first = st;
e = new SgExpression(TYPE_OP);
e->setType(C_Type(s->type()->baseType()));
st->setExpression(0, e);
e = new SgValueExp(Rank(s));
st->setExpression(1, e);
if (Rank(s)>1)
{
char *name = new char[strlen(s->identifier())+7];
sprintf(name, "_%s_dims", s->identifier());
s_dims = ArraySymbol(name, C_UnsignedLongLongType(), new SgValueExp(Rank(s)-1), kernel_st);
SgExpression *einit = new SgExpression(INIT_LIST);
SgExpression *elist = NULL;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) var->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela->rhs(); ela = ela->rhs())
{
SgExpression *ed = new SgVarRefExp(ela->lhs()->lhs()->symbol());
elist = AddListToList(new SgExprListExp(*ed), elist);
}
}
else
{
for (int i=Rank(s)-1; i; i--)
elist = AddListToList(elist, Calculate(ArrayDimSize(s,i)));
}
einit->setLhs(elist);
SgStatement *st_dims = makeSymbolDeclarationWithInit(s_dims, einit);//Declaration_Statement(s_dims);
kernel_st->insertStmtAfter(*st_dims);
st_first = st_dims;
}
SgSymbol *s_new = & s->copy();
SYMB_SCOPE(s_new->thesymb) = kernel_st->thebif;
SgFunctionCallExp *efc = new SgFunctionCallExp(*s_new);
if (s_dims)
{
efc->addArg(*new SgVarRefExp(s_dims));
}
SgSymbol **satr = (SgSymbol **) var->lhs()->attributeValue(0, PRIVATE_POINTER);
if (satr)
{
SgSymbol *sp = *satr;
efc->addArg(*new SgVarRefExp(sp)); //e->setLhs(new SgExprListExp(*new SgVarRefExp(sp)));
}
st->setExpression(2, efc);
}
}
if (!st_first) if (!st_first)
return; return;
if (options.isOn(C_CUDA)) if (options.isOn(C_CUDA))
st_first->addComment("// Private variables"); st_first->addComment("// Private variables");
else else
st_first->addComment("! Private variables\n"); st_first->addComment("! Private variables\n");
} }
@@ -11332,14 +11544,39 @@ SgExpression *BlockDimsProduct()
{ {
return &(*new SgRecordRefExp(*s_blockdim, "x") * *new SgRecordRefExp(*s_blockdim, "y") * *new SgRecordRefExp(*s_blockdim, "z")); return &(*new SgRecordRefExp(*s_blockdim, "x") * *new SgRecordRefExp(*s_blockdim, "y") * *new SgRecordRefExp(*s_blockdim, "z"));
} }
/*
 * Looks up the entry of red_struct_list whose reduction variable has the
 * same identifier as 'ar'.
 *
 * ar      - symbol to search for (compared by identifier text).
 * returns - the matching entry; if none matches, the head of red_struct_list
 *           (which is NULL when the list is empty).
 */
reduction_operation_list *ElementOfReductionStruct(SgSymbol *ar)
{
    reduction_operation_list *entry = red_struct_list;

    while (entry)
    {
        if (strcmp(entry->redvar->identifier(), ar->identifier()) == 0)
            return entry;
        entry = entry->next;
    }

    // no match: fall back to the first entry of the list
    return red_struct_list;
}
/*
 * Looks up the element of private_list whose symbol has the same identifier
 * as 'ar'.
 *
 * ar      - symbol to search for (compared by identifier text).
 * returns - the matching element expression; if none matches, the first
 *           element of private_list; NULL when private_list is empty.
 *
 * The empty-list case used to dereference a NULL private_list; it now yields
 * NULL, mirroring ElementOfReductionStruct(), which returns its (possibly
 * NULL) list head in the same situation.
 */
SgExpression *ElementOfPrivateList(SgSymbol *ar)
{
    SgExpression *el;

    for (el = private_list; el; el = el->rhs())
        if (!strcmp(el->lhs()->symbol()->identifier(), ar->identifier()))
            return el->lhs();

    // no match: fall back to the first element, guarding against an empty list
    return private_list ? private_list->lhs() : NULL;
}
SgExpression *LowerShiftForArrays (SgSymbol *ar, int i, int type) SgExpression *LowerShiftForArrays (SgSymbol *ar, int i, int type)
{ {
SgExpression *e = isConstantBound(ar, i, 1); SgExpression *e = isConstantBound(ar, i, 1);
if(e) return e; if (e) return e;
if(type==0) //private array if (type==0) //private array
{
SgExpression **eatr = (SgExpression **)ElementOfPrivateList(ar)->attributeValue(0, L_BOUNDS);
SgExprListExp *ebounds = (SgExprListExp *)*eatr;
e = new SgVarRefExp(ebounds->elem(i)->lhs()->symbol());
} }
else // reduction array else // reduction array
{
SgExprListExp *el = ((SgExprListExp *) ElementOfReductionStruct(ar)->lowBound_arg);
e = &( el->elem(i)->copy() );
} }
return e; return e;
} }
@@ -11636,6 +11873,11 @@ SgType * C_LongLongType()
{ {
return(new SgDescriptType(*new SgType(T_LONG), BIT_LONG)); return(new SgDescriptType(*new SgType(T_LONG), BIT_LONG));
} }
/*
 * Creates a new descriptor type over a fresh T_LONG base type with both the
 * BIT_UNSIGNED and BIT_LONG flags set.
 * Judging by the name this is meant to stand for C "unsigned long long";
 * how the flags are rendered is not visible here.
 *
 * returns - the newly allocated type object.
 */
SgType * C_UnsignedLongLongType()
{
    SgType *base = new SgType(T_LONG);
    return new SgDescriptType(*base, BIT_UNSIGNED | BIT_LONG);
}
SgType * C_DvmType() SgType * C_DvmType()
{ {
@@ -13013,16 +13255,16 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
{ {
symb_list *sl; symb_list *sl;
SgStatement *st_hedr, *st_end, *stmt, *do_while, *first_exec, *st_base = NULL, *st_call, *cur; SgStatement *st_hedr, *st_end, *stmt, *do_while, *first_exec, *st_base = NULL, *st_call, *cur;
SgExpression *fe, *ae, *arg_list, *el, *e, *er; SgExpression *fe, *ae, *arg_list, *el, *e, *er;
SgExpression *espec, *e_all_private_size = NULL; SgExpression *espec, *e_all_private_size = NULL;
SgFunctionCallExp *fcall; SgFunctionCallExp *fcall;
//SgStatement *fileHeaderSt; //SgStatement *fileHeaderSt;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *red_first, *uses_first, *scalar_first, *private_first; SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *red_first, *uses_first, *scalar_first, *private_first;
SgSymbol *s_stream = NULL, *s_blocks = NULL, *s_threads = NULL, *s_blocks_info = NULL, *s_red_count = NULL, *s_tmp_var = NULL; SgSymbol *s_stream = NULL, *s_blocks = NULL, *s_threads = NULL, *s_blocks_info = NULL, *s_red_count = NULL, *s_tmp_var = NULL;
SgSymbol *s_dev_num = NULL, *s_shared_mem = NULL, *s_regs = NULL, *s_blocksS = NULL, *s_idxL = NULL, *s_idxH = NULL, *s_step = NULL, *s_idxTypeInKernel = NULL; SgSymbol *s_dev_num = NULL, *s_shared_mem = NULL, *s_regs = NULL, *s_blocksS = NULL, *s_idxL = NULL, *s_idxH = NULL, *s_step = NULL, *s_idxTypeInKernel = NULL;
SgSymbol *s_num_of_red_blocks = NULL, *s_fill_flag = NULL, *s_red_num = NULL, *s_restBlocks = NULL, *s_addBlocks = NULL, *s_overallBlocks = NULL; SgSymbol *s_num_of_red_blocks = NULL, *s_fill_flag = NULL, *s_red_num = NULL, *s_restBlocks = NULL, *s_addBlocks = NULL, *s_overallBlocks = NULL;
SgSymbol *s_max_blocks; SgSymbol *s_max_blocks;
SgType *typ = NULL; SgType *typ = NULL;
int ln, num, i, uses_num, shared_mem_count, has_red_array, use_device_num, nbuf, lnp; int ln, num, i, uses_num, shared_mem_count, has_red_array, use_device_num, nbuf, lnp;
char *define_name; char *define_name;
int pl_rank = ParLoopRank(); int pl_rank = ParLoopRank();
@@ -13038,7 +13280,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
st_hedr->addComment(Cuda_LoopHandlerComment()); st_hedr->addComment(Cuda_LoopHandlerComment());
first_exec = st_end; first_exec = st_end;
// create dummy argument list: // create dummy argument list:
// loop_ref,<dvm-array-headers>,<uses>,<reduction_array_dimSizes-Lbounds>,<private_array_dimSizes_Lbounds> // loop_ref,<dvm-array-headers>,<uses>,<reduction_array_dimSizes-Lbounds>,<private_array_dimSizes_Lbounds>
typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef)); typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef));
@@ -13084,7 +13326,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
uses_first = sarg; uses_first = sarg;
} }
uses_num = ln; uses_num = ln;
if (red_list) // reduction array shapes if (red_list) // reduction array shapes
{ {
reduction_operation_list *rsl; //create dimmesion size list for reduction arrays reduction_operation_list *rsl; //create dimmesion size list for reduction arrays
@@ -13111,11 +13353,6 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(rsl->redvar, idim), *t, *st_hedr); sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(rsl->redvar, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg); ae = new SgVarRefExp(sarg);
ae->setType(t); ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
/*
ell = new SgExprListExp(*new SgPointerDerefExp(*ae));
ell->setRhs(el);
el = ell;
el = AddElementToList(el, new SgPointerDerefExp(*ae)); el = AddElementToList(el, new SgPointerDerefExp(*ae));
} }
rsl->dimSize_arg = el; rsl->dimSize_arg = el;
@@ -13128,6 +13365,49 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
} }
} }
} }
if (options.isOn(C_CUDA)) // private array shapes
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
SgExpression **edim = new (SgExpression *);
*edim = el;
elp->lhs()->addAttribute(DIM_SIZES, (void *)edim, sizeof(SgExpression *) );
arg_list = AddListToList(arg_list, &el->copy());
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
SgExpression **elb = new (SgExpression *);
*elb = el;
elp->lhs()->addAttribute(L_BOUNDS, (void *)elb, sizeof(SgExpression *) );
arg_list = AddListToList(arg_list, &el->copy());
while (arg_list->rhs() != 0)
arg_list = arg_list->rhs();
}
}
} }
// create variable's declarations: <dvm_array_headers>,<dvm_array_bases>,<scalar_device_addr>,<reduction_variables>,blocks_info [ or blocksS,idxL,idxH ],stream,blocks,threads // create variable's declarations: <dvm_array_headers>,<dvm_array_bases>,<scalar_device_addr>,<reduction_variables>,blocks_info [ or blocksS,idxL,idxH ],stream,blocks,threads
if (red_list) if (red_list)
@@ -13212,7 +13492,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
if (isSgArrayType(loc_type)) if (isSgArrayType(loc_type))
btype = loc_type->baseType(); btype = loc_type->baseType();
else else
btype = loc_type; btype = loc_type;
SgArrayType *typearray = new SgArrayType(*C_Type(btype)); SgArrayType *typearray = new SgArrayType(*C_Type(btype));
typearray->addRange(*new SgValueExp(loc_el_num)); typearray->addRange(*new SgValueExp(loc_el_num));
@@ -13251,7 +13531,7 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
} }
} }
if (!options.isOn(NO_BL_INFO)) if (!options.isOn(NO_BL_INFO))
{ {
s_blocks_info = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("blocks_info"), *C_PointerType(C_VoidType()), *st_hedr); s_blocks_info = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("blocks_info"), *C_PointerType(C_VoidType()), *st_hedr);
stmt = makeSymbolDeclaration(s); stmt = makeSymbolDeclaration(s);
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
@@ -13260,13 +13540,13 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
{ {
s_blocksS = s = ArraySymbol(TestAndCorrectName("blocksS"), C_DvmType(), new SgValueExp(pl_rank), st_hedr); s_blocksS = s = ArraySymbol(TestAndCorrectName("blocksS"), C_DvmType(), new SgValueExp(pl_rank), st_hedr);
stmt = makeSymbolDeclaration(s); stmt = makeSymbolDeclaration(s);
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
s_restBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("restBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr); s_restBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("restBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
s_max_blocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("maxBlocks"), *C_DvmType(), *st_hedr); s_max_blocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("maxBlocks"), *C_DvmType(), *st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
s_addBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("addBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr); s_addBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("addBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
s_overallBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("overallBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr); s_overallBlocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("overallBlocks"), *C_Derived_Type(s_cudaStream), *st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
s_idxL = s = ArraySymbol(TestAndCorrectName("idxL"), C_DvmType(), new SgValueExp(pl_rank), st_hedr); s_idxL = s = ArraySymbol(TestAndCorrectName("idxL"), C_DvmType(), new SgValueExp(pl_rank), st_hedr);
@@ -13277,15 +13557,15 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
s_step = s = ArraySymbol(TestAndCorrectName("loopSteps"), C_DvmType(), new SgValueExp(pl_rank), st_hedr); s_step = s = ArraySymbol(TestAndCorrectName("loopSteps"), C_DvmType(), new SgValueExp(pl_rank), st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
} }
s_stream = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("stream"), *C_Derived_Type(s_cudaStream), *st_hedr); s_stream = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("stream"), *C_Derived_Type(s_cudaStream), *st_hedr);
stmt = makeSymbolDeclaration(s); stmt = makeSymbolDeclaration(s);
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
s_blocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("blocks"), *t_dim3, *st_hedr); s_blocks = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("blocks"), *t_dim3, *st_hedr);
stmt = makeSymbolDeclaration(s); stmt = makeSymbolDeclaration(s);
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
s_threads = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("threads"), *t_dim3, *st_hedr); s_threads = s = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("threads"), *t_dim3, *st_hedr);
addDeclExpList(s, stmt->expr(0)); addDeclExpList(s, stmt->expr(0));
@@ -13615,6 +13895,34 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
fcall->addArg(*e); fcall->addArg(*e);
sdev = sdev->next(); sdev = sdev->next();
} }
e_all_private_size = sizeOfPrivateArraysInBytes();
if (options.isOn(C_CUDA) && e_all_private_size)
{
for (el=private_list, lnp=0; el; el=el->rhs())
{
s = el->lhs()->symbol();
if (IS_ARRAY(s))
{
sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sarg));
fcall->addArg(*ae);
if (!lnp)
private_first = sarg;
lnp++;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
fcall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
fcall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
} }
if (!options.isOn(NO_BL_INFO)) if (!options.isOn(NO_BL_INFO))
@@ -13684,13 +13992,38 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
//insert kernel call //insert kernel call
st_call = createKernelCallsInCudaHandler(fcall, s_loop_ref, s_idxTypeInKernel, s_blocks); st_call = createKernelCallsInCudaHandler(fcall, s_loop_ref, s_idxTypeInKernel, s_blocks);
SgFunctionCallExp *getProp = new SgFunctionCallExp(*new SgSymbol(FUNCTION_NAME, "loop_cuda_get_device_prop"));
getProp->addArg(*new SgVarRefExp(s_loop_ref));
getProp->addArg(*new SgKeywordValExp("CUDA_MAX_GRID_X"));
SgExpression *getProp = GetDeviceProp(s_loop_ref, new SgKeywordValExp("CUDA_MAX_GRID_X")); SgExpression *getProp = GetDeviceProp(s_loop_ref, new SgKeywordValExp("CUDA_MAX_GRID_X"));
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *getProp)); stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *getProp));
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
// insert code for big private arrays
if (options.isOn(C_CUDA) && e_all_private_size) //(e_size = sizeOfPrivateArraysInBytes()))
{
SgSymbol *s_private_size = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("privateSizeForBlock"), *C_DvmType(), *st_hedr);
stmt = makeSymbolDeclaration(s_private_size);
st_end->insertStmtBefore(*stmt, *st_hedr);
SgSymbol *s_total_threads = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("totalThreads"), *C_DvmType(), *st_hedr);
addDeclExpList(s_total_threads, stmt->expr(0));
SgExpression *e_threads = &(*new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z"));
SgExpression *e_private_size_for_block = &(*e_threads * *e_all_private_size);
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_private_size), *e_private_size_for_block));
st_end->insertStmtBefore(*stmt, *st_hedr);
SgExpression *e_maxBlocks = GetMaxBlocks(s_loop_ref, s_max_blocks, s_private_size);
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_max_blocks), *e_maxBlocks));
st_end->insertStmtBefore(*stmt, *st_hedr);
SgFunctionCallExp *fmin = new SgFunctionCallExp(*new SgSymbol(FUNCTION_NAME, "min", *C_DvmType(), *st_hedr));
fmin->addArg(*new SgVarRefExp(s_max_blocks));
fmin->addArg(*new SgVarRefExp(s_restBlocks));
SgExpression *e_total_threads = &((e_threads->copy()) * *fmin);
stmt = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*s_total_threads), *e_total_threads));
st_end->insertStmtBefore(*stmt, *st_hedr);
// Get private arrays
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, st_end, st_hedr, new SgVarRefExp(s_total_threads));
} }
if (currentLoop && currentLoop->irregularAnalysisIsOn()) if (currentLoop && currentLoop->irregularAnalysisIsOn())
{ {
@@ -13710,6 +14043,14 @@ SgStatement *Create_C_Adapter_Function(SgSymbol *sadapter)
st_call->insertStmtAfter(*stmt, *do_while); st_call->insertStmtAfter(*stmt, *do_while);
/* ------ block for finish reductions ----*/ /* ------ block for finish reductions ----*/
if (red_list) if (red_list)
InsertFinishReductionCalls(st_end, s_loop_ref, s_red_num);
// to dispose private arrays
if (options.isOn(C_CUDA) && e_all_private_size)
for (s = private_first, ln = 0; ln < lnp; s = s->next(), ln++) // private arrays
{
stmt = new SgCExpStmt(*DisposePrivateArray(s_loop_ref, s));
st_end->insertStmtBefore(*stmt, *st_hedr);
} }
} }
@@ -13986,7 +14327,7 @@ SgStatement *Create_C_Adapter_Function_For_Sequence(SgSymbol *sadapter, SgStatem
fcall->addArg(*e); fcall->addArg(*e);
sdev = sdev->next(); sdev = sdev->next();
} }
// inset kernel call // inset kernel call
stmt = createKernelCallsInCudaHandler(fcall, s_loop_ref, s_idxTypeInKernel, s_blocks); stmt = createKernelCallsInCudaHandler(fcall, s_loop_ref, s_idxTypeInKernel, s_blocks);
/* ------- WHILE (loop_cuda_do(DvmhLoopRef *InDvmhLoop, dim3 *OutBlocks, dim3 *OutThreads, cudaStream_t *OutStream, CudaIndexType **InOutBlocks) != 0) ----*/ /* ------- WHILE (loop_cuda_do(DvmhLoopRef *InDvmhLoop, dim3 *OutBlocks, dim3 *OutThreads, cudaStream_t *OutStream, CudaIndexType **InOutBlocks) != 0) ----*/
@@ -13998,6 +14339,99 @@ SgStatement *Create_C_Adapter_Function_For_Sequence(SgSymbol *sadapter, SgStatem
return(st_hedr); return(st_hedr);
} }
// Generates, in front of st_end, the declaration of the pointer variables for
// private arrays and one assignment per pointer of the form
//     <pointer> = GetPrivateArray(loop_ref, <array length> * <total threads>)
//
//   private_first  - first pointer symbol; the remaining ones are reached with next().
//                    Nothing is generated when it is NULL.
//   s_loop_ref     - loop reference symbol handed to GetPrivateArray().
//   nump           - number of pointer symbols (= number of arrays taken from private_list).
//   st_end         - statement before which the generated statements are inserted.
//   st_hedr        - passed as second argument to insertStmtBefore().
//   e_totalThreads - multiplier applied to the length of every array.
void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads)
{
    if (!private_first)
        return;

    // one declaration statement holds all the pointers; the first symbol opens it
    SgStatement *decl = makeSymbolDeclaration(private_first);
    st_end->insertStmtBefore(*decl, *st_hedr);
    decl->addComment("// Get private arrays");

    SgSymbol *ptr = private_first;
    SgExpression *item = private_list;
    for (int k = 0; k < nump; k++)
    {
        // private_list also holds non-array variables: move on to the next array
        while (!IS_ARRAY(item->lhs()->symbol()))
            item = item->rhs();

        if (k != 0)
            addDeclExpList(ptr, decl->expr(0));

        // length of one copy of the array: from the DIM_SIZES attribute when it is
        // attached to the list item, otherwise from ArrayLength()
        SgExpression **dim_sizes = (SgExpression **) item->lhs()->attributeValue(0, DIM_SIZES);
        SgExpression *one_copy;
        if (dim_sizes)
            one_copy = &( *ProductOfDimSizeArgs(*dim_sizes) * *sizeOfElementInBytes(item->lhs()->symbol()));
        else
            one_copy = ArrayLength(item->lhs()->symbol(), dvm_parallel_dir, 0);

        SgExpression *all_copies = &(*one_copy * *e_totalThreads);
        SgStatement *assign = new SgCExpStmt(SgAssignOp(*new SgVarRefExp(*ptr), *GetPrivateArray(s_loop_ref, all_copies)));
        st_end->insertStmtBefore(*assign, *st_hedr);

        ptr = ptr->next();
        item = item->rhs();
    }
}
// Builds a constant expression holding TypeSize() of the base type of symb.
// When TypeSize() gives a non-positive value, error 592 is reported for the
// current parallel directive; the constant is created and returned anyway.
SgExpression *sizeOfElementInBytes(SgSymbol *symb)
{
    const int elem_size = TypeSize(symb->type()->baseType());
    if (!(elem_size > 0))
        Error("Illegal type of private array %s, not implemented yet for GPU",symb->identifier(), 592, dvm_parallel_dir);

    SgExpression *result = new SgValueExp(elem_size);
    return (result);
}
// Builds an expression for the summed size of all arrays in private_list:
// for every array, (number of elements) * (element size from sizeOfElementInBytes()).
// The number of elements comes from the DIM_SIZES attribute of the list item
// when it is attached, otherwise from ArrayLengthInElems().
//
// Returns:
//   NULL  - private_list holds no arrays, or the total is an integer constant
//           not greater than big_private_threshold;
//   a new integer constant - the total is an integer constant above the threshold;
//   the symbolic sum       - the total cannot be evaluated to an integer.
// Callers treat a non-NULL result as "there are big private arrays".
SgExpression *sizeOfPrivateArraysInBytes()
{
    // constant totals up to this value are reported as NULL
    const int big_private_threshold = 512;

    SgExpression *e_size = NULL;
    for (SgExpression *el = private_list; el; el = el->rhs())
    {
        SgSymbol *symb = el->lhs()->symbol();
        if (!IS_ARRAY(symb))
            continue;

        SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
        SgExpression *esa;
        if (eatr)
            esa = &(*ProductOfDimSizeArgs(*eatr) * *sizeOfElementInBytes(symb));
        else
            esa = &(*ArrayLengthInElems(symb, dvm_parallel_dir, 1) * *sizeOfElementInBytes(symb));

        if (e_size)
            e_size = &( *e_size + *esa );
        else
            e_size = esa;
    }

    if (e_size && e_size->isInteger()) // calculating length if it is possible
    {
        int i_size = e_size->valueInteger();
        if (i_size > big_private_threshold)
            return new SgValueExp(i_size);
        return NULL;
    }
    return e_size;
}
// Multiplies together the entries of the list esizes.
// Every factor is a dereference of a reference to the variable whose symbol is
// found at item->lhs()->lhs() of the list item.
// Returns NULL for an empty list and the single factor for a one-element list.
SgExpression *ProductOfDimSizeArgs(SgExpression *esizes)
{
    SgExpression *product = NULL;
    SgExpression *item = esizes;
    while (item)
    {
        SgSymbol *size_var = item->lhs()->lhs()->symbol();
        if (!product)
            product = &SgDerefOp(*new SgVarRefExp(size_var));
        else
            product = &(*product * SgDerefOp(*new SgVarRefExp(size_var)));
        item = item->rhs();
    }
    return product;
}
SgStatement *AssignBlocksSElement(int i, int pl_rank, SgSymbol *s_blocksS, SgSymbol *s_idxL, SgSymbol *s_idxH, SgSymbol *s_step, SgSymbol *s_threads) SgStatement *AssignBlocksSElement(int i, int pl_rank, SgSymbol *s_blocksS, SgSymbol *s_idxL, SgSymbol *s_idxH, SgSymbol *s_step, SgSymbol *s_threads)
{ {

View File

@@ -633,7 +633,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
// clear information // clear information
allRegNames.clear(); allRegNames.clear();
SgStatement *st_hedr, *st_end, *first_exec, *stmt; SgStatement *st_hedr=NULL, *st_end, *first_exec, *stmt;
vector<SgStatement*> cuda_kernel; vector<SgStatement*> cuda_kernel;
SgExpression *fe, *ae, *el, *arg_list; SgExpression *fe, *ae, *el, *arg_list;
SgType *typ; SgType *typ;
@@ -700,6 +700,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
kernel_symbNew += "_llong"; kernel_symbNew += "_llong";
cuda_kernel[t] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symbNew.c_str(), *C_VoidType(), *block_C), &retValueForKernel[t], indexTypeInKernel(rtTypes[t])); cuda_kernel[t] = CreateLoopKernelAcross(new SgSymbol(FUNCTION_NAME, kernel_symbNew.c_str(), *C_VoidType(), *block_C), &retValueForKernel[t], indexTypeInKernel(rtTypes[t]));
if (options.isOn(RTC)) if (options.isOn(RTC))
{ {
acc_call_list = ACC_RTC_ExpandCallList(acc_call_list); acc_call_list = ACC_RTC_ExpandCallList(acc_call_list);
@@ -839,7 +840,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
first_exec = st_end; first_exec = st_end;
mywarn("start: create dummy argument list "); mywarn("start: create dummy argument list ");
// create dummy argument list: loop_ref, <dvm-array-headers>, <uses> // create dummy argument list: loop_ref, <dvm-array-headers>, <uses> ,<private-array-shapes>
typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef)); typ = C_PointerType(C_Derived_Type(s_DvmhLoopRef));
s_loop_ref = new SgSymbol(VARIABLE_NAME, "loop_ref", *typ, *st_hedr); s_loop_ref = new SgSymbol(VARIABLE_NAME, "loop_ref", *typ, *st_hedr);
argsForVariantFunction.push_back(s_loop_ref); argsForVariantFunction.push_back(s_loop_ref);
@@ -879,6 +880,46 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
arg_list->setRhs(*new SgExprListExp(*ae)); arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs(); arg_list = arg_list->rhs();
} }
if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = 1; idim<=Rank(s); idim++)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
argsForVariantFunction.push_back(sarg);
ae = new SgVarRefExp(sarg);
ae->setType(t);
ae = new SgPointerDerefExp(*ae);
arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs();
}
el = NULL;
for (idim = 1; idim<=Rank(s); idim++)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
argsForVariantFunction.push_back(sarg);
ae = new SgVarRefExp(sarg);
ae->setType(t);
ae = new SgPointerDerefExp(*ae);
arg_list->setRhs(*new SgExprListExp(*ae));
arg_list = arg_list->rhs();
}
}
}
}
mywarn(" end: create dummy argument list "); mywarn(" end: create dummy argument list ");
mywarn("start: create IF BLOCK "); mywarn("start: create IF BLOCK ");
@@ -1112,6 +1153,7 @@ ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter)
mywarn(" end: create IF BLOCK "); mywarn(" end: create IF BLOCK ");
} }
if (options.isOn(C_CUDA)) if (options.isOn(C_CUDA))
RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0); //(st_hedr, current_symbol->next(), 0); RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0); //(st_hedr, current_symbol->next(), 0);
@@ -1127,14 +1169,14 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
SgSymbol **reduction_ptr; SgSymbol **reduction_ptr;
SgSymbol *lowI, *highI, *idxI; SgSymbol *lowI, *highI, *idxI;
symb_list *sl; symb_list *sl;
SgStatement *st_hedr, *st_end, *stmt, *first_exec; SgStatement *st_hedr, *st_end, *stmt, *first_exec, *stmt_save;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *er; SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *er, *e_all_private_size = NULL;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first; SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first, *private_first=NULL;
SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *idxTypeInKernel; SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *idxTypeInKernel;
SgType *typ; SgType *typ;
SgFunctionCallExp *funcCall; SgFunctionCallExp *funcCall;
vector<char*> dvm_array_headers; vector<char*> dvm_array_headers;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays = 0, nbuf = 0; int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays = 0, nbuf = 0, lnp = 0;
// init block // init block
reduction_ptr = NULL; reduction_ptr = NULL;
@@ -1206,8 +1248,58 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
} }
uses_num = ln; uses_num = ln;
mywarn(" end: create dummy argument list "); if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, DIM_SIZES))
{
SgExpression **edim = new (SgExpression *);
*edim = el;
elp->lhs()->addAttribute(DIM_SIZES, (void *)edim, sizeof(SgExpression *) );
}
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, L_BOUNDS))
{
SgExpression **elb = new (SgExpression *);
*elb = el;
elp->lhs()->addAttribute(L_BOUNDS, (void *)elb, sizeof(SgExpression *) );
}
while (arg_list->rhs() != 0)
arg_list = arg_list->rhs();
}
}
}
mywarn(" end: create dummy argument list ");
// create variable's declarations: <dvm_array_headers>,<dvm_array_bases>,<scalar_device_addr>,<reduction_variables>,<private-arrays>,blocks_info [ or blocksS,idxL,idxH ],stream,blocks,threads
if (red_list) // reduction section if (red_list) // reduction section
{ {
mywarn("start: in reduction section "); mywarn("start: in reduction section ");
@@ -1269,7 +1361,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
btype = loc_type->baseType(); btype = loc_type->baseType();
else else
btype = loc_type; btype = loc_type;
//!printf("__112\n");
SgArrayType *typearray = new SgArrayType(*C_Type(btype)); SgArrayType *typearray = new SgArrayType(*C_Type(btype));
typearray->addRange(*new SgValueExp(loc_el_num)); typearray->addRange(*new SgValueExp(loc_el_num));
s_loc_var->setType(*typearray); s_loc_var->setType(*typearray);
@@ -1282,7 +1374,6 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
} }
//!printf("__113\n");
/*--- executable statements: register reductions in RTS ---*/ /*--- executable statements: register reductions in RTS ---*/
e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1)); e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1));
stmt = new SgCExpStmt(*e); stmt = new SgCExpStmt(*e);
@@ -1438,10 +1529,12 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("// Get bounds"); stmt->addComment("// Get bounds");
mywarn(" end: create assigns"); mywarn(" end: create assigns");
stmt_save = stmt;
stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_blocks, "x"), *new SgValueExp(1))); stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_blocks, "x"), *new SgValueExp(1)));
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("// Start counting"); stmt->addComment("// Start counting");
SgStatement *st_where = stmt;
stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_threads, "x"), *new SgValueExp(1))); stmt = new SgCExpStmt(SgAssignOp(*new SgRecordRefExp(*s_threads, "x"), *new SgValueExp(1)));
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
@@ -1543,6 +1636,35 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
} }
} }
e_all_private_size = sizeOfPrivateArraysInBytes();
if (options.isOn(C_CUDA) && e_all_private_size)
{
for (el=private_list, lnp=0; el; el=el->rhs())
{
s = el->lhs()->symbol();
if (IS_ARRAY(s))
{
sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sarg));
funcCall->addArg(*ae);
if (!lnp)
private_first = sarg;
lnp++;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCall->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
}
for (int i = 0; i < acrossV + loopV; ++i) for (int i = 0; i < acrossV + loopV; ++i)
{ {
funcCall->addArg(*new SgArrayRefExp(*lowI, *new SgValueExp(i))); funcCall->addArg(*new SgArrayRefExp(*lowI, *new SgValueExp(i)));
@@ -1557,7 +1679,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
if (red_list) if (red_list)
{ {
ln = 0; ln = 0;
for (er = red_list; er; er = er->rhs(), ++ln) for (er = red_list, s = red_first; er; er = er->rhs(), ++ln, s=s->next())
{ {
funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("cudaMemcpy")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("cudaMemcpy"));
funcCall->addArg(SgAddrOp(*new SgVarRefExp(&(er->lhs()->rhs()->symbol()->copy())))); funcCall->addArg(SgAddrOp(*new SgVarRefExp(&(er->lhs()->rhs()->symbol()->copy()))));
@@ -1571,9 +1693,10 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
stmt = new SgCExpStmt(*e); stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
stmt = new SgCExpStmt(*RedPost(s_loop_ref, s_tmp_var, &(er->lhs()->rhs()->symbol()->copy()), NULL)); // loop_red_post_ stmt = new SgCExpStmt(*RedPost(s_loop_ref, s_tmp_var, s, NULL)); // loop_red_post_
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
} }
ln = 0; ln = 0;
for (er = red_list; er; er = er->rhs(), ++ln) for (er = red_list; er; er = er->rhs(), ++ln)
{ {
@@ -1585,6 +1708,18 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
stmt->addComment("// Free temporary variables"); stmt->addComment("// Free temporary variables");
} }
} }
// insert code for big private arrays
if (options.isOn(C_CUDA) && e_all_private_size)
{
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, st_where, st_hedr, new SgValueExp(1));
// to dispose private arrays
for (s = private_first, ln = 0; ln < lnp; s = s->next(), ln++) // private arrays
{
stmt = new SgCExpStmt(*DisposePrivateArray(s_loop_ref, s));
st_end->insertStmtBefore(*stmt, *st_hedr);
}
}
// create args for kernel and return it // create args for kernel and return it
vector<ArgsForKernel> argsKernel(countKernels); vector<ArgsForKernel> argsKernel(countKernels);
for (unsigned i = 0; i < countKernels; ++i) for (unsigned i = 0; i < countKernels; ++i)
@@ -1594,6 +1729,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_OneThread(SgSymbol *sadap
mywarn(" end Adapter Function"); mywarn(" end Adapter Function");
if (options.isOn(C_CUDA)) if (options.isOn(C_CUDA))
RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0); RenamingCudaFunctionVariables(st_hedr, s_loop_ref, 0);
return argsKernel; return argsKernel;
} }
@@ -1653,6 +1789,27 @@ static inline void insertReductionArgs(SgSymbol **reduction_ptr, SgSymbol **redu
} }
} }
// Creates one "void *" pointer symbol (named by PointerNameForPrivateArray())
// for every array in private_list, but only when the C_CUDA option is on and
// sizeOfPrivateArraysInBytes() gives a non-NULL expression.
//
// Output parameters:
//   private_first      - first created symbol, NULL when nothing was created;
//   lnp                - number of created symbols; always assigned (0 when
//                        nothing was created), so the caller never reads an
//                        indeterminate value;
//   e_all_private_size - result of sizeOfPrivateArraysInBytes(); left untouched
//                        when the C_CUDA option is off.
//   st_hedr            - scope statement given to the SgSymbol constructor.
static void createPrivatePointers(SgSymbol* &private_first, int &lnp, SgStatement* st_hedr, SgExpression* &e_all_private_size)
{
    private_first = NULL;
    lnp = 0;

    if (!options.isOn(C_CUDA))
        return;

    e_all_private_size = sizeOfPrivateArraysInBytes();
    if (!e_all_private_size)
        return;

    for (SgExpression *el = private_list; el; el = el->rhs())
    {
        SgSymbol *s = el->lhs()->symbol();
        if (!IS_ARRAY(s))
            continue;

        SgSymbol *sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
        if (!lnp)
            private_first = sarg;
        lnp++;
    }
}
static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, SgSymbol* kernel_symb, SgExpression* espec, SgSymbol*& sg, SgSymbol* hgpu_first, static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, SgSymbol* kernel_symb, SgExpression* espec, SgSymbol*& sg, SgSymbol* hgpu_first,
SgSymbol*& sb, SgSymbol* base_first, symb_list*& sl, int& ln, int num, SgExpression*& e, SgSymbol** reduction_ptr, SgSymbol*& sb, SgSymbol* base_first, symb_list*& sl, int& ln, int num, SgExpression*& e, SgSymbol** reduction_ptr,
@@ -1660,7 +1817,7 @@ static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, Sg
SgSymbol* diag, const int& loopV, SgSymbol** num_elems, const int& acrossV, SgSymbol* acrossBase[16], SgSymbol* loopBase[16], SgSymbol* diag, const int& loopV, SgSymbol** num_elems, const int& acrossV, SgSymbol* acrossBase[16], SgSymbol* loopBase[16],
SgSymbol* idxI, const vector<SageSymbols>& loopAcrossSymb, const vector<SageSymbols>& loopSymb, SgSymbol*& s, SgSymbol* uses_first, SgSymbol* idxI, const vector<SageSymbols>& loopAcrossSymb, const vector<SageSymbols>& loopSymb, SgSymbol*& s, SgSymbol* uses_first,
SgSymbol*& sdev, SgSymbol* scalar_first, int uses_num, vector<char*>& dvm_array_headers, SgSymbol*& sdev, SgSymbol* scalar_first, int uses_num, vector<char*>& dvm_array_headers,
SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, SgSymbol* type_of_run, SgSymbol* bIdxs) SgSymbol** addressingParams, SgSymbol** outTypeOfTransformation, SgSymbol* type_of_run, SgSymbol* bIdxs, SgSymbol* private_first, int lnp)
{ {
funcCallKernel = CallKernel(kernel_symb, espec); funcCallKernel = CallKernel(kernel_symb, espec);
@@ -1728,6 +1885,32 @@ static void createArgsForKernelForTwoDeps(SgFunctionCallExp*& funcCallKernel, Sg
} }
} }
if (options.isOn(C_CUDA) && private_first) // there are big private arrays
{
SgExpression *el, *ae;
SgSymbol *sarg, *sp, *s;
int ln;
for (sp = private_first, el = private_list, ln = 0; ln < lnp; sp = sp->next(), el = el->rhs(), ln++)
{
while (!IS_ARRAY(el->lhs()->symbol()))
el = el->rhs();
s = el->lhs()->symbol();
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sp));
funcCallKernel->addArg(*ae);
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
if (options.isOn(AUTO_TFM)) if (options.isOn(AUTO_TFM))
{ {
for (size_t i = 0; i < dvm_array_headers.size(); ++i) for (size_t i = 0; i < dvm_array_headers.size(); ++i)
@@ -1767,14 +1950,14 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
symb_list *sl; symb_list *sl;
SgStatement *st_hedr, *st_end, *stmt, *first_exec; SgStatement *st_hedr, *st_end, *stmt, *first_exec;
SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *ex, *er; SgExpression *fe, *ae, *arg_list, *el, *e, *espec, *ex, *er, *e_all_private_size = NULL, *e_totalThreads;
SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first; SgSymbol *s_loop_ref, *sarg, *s, *sb, *sg, *sdev, *h_first, *hgpu_first, *base_first, *uses_first, *scalar_first, *private_first;
SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *type_of_run, *s_i = NULL, *s_k = NULL, *s_tmp_var_1; SgSymbol *s_blocks, *s_threads, *s_dev_num, *s_tmp_var, *type_of_run, *s_i = NULL, *s_k = NULL, *s_tmp_var_1;
SgSymbol *idxTypeInKernel; SgSymbol *idxTypeInKernel;
SgType *typ; SgType *typ;
SgFunctionCallExp *funcCall, *funcCallKernel; SgFunctionCallExp *funcCall, *funcCallKernel;
vector<char*> dvm_array_headers; vector<char*> dvm_array_headers;
int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays, nbuf = 0; int ln, num, uses_num, has_red_array, use_device_num, num_of_red_arrays, nbuf = 0, lnp;
// init block // init block
lowI = highI = idxI = elem = red_blocks = shared_mem = stream_t = bIdxs = NULL; lowI = highI = idxI = elem = red_blocks = shared_mem = stream_t = bIdxs = NULL;
@@ -1850,6 +2033,56 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
} }
uses_num = ln; uses_num = ln;
if (options.isOn(C_CUDA)) // <private-array-shapes>
{
int idim;
SgExpression *elp;
SgType *t = C_PointerType(C_DvmType());
for (elp=private_list; elp; elp = elp->rhs())
{
s = elp->lhs()->symbol();
if (IS_ARRAY(s) && !TestArrayShape(s))
{
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, DimSizeName(s, idim), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, DIM_SIZES))
{
SgExpression **edim = new (SgExpression *);
*edim = el;
elp->lhs()->addAttribute(DIM_SIZES, (void *)edim, sizeof(SgExpression *) );
}
el = NULL;
for (idim = Rank(s); idim; idim--)
{
sarg = new SgSymbol(VARIABLE_NAME, BoundName(s, idim, 1), *t, *st_hedr);
ae = new SgVarRefExp(sarg);
ae->setType(t);
el = AddElementToList(el, new SgPointerDerefExp(*ae));
}
arg_list = AddListToList(arg_list, &el->copy());
if (!elp->lhs()->attributeValue(0, L_BOUNDS))
{
SgExpression **elb = new (SgExpression *);
*elb = el;
elp->lhs()->addAttribute(L_BOUNDS, (void *)elb, sizeof(SgExpression *) );
}
while (arg_list->rhs() != 0)
arg_list = arg_list->rhs();
}
}
}
type_of_run = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("type_of_run"), *LongT, *st_hedr); type_of_run = new SgSymbol(VARIABLE_NAME, TestAndCorrectName("type_of_run"), *LongT, *st_hedr);
ae = new SgVarRefExp(type_of_run); ae = new SgVarRefExp(type_of_run);
ae->setType(LongT); ae->setType(LongT);
@@ -1941,7 +2174,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
btype = loc_type->baseType(); btype = loc_type->baseType();
else else
btype = loc_type; btype = loc_type;
//!printf("__112\n");
SgArrayType *typearray = new SgArrayType(*C_Type(btype)); SgArrayType *typearray = new SgArrayType(*C_Type(btype));
typearray->addRange(*new SgValueExp(loc_el_num)); typearray->addRange(*new SgValueExp(loc_el_num));
s_loc_var->setType(*typearray); s_loc_var->setType(*typearray);
@@ -1955,7 +2188,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_hedr->insertStmtAfter(*stmt, *st_hedr); st_hedr->insertStmtAfter(*stmt, *st_hedr);
} }
//!printf("__113\n");
/*--- executable statements: register reductions in RTS ---*/ /*--- executable statements: register reductions in RTS ---*/
e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1)); e = &SgAssignOp(*new SgVarRefExp(s_tmp_var), *new SgValueExp(ln+1));
stmt = new SgCExpStmt(*e); stmt = new SgCExpStmt(*e);
@@ -2556,7 +2789,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
e = &SgAssignOp(*new SgVarRefExp(s_blocks), *f); e = &SgAssignOp(*new SgVarRefExp(s_blocks), *f);
stmt = new SgCExpStmt(*e); stmt = new SgCExpStmt(*e);
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
stmt->addComment("//Start method"); stmt->addComment("// Start method");
e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len))); e = &SgAssignOp(*new SgVarRefExp(acrossBase[0]), *new SgArrayRefExp(*lowI, *new SgValueExp(loopAcrossSymb[0].len)));
stmt = new SgCExpStmt(*e); stmt = new SgCExpStmt(*e);
@@ -2722,7 +2955,6 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
} }
} }
mywarn("start: in adding args section"); mywarn("start: in adding args section");
/* args for kernel */ /* args for kernel */
@@ -2781,6 +3013,35 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
sdev = sdev->next(); sdev = sdev->next();
} }
} }
e_all_private_size = sizeOfPrivateArraysInBytes();
if (options.isOn(C_CUDA) && e_all_private_size)
{
for (el=private_list, lnp=0; el; el=el->rhs())
{
s = el->lhs()->symbol();
if (IS_ARRAY(s))
{
sarg = new SgSymbol(VARIABLE_NAME, PointerNameForPrivateArray(s), *C_PointerType(C_VoidType()), *st_hedr);
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sarg));
funcCallKernel->addArg(*ae);
if (!lnp)
private_first = sarg;
lnp++;
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
}
funcCallKernel->addArg(*new SgVarRefExp(type_of_run)); funcCallKernel->addArg(*new SgVarRefExp(type_of_run));
for (int i = 0; i < acrossV + loopV; ++i) for (int i = 0; i < acrossV + loopV; ++i)
funcCallKernel->addArg(*new SgArrayRefExp(*bIdxs, *new SgValueExp(i))); funcCallKernel->addArg(*new SgArrayRefExp(*bIdxs, *new SgValueExp(i)));
@@ -2816,7 +3077,15 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
SgForStmt *simple; SgForStmt *simple;
simple = new SgForStmt(&SgAssignOp(*new SgVarRefExp(tmpV), *new SgValueExp(0)), &(*new SgVarRefExp(tmpV1) < *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len))), expr, stmt); simple = new SgForStmt(&SgAssignOp(*new SgVarRefExp(tmpV), *new SgValueExp(0)), &(*new SgVarRefExp(tmpV1) < *new SgArrayRefExp(*highI, *new SgValueExp(loopAcrossSymb[0].len))), expr, stmt);
st_end->insertStmtBefore(*simple); st_end->insertStmtBefore(*simple);
stmt = simple;
} }
stmt->addComment("// GPU execution");
if (options.isOn(C_CUDA) && e_all_private_size)
{
e_totalThreads = &(*new SgRecordRefExp(*s_blocks, "x") * *new SgRecordRefExp(*s_blocks, "y") * *new SgRecordRefExp(*s_blocks, "z") * *new SgRecordRefExp(*s_threads, "x") * *new SgRecordRefExp(*s_threads, "y") * *new SgRecordRefExp(*s_threads, "z"));
GetMemoryForPrivateArrays(private_first, s_loop_ref, lnp, stmt, st_hedr, e_totalThreads);
}
} }
else if (acrossV == 2) // ACROSS with two dependence: generate method else if (acrossV == 2) // ACROSS with two dependence: generate method
{ {
@@ -2972,7 +3241,8 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
mywarn(" end: out red section"); mywarn(" end: out red section");
} }
createPrivatePointers(private_first, lnp, st_hedr, e_all_private_size);
GetMemoryForPrivateArrays (private_first, s_loop_ref, lnp, st_end, st_hedr, new SgVarRefExp(q));
mywarn("strat: init bases"); mywarn("strat: init bases");
// init bases // init bases
for (int i = 0; i < acrossV; ++i) for (int i = 0; i < acrossV; ++i)
@@ -3014,7 +3284,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, diag, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, has_red_array, diag, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs); addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
stmt = createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks); stmt = createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks);
while_st->insertStmtAfter(*stmt); while_st->insertStmtAfter(*stmt);
@@ -3093,7 +3363,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, q, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, has_red_array, q, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs); addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
while_st1->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks)); while_st1->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
while_st2->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks)); while_st2->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
@@ -3105,7 +3375,7 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks, reduction_ptr, reduction_loc_ptr, reduction_symb, reduction_loc_symb, red_blocks,
has_red_array, elem, loopV, num_elems, acrossV, acrossBase, loopBase, idxI, has_red_array, elem, loopV, num_elems, acrossV, acrossBase, loopBase, idxI,
loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers, loopAcrossSymb, loopSymb, s, uses_first, sdev, scalar_first, uses_num, dvm_array_headers,
addressingParams, outTypeOfTransformation, type_of_run, bIdxs); addressingParams, outTypeOfTransformation, type_of_run, bIdxs, private_first, lnp);
while_st3->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks)); while_st3->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
while_st4->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks)); while_st4->insertStmtAfter(*createKernelCallsInCudaHandler(funcCallKernel, s_loop_ref, idxTypeInKernel, s_blocks));
@@ -3190,6 +3460,30 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
sdev = sdev->next(); sdev = sdev->next();
} }
} }
createPrivatePointers(private_first, lnp, st_hedr, e_all_private_size);
if (options.isOn(C_CUDA) && private_first) // there are big private arrays
{
SgSymbol *sp;
for (sp = private_first, el = private_list, ln = 0; ln < lnp; sp = sp->next(), el = el->rhs(), ln++)
{
while (!IS_ARRAY(el->lhs()->symbol()))
el = el->rhs();
s = el->lhs()->symbol();
ae = new SgCastExp(*C_PointerType( C_Type(s->type()->baseType())), *new SgVarRefExp(sp));
funcCallKernel->addArg(*ae);
if (!TestArrayShape(s))
{
SgExpression **eatr = (SgExpression **) el->lhs()->attributeValue(0, DIM_SIZES);
SgExpression *ela;
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
eatr = (SgExpression **) el->lhs()->attributeValue(0, L_BOUNDS);
for (ela = *eatr; ela; ela = ela->rhs())
funcCallKernel->addArg(SgDerefOp(*new SgVarRefExp(ela->lhs()->lhs()->symbol())));
}
}
}
funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("MIN")); funcCall = new SgFunctionCallExp(*createNewFunctionSymbol("MIN"));
funcCall->addArg(*new SgVarRefExp(M1)); funcCall->addArg(*new SgVarRefExp(M1));
funcCall->addArg(*new SgVarRefExp(M2)); funcCall->addArg(*new SgVarRefExp(M2));
@@ -3393,6 +3687,18 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
mywarn(" end: out red section"); mywarn(" end: out red section");
} }
if (options.isOn(C_CUDA) && private_first)
{
SgFunctionCallExp *f1 = new SgFunctionCallExp(*createNewFunctionSymbol("MAX"));
SgFunctionCallExp *f2 = new SgFunctionCallExp(*createNewFunctionSymbol("MAX"));
f1->addArg(*new SgVarRefExp(M1));
f1->addArg(*new SgVarRefExp(M2));
f2->addArg(*f1);
f2->addArg(*new SgVarRefExp(M3));
e_totalThreads = &(*new SgVarRefExp(Emin) * *f2);
GetMemoryForPrivateArrays (private_first, s_loop_ref, lnp, st_end, st_hedr, e_totalThreads);
}
int flag_comment = 0; int flag_comment = 0;
for (int i = 3; i < acrossV; ++i) for (int i = 3; i < acrossV; ++i)
{ {
@@ -3755,6 +4061,13 @@ vector<ArgsForKernel> Create_C_Adapter_Function_Across_variants(SgSymbol *sadapt
st_end->insertStmtBefore(*stmt, *st_hedr); st_end->insertStmtBefore(*stmt, *st_hedr);
} }
} }
// to dispose private arrays
if (options.isOn(C_CUDA) && e_all_private_size)
for (s = private_first, ln = 0; ln < lnp; s = s->next(), ln++) // private arrays
{
stmt = new SgCExpStmt(*DisposePrivateArray(s_loop_ref, s));
st_end->insertStmtBefore(*stmt, *st_hedr);
}
// create args for kernel and return it // create args for kernel and return it
vector<ArgsForKernel> argsKernel(countKernels); vector<ArgsForKernel> argsKernel(countKernels);
@@ -3976,6 +4289,9 @@ SgExpression *CreateKernelDummyListAcross(ArgsForKernel *argsKer, SgType *idxTyp
if (uses_list) if (uses_list)
arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ] arg_list = AddListToList(arg_list, CreateUsesDummyList()); //[+ <uses> ]
if (private_list)
arg_list = AddListToList(arg_list, CreatePrivateDummyList()); //[+ dummys for private arrays ]
if (argsKer->symb.size() >= 3) if (argsKer->symb.size() >= 3)
for (int it = 0; it < argsKer->sizeVars.size(); ++it) for (int it = 0; it < argsKer->sizeVars.size(); ++it)
arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->sizeVars[it]))); arg_list = AddListToList(arg_list, new SgExprListExp(*new SgVarRefExp(argsKer->sizeVars[it])));
@@ -5802,18 +6118,17 @@ SgSymbol *RedBlockSymbolInKernelAcross(SgSymbol *s, SgType *type)
void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_operation_list *rsl) void DeclarationOfReductionBlockInKernelAcross(SgExpression *ered, reduction_operation_list *rsl)
{ {
SgStatement *ass, *newst, *current, *if_st, *while_st, *typedecl, *st, *do_st; SgStatement *newst, *current, *if_st, *while_st, *typedecl, *st, *do_st;
SgExpression *le, *re, *eatr, *cond, *ev; SgExpression *eatr, *cond, *ev;
SgSymbol *red_var, *red_var_k, *s_block, *loc_var, *sf; SgSymbol *red_var, *red_var_k, *s_block, *loc_var, *sf;
SgType *rtype; SgType *rtype;
int i, ind;
//init block //init block
ass = newst = current = if_st = while_st = typedecl = st = do_st = NULL; newst = current = if_st = while_st = typedecl = st = do_st = NULL;
le = re = eatr = cond = ev = NULL; eatr = cond = ev = NULL;
red_var = red_var_k = s_block = loc_var = sf = NULL; red_var = red_var_k = s_block = loc_var = sf = NULL;
rtype = NULL; rtype = NULL;
i = ind = loc_el_num = 0; loc_el_num = 0;
//end of init block //end of init block
// analys of reduction operation // analys of reduction operation

View File

@@ -4942,3 +4942,48 @@ SgExpression *RtcSetLang(SgSymbol *s_loop_ref, const int lang)
fe->addArg(*new SgKeywordValExp("UNKNOWN_CUDA")); fe->addArg(*new SgKeywordValExp("UNKNOWN_CUDA"));
return(fe); return(fe);
} }
SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep)
{
    // Builds the expression for the run-time call
    //   DvmType loop_cuda_get_device_prop(DvmType *InDvmhLoop, DvmType prop);
    // s_loop_ref - symbol passed (by reference expression) as the first argument
    // ep         - expression passed as the second argument (the 'prop' value)
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_DEVICE_PROP]);
    SgVarRefExp *loop_ref = new SgVarRefExp(s_loop_ref);

    call->addArg(*loop_ref);
    call->addArg(*ep);
    return call;
}
SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes)
{
    // Builds the expression for the run-time call
    //   DvmType loop_cuda_get_max_blocks(DvmType *InDvmhLoop, DvmType maxBlocks, DvmType neededBytesForBlock)
    // Each of the three symbols is wrapped in a variable reference and
    // appended as an argument in the order: loop reference, max blocks, needed bytes.
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_MAX_BLOCKS]);

    SgVarRefExp *loop_ref = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_ref);

    SgVarRefExp *max_blocks_ref = new SgVarRefExp(s_max_blocks);
    call->addArg(*max_blocks_ref);

    SgVarRefExp *needed_bytes_ref = new SgVarRefExp(s_needed_bytes);
    call->addArg(*needed_bytes_ref);

    return call;
}
SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes)
{
    // Builds the expression for the run-time call
    //   DvmType *loop_cuda_get_private_array(DvmType *InDvmhLoop, UDvmType neededBytes)
    // s_loop_ref - symbol passed (by reference expression) as the first argument
    // e_bytes    - expression passed as the second argument (the 'neededBytes' value)
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[GET_PRIVATE_ARR]);
    SgVarRefExp *loop_ref = new SgVarRefExp(s_loop_ref);

    call->addArg(*loop_ref);
    call->addArg(*e_bytes);
    return call;
}
SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array)
{
    // Builds the expression for the run-time call
    //   void loop_cuda_dispose_private_array(DvmType *InDvmhLoop, void *array)
    // Both symbols are wrapped in variable references and appended as
    // arguments in the order: loop reference, array.
    SgFunctionCallExp *call = new SgFunctionCallExp(*fdvm[DISPOSE_PRIVATE_AR]);

    SgVarRefExp *loop_ref = new SgVarRefExp(s_loop_ref);
    call->addArg(*loop_ref);

    SgVarRefExp *array_ref = new SgVarRefExp(s_array);
    call->addArg(*array_ref);

    return call;
}

View File

@@ -261,6 +261,10 @@ const int END_OF_USE_LIST = 1050; /*ACC*/
const int END_OF_USE_LIST = 1050; /*ACC*/ const int END_OF_USE_LIST = 1050; /*ACC*/
const int ROUTINE_ATTR = 1051; /*ACC*/ const int ROUTINE_ATTR = 1051; /*ACC*/
const int DATA_REGION_SYMB = 1052; /*ACC*/ const int DATA_REGION_SYMB = 1052; /*ACC*/
const int REMOTE_ACCESS_BUF = 1053; /*ACC*/
const int L_BOUNDS = 1054; /*ACC*/
const int DIM_SIZES = 1055; /*ACC*/
const int PRIVATE_ARRAY = 1056; /*ACC*/
const int PRIVATE_POINTER = 1057; /*ACC*/ const int PRIVATE_POINTER = 1057; /*ACC*/
const int MAX_LOOP_LEVEL = 20; // 7 - maximal number of loops in parallel loop nest const int MAX_LOOP_LEVEL = 20; // 7 - maximal number of loops in parallel loop nest
@@ -1275,6 +1279,7 @@ SgSymbol *isSameRedVar(char *name);
SgSymbol *isSameRedVar(char *name); SgSymbol *isSameRedVar(char *name);
SgSymbol *isSameArray(char *name); SgSymbol *isSameArray(char *name);
SgSymbol *isSameIndexVar(char *name); SgSymbol *isSameIndexVar(char *name);
SgType * C_LongLongType();
SgType * C_UnsignedLongLongType(); SgType * C_UnsignedLongLongType();
SgType * C_DvmType(); SgType * C_DvmType();
SgType * C_CudaIndexType(); SgType * C_CudaIndexType();
@@ -1434,6 +1439,18 @@ SgSymbol *HeaderSymbolForHandler(SgSymbol *ar);
SgSymbol *HeaderSymbolForHandler(SgSymbol *ar); SgSymbol *HeaderSymbolForHandler(SgSymbol *ar);
void TestRoutineAttribute(SgSymbol *s, SgStatement *routine_interface); void TestRoutineAttribute(SgSymbol *s, SgStatement *routine_interface);
int LookForRoutineDir(SgStatement *interfaceFunc); int LookForRoutineDir(SgStatement *interfaceFunc);
SgStatement *Interface(SgSymbol *s);
SgExpression *sizeOfElementInBytes(SgSymbol *symb);
SgExpression *sizeOfPrivateArraysInBytes();
SgExpression *ProductOfDimSizeArgs(SgExpression *esizes);
//void doPrivateArrayList(SgExpression *private_arrays, SgStatement *st_hedr);
void addPrivateArrayList(SgFunctionCallExp *fcall, SgExpression *private_arrays, SgStatement *st_hedr);
int TestArrayShape(SgSymbol *ar);
SgExpression *DimSizeListOfPrivateArrays();
SgExpression *BoundListOfPrivateArrays();
SgExpression * DummyListForPrivateArrays(SgStatement *st_hedr);
SgExpression *CreatePrivateDummyList();
char *PointerNameForPrivateArray(SgSymbol *symb);
void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads); void GetMemoryForPrivateArrays(SgSymbol *private_first, SgSymbol *s_loop_ref, int nump, SgStatement *st_end, SgStatement *st_hedr, SgExpression *e_totalThreads);
/* acc_analyzer.cpp */ /* acc_analyzer.cpp */
@@ -1900,6 +1917,10 @@ SgStatement *Consistent_H (int il, SgExpression *hedr, SgExpression *axis_list);
SgStatement *Consistent_H (int il, SgExpression *hedr, SgExpression *axis_list); SgStatement *Consistent_H (int il, SgExpression *hedr, SgExpression *axis_list);
SgStatement *LoopRemoteAccess_H (int il, SgExpression *hedr, SgSymbol *ar, SgExpression *axis_list); SgStatement *LoopRemoteAccess_H (int il, SgExpression *hedr, SgSymbol *ar, SgExpression *axis_list);
SgStatement *RemoteAccess_H2 (SgExpression *buf_hedr, SgSymbol *ar, SgExpression *ar_hedr, SgExpression *axis_list); SgStatement *RemoteAccess_H2 (SgExpression *buf_hedr, SgSymbol *ar, SgExpression *ar_hedr, SgExpression *axis_list);
SgStatement *GetRemoteBuf (SgSymbol *loop_s, int n, SgSymbol *s_buf_head);
SgExpression *GetDeviceProp(SgSymbol *s_loop_ref, SgExpression *ep);
SgExpression *GetMaxBlocks(SgSymbol *s_loop_ref, SgSymbol *s_max_blocks, SgSymbol *s_needed_bytes);
SgExpression *GetPrivateArray(SgSymbol *s_loop_ref, SgExpression *e_bytes);
SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array); SgExpression *DisposePrivateArray(SgSymbol *s_loop_ref, SgSymbol *s_array);
/* io.cpp */ /* io.cpp */
@@ -2089,7 +2110,6 @@ char *Check_Correct_Name(const char *name);
/* acc_f2c.cpp */ /* acc_f2c.cpp */
void Translate_Fortran_To_C(SgStatement *stat, SgStatement *last, std::vector <std::stack <SgStatement*> > &, int); void Translate_Fortran_To_C(SgStatement *stat, SgStatement *last, std::vector <std::stack <SgStatement*> > &, int);
SgStatement* Translate_Fortran_To_C(SgStatement* Stmt, bool isSapforConv = false);
SgStatement* Translate_Fortran_To_C(SgStatement* Stmt, bool isSapforConv = false); SgStatement* Translate_Fortran_To_C(SgStatement* Stmt, bool isSapforConv = false);
SgSymbol* createNewFunctionSymbol(const char *name); SgSymbol* createNewFunctionSymbol(const char *name);
void swapDimentionsInprivateList(void); void swapDimentionsInprivateList(void);
@@ -2103,6 +2123,9 @@ void RenamingNewProcedureVariables(SgSymbol *proc_name);
void RenamingNewProcedureVariables(SgSymbol *proc_name); void RenamingNewProcedureVariables(SgSymbol *proc_name);
SgSymbol *hasSameNameAsSource(SgSymbol *symb); SgSymbol *hasSameNameAsSource(SgSymbol *symb);
void RenamingCudaFunctionVariables(SgStatement *first, SgSymbol *k_symb, int replace_flag); void RenamingCudaFunctionVariables(SgStatement *first, SgSymbol *k_symb, int replace_flag);
void replaceVariableSymbSameNameInStatements(SgStatement *first, SgStatement *last, SgSymbol *symb, SgSymbol *s_new, int replace_flag);
void RenamingCalledProcedureSymbols(SgStatement *header, SgStatement *copy_header);
void RenamingCalledProcedureSymbolsInKernel(SgSymbol *first_symb);
/* acc_across.cpp */ /* acc_across.cpp */
ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter); ArgsForKernel *Create_C_Adapter_Function_Across(SgSymbol *sadapter);
@@ -2238,7 +2261,7 @@ void ConvertLoopWithLabelToEnddoLoop (SgStatement *stat); /*OMP*/
// options on FDVM converter // options on FDVM converter
enum OPTIONS { enum OPTIONS {
AUTO_TFM = 0, ONE_THREAD, SPEED_TEST_L0, SPEED_TEST_L1, GPU_O0, GPU_O1, RTC, C_CUDA, OPT_EXP_COMP, AUTO_TFM = 0, ONE_THREAD, SPEED_TEST_L0, SPEED_TEST_L1, GPU_O0, GPU_O1, RTC, C_CUDA, OPT_EXP_COMP,
O_HOST, NO_CUDA, NO_BL_INFO, LOOP_ANALYSIS, PRIVATE_ANALYSIS, IO_RTS, READ_ALL, NO_REMOTE, NO_PURE_FUNC, O_HOST, NO_CUDA, NO_BL_INFO, LOOP_ANALYSIS, PRIVATE_ANALYSIS, IO_RTS, READ_ALL, NO_REMOTE, NO_PURE_FUNC,
GPU_IRR_ACC, O_PL, O_PL2, BIG_P, NUM_OPT}; GPU_IRR_ACC, O_PL, O_PL2, BIG_P, NUM_OPT};
// ONE_THREAD - compile one thread CUDA-kernels only for across (TODO for all CUDA-kernels) // ONE_THREAD - compile one thread CUDA-kernels only for across (TODO for all CUDA-kernels)
// SPEED_TEST_L0, SPEED_TEST_L1 - debug options for speed testof CUDA-kernels for across // SPEED_TEST_L0, SPEED_TEST_L1 - debug options for speed testof CUDA-kernels for across

View File

@@ -63,6 +63,7 @@
#define DVM_EXIT_INTERVAL_DIR 639 #define DVM_EXIT_INTERVAL_DIR 639
#define DVM_TEMPLATE_CREATE_DIR 640 #define DVM_TEMPLATE_CREATE_DIR 640
#define DVM_TEMPLATE_DELETE_DIR 641 #define DVM_TEMPLATE_DELETE_DIR 641
#define PRIVATE_AR_DECL 642
#define BLOCK_OP 705 #define BLOCK_OP 705
#define NEW_SPEC_OP 706 #define NEW_SPEC_OP 706
#define REDUCTION_OP 707 #define REDUCTION_OP 707

View File

@@ -334,3 +334,7 @@ name_dvm[GUESS_INDEX_TYPE] = "loop_guess_index_type_";
name_dvm[GUESS_INDEX_TYPE_2]="dvmh_loop_guess_index_type_C"; name_dvm[GUESS_INDEX_TYPE_2]="dvmh_loop_guess_index_type_C";
name_dvm[RTC_SET_LANG] = "loop_cuda_rtc_set_lang"; name_dvm[RTC_SET_LANG] = "loop_cuda_rtc_set_lang";
name_dvm[GET_REMOTE_BUF_C] = "dvmh_loop_get_remote_buf_C"; name_dvm[GET_REMOTE_BUF_C] = "dvmh_loop_get_remote_buf_C";
name_dvm[GET_DEVICE_PROP] = "loop_cuda_get_device_prop";
name_dvm[GET_MAX_BLOCKS] = "loop_cuda_get_max_blocks";
name_dvm[GET_PRIVATE_ARR] = "loop_cuda_get_private_array";
name_dvm[DISPOSE_PRIVATE_AR]="loop_cuda_dispose_private_array";

View File

@@ -332,5 +332,9 @@ enum {
GUESS_INDEX_TYPE_2, GUESS_INDEX_TYPE_2,
RTC_SET_LANG, RTC_SET_LANG,
GET_REMOTE_BUF_C, GET_REMOTE_BUF_C,
GET_DEVICE_PROP,
GET_MAX_BLOCKS,
GET_PRIVATE_ARR,
DISPOSE_PRIVATE_AR,
MAX_LIBFUN_NUM MAX_LIBFUN_NUM
}; };

View File

@@ -236,6 +236,7 @@
#define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */ #define DVM_EXIT_INTERVAL_DIR 639 /* DVM-F */
#define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */ #define DVM_TEMPLATE_CREATE_DIR 640 /* DVM-F */
#define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */ #define DVM_TEMPLATE_DELETE_DIR 641 /* DVM-F */
#define PRIVATE_AR_DECL 642 /* DVM-F */
/***************** variant tags for low level nodes ********************/ /***************** variant tags for low level nodes ********************/

View File

@@ -238,6 +238,7 @@ script using "tag". Run make tag.h to regenerate this file */
tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR"; tag [ DVM_EXIT_INTERVAL_DIR ] = "DVM_EXIT_INTERVAL_DIR";
tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR"; tag [ DVM_TEMPLATE_CREATE_DIR ] = "DVM_TEMPLATE_CREATE_DIR";
tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR"; tag [ DVM_TEMPLATE_DELETE_DIR ] = "DVM_TEMPLATE_DELETE_DIR";
tag [ PRIVATE_AR_DECL ] = "PRIVATE_AR_DECL";
/***************** variant tags for low level nodes ********************/ /***************** variant tags for low level nodes ********************/