Files
SAPFOR/Sapfor/_src/GraphLoop/graph_loops_base.cpp
2025-03-12 12:37:19 +03:00

1389 lines
53 KiB
C++
Raw Blame History

#include "../Utils/leak_detector.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <map>
#include <unordered_map>
#include <vector>
#include <set>
#include <string>
using std::vector;
using std::map;
using std::unordered_map;
using std::tuple;
using std::set;
using std::string;
using std::wstring;
using std::pair;
using std::make_pair;
using std::get;
#include "graph_loops.h"
#include "../GraphCall/graph_calls_func.h"
#include "../Utils/errors.h"
#include "../Distribution/Distribution.h"
#include "../Distribution/CreateDistributionDirs.h"
#include "../ParallelizationRegions/ParRegions.h"
#include "../VisualizerCalls/get_information.h"
#include "../DirectiveProcessing/directive_creator.h"
// Copies the per-dimension access descriptors of one array from `arrayOps`
// into the loop's `readOps` / `writeOps` tables.
//   currLoop - loop whose tables are updated
//   symbol   - array the descriptors belong to; its GetDimSize() fixes how
//              many dimensions are copied
//   arrayOps - source of readOps / unrecReadOps / writeOps
// An entry for `symbol` is created in each table when it is missing; an
// existing entry is resized and overwritten dimension by dimension.
static void fillWriteReadOps(LoopGraph *&currLoop, DIST::Array *symbol, const ArrayInfo *arrayOps)
{
    const int dim = symbol->GetDimSize();

    // insert() leaves an already present entry untouched and hands back an
    // iterator to it, so this is "find or create"
    auto readEntry = currLoop->readOps.insert(make_pair(symbol, make_pair(vector<ArrayOp>(), vector<bool>()))).first;
    auto &reads = readEntry->second.first;
    auto &unrecReads = readEntry->second.second;
    reads.resize(dim);
    unrecReads.resize(dim);

    auto writeEntry = currLoop->writeOps.insert(make_pair(symbol, vector<ArrayOp>())).first;
    auto &writes = writeEntry->second;
    writes.resize(dim);

    for (int z = 0; z < dim; ++z)
    {
        reads[z] = arrayOps->readOps[z];
        unrecReads[z] = arrayOps->unrecReadOps[z];
        writes[z] = arrayOps->writeOps[z];
    }
}
// Merges the coefficient table of `from` into `to`: a key that is new to
// `to` is copied together with its value, a key present in both gets the
// value of `from` added to the value already stored in `to`.
static void uniteVectors(const ArrayOp &from, ArrayOp &to)
{
    for (const auto &coef : from.coefficients)
    {
        // insert() does nothing when the key exists and reports that via .second
        const auto inserted = to.coefficients.insert(coef);
        if (!inserted.second)
            inserted.first->second += coef.second;
    }
}
// Propagates read information upwards inside a perfect loop nest.
// For a loop with perfectLoop > 1 the read tables are folded level by level,
// starting from the deepest level of the nest: the reads of each level are
// united into the level directly above it, so the head of the nest finally
// holds the united reads of all levels.
// For any other loop the function simply recurses into every child.
static void uniteChildReadInfo(LoopGraph *currLoop)
{
    if (currLoop->perfectLoop <= 1)
    {
        for (LoopGraph *child : currLoop->children)
            uniteChildReadInfo(child);
        return;
    }

    for (int depth = currLoop->perfectLoop; depth != 1; --depth)
    {
        // `outer` is the loop at level depth - 1 of the nest, `inner` is its
        // first child, i.e. the loop at level depth
        LoopGraph *outer = currLoop;
        for (int step = 0; step < depth - 2; ++step)
            outer = outer->children[0];
        LoopGraph *inner = outer->children[0];

        for (auto &innerEntry : inner->readOps)
        {
            auto outerIt = outer->readOps.find(innerEntry.first);
            if (outerIt == outer->readOps.end())
            {
                // array is read only in the inner loop: take its info as is
                outer->readOps[innerEntry.first] = innerEntry.second;
                continue;
            }

            const vector<ArrayOp> &innerReads = innerEntry.second.first;
            const vector<bool> &innerUnrecReads = innerEntry.second.second;
            auto &outerReads = outerIt->second.first;
            auto &outerUnrecReads = outerIt->second.second;

            for (size_t i = 0; i < outerReads.size(); ++i)
                uniteVectors(innerReads[i], outerReads[i]);
            for (size_t i = 0; i < outerUnrecReads.size(); ++i)
                outerUnrecReads[i] = outerUnrecReads[i] || innerUnrecReads[i];
        }
    }
}
// Detects arrays whose same dimension is written on more than one level of a
// loop subtree.
//   currLoop       - root of the subtree; children are processed first
//   foundConflicts - arrays for which a conflict was detected
//   unitedWROps    - per array, the writes accumulated so far on the way up
// Both maps are cleared before every child except the first one, so only the
// state left by the last processed child is visible when the writes of
// `currLoop` itself are examined. A conflict is also recorded in
// currLoop->hasConflicts.
static void fillConflictState(LoopGraph *currLoop, map<DIST::Array*, bool> &foundConflicts, map<DIST::Array*, vector<ArrayOp>> &unitedWROps)
{
    bool firstChild = true;
    for (LoopGraph *child : currLoop->children)
    {
        if (!firstChild)
        {
            foundConflicts.clear();
            unitedWROps.clear();
        }
        firstChild = false;
        fillConflictState(child, foundConflicts, unitedWROps);
    }

    for (auto &writeEntry : currLoop->writeOps)
    {
        DIST::Array *arrayN = writeEntry.first;
        // local copy: the disabled code below would modify it
        vector<ArrayOp> currWrites = writeEntry.second;

        // TODO: the original note here was stored in a broken encoding and can
        // not be recovered; it mentions REMOTE. The disabled code united the
        // reads of the loop into its writes before the conflict check.
        /*auto itRead = currLoop->readOps.find(arrayN);
        if (itRead != currLoop->readOps.end())
        {
            const vector<ArrayOp> &currReads = itRead->second.first;
            for (int i = 0; i < currWrites.size(); ++i)
                uniteVectors(currReads[i], currWrites[i]);
        }*/

        auto unitedIt = unitedWROps.find(arrayN);
        if (unitedIt == unitedWROps.end())
        {
            // first time this array is seen: remember its writes
            unitedWROps.insert(unitedIt, make_pair(arrayN, currWrites));
            continue;
        }

        vector<ArrayOp> &unitedW = unitedIt->second;
        for (size_t dim = 0; dim < unitedW.size(); ++dim)
        {
            const bool writtenHere = currWrites[dim].coefficients.size() != 0;
            const bool writtenBefore = unitedW[dim].coefficients.size() != 0;

            if (writtenHere && writtenBefore)
            {
                foundConflicts[arrayN] = true;
                currLoop->hasConflicts[arrayN] = true;
            }
            else if (!writtenBefore)
                unitedW[dim] = currWrites[dim];
            // otherwise this loop has no writes to the dimension, so there is
            // nothing to merge into the accumulated writes
        }
    }
}
// Fills the per-loop access tables from the collected array information.
// Four passes are made over `loopInfo`:
//   1. copy read/write descriptors into every loop (fillWriteReadOps) and
//      register arrays with at least one non-empty read in readOpsArray;
//   2. compute the write-conflict state of every loop subtree;
//   3. snapshot the tables into writeOpsForLoop / readOpsForLoop;
//   4. unite read information inside perfect loop nests.
// The snapshot in pass 3 is taken before pass 4 changes readOps.
void processLoopInformationForFunction(map<LoopGraph*, map<DIST::Array*, ArrayInfo*>> &loopInfo)
{
    // fill reads info
    for (auto &loopEntry : loopInfo)
    {
        LoopGraph *graphLoop = loopEntry.first;
        for (const auto &arrayEntry : loopEntry.second)
        {
            fillWriteReadOps(graphLoop, arrayEntry.first, arrayEntry.second);

            const ArrayInfo *info = arrayEntry.second;
            bool hasReads = false;
            for (int z = 0; !hasReads && z < info->getDimSize(); ++z)
                hasReads = info->readOps[z].coefficients.size() != 0;

            if (hasReads)
                graphLoop->readOpsArray.insert(arrayEntry.first);
        }
    }

    // fill conflict state, with a fresh state for every loop of the map
    for (auto &loopEntry : loopInfo)
    {
        map<DIST::Array*, bool> conflicts;
        map<DIST::Array*, vector<ArrayOp>> unitedWrites;
        fillConflictState(loopEntry.first, conflicts, unitedWrites);
    }

    // save read and write info
    for (auto &loopEntry : loopInfo)
    {
        LoopGraph *graphLoop = loopEntry.first;
        graphLoop->writeOpsForLoop = graphLoop->writeOps;
        for (auto &readEntry : graphLoop->readOps)
            graphLoop->readOpsForLoop[readEntry.first] = readEntry.second.first;
    }

    // unite reads info
    for (auto &loopEntry : loopInfo)
        uniteChildReadInfo(loopEntry.first);
}
#define GROUP_BY_REQUEST 1
#if GROUP_BY_REQUEST
// Accumulator of link weights between two arrays.
// Holds one slot per (dim1, dim2) pair, maxD1 * maxD2 slots in total; every
// slot stores the dimension pair and a table "attribute -> summed weight".
class GroupItem
{
private:
    int maxDim1;
    int maxDim2;
    vector<pair<pair<int, int>, map<attrType, double>>> coeffs;

public:
    GroupItem(int maxD1, int maxD2) : maxDim1(maxD1), maxDim2(maxD2), coeffs(maxD1 * maxD2) { }

    // Adds weight `currW` to the slot of (dim1, dim2). The key is first passed
    // through DIST::shiftByDiffInArc and DIST::inverseArcByShifts, and the
    // weight is summed under the resulting key.
    // No range check is made: the slot index is dim2 * maxDim1 + dim1.
    inline void AddToGroup(int dim1, int dim2, const attrType &key, const double currW)
    {
        const auto normalizedKey = DIST::inverseArcByShifts(DIST::shiftByDiffInArc(key));

        auto &slot = coeffs[dim2 * maxDim1 + dim1];
        slot.first = make_pair(dim1, dim2);
        // operator[] creates a missing entry with weight 0.0
        slot.second[normalizedKey] += currW;
    }

    // Read-only access to all slots, including the ones never written.
    const vector<pair<pair<int, int>, map<attrType, double>>>& GetCoeffs() const
    {
        return coeffs;
    }
};
// Flushes grouped link weights into the graph: for every array pair of
// `group`, every slot and every (attribute, weight) stored in the slot,
// AddArrayAccess is called with the given link type.
static inline void addGroup(DIST::GraphCSR<int, double, attrType> &G,
                            DIST::Arrays<int> &allArrays,
                            const map<pair<DIST::Array*, DIST::Array*>, GroupItem> &group,
                            const links linkType)
{
    for (const auto &pairEntry : group)
    {
        DIST::Array *from = pairEntry.first.first;
        DIST::Array *to = pairEntry.first.second;

        for (const auto &slot : pairEntry.second.GetCoeffs())
        {
            const auto &arc = slot.first;
            for (const auto &attrWeight : slot.second)
                AddArrayAccess(G, allArrays, from, to, arc, attrWeight.second, attrWeight.first, linkType);
        }
    }
}
#endif
// Converts (low, high) bounds of every dimension into extents.
//   in  - bounds per dimension
//   out - receives one extent per dimension (appended, not cleared)
// A dimension whose low bound is not less than its high bound gets the
// extent 2; otherwise the extent is high - low + 1.
// Returns the product of all extents (1.0 for an empty input).
static double calculateSizes(const vector<pair<int, int>> &in, vector<int> &out)
{
    double product = 1.0;
    for (const auto &bounds : in)
    {
        const bool noRange = bounds.first >= bounds.second;
        const int extent = noRange ? 2 : bounds.second - bounds.first + 1;

        out.push_back(extent);
        product *= extent;
    }
    return product;
}
// Adds links between the dimensions of two arrays to graph `G`.
//   from / to         - access descriptors of the two arrays
//   fromSymb / toSymb - arrays that become the ends of the links
//   linkType          - WW_link: pairs (write of from, write of to),
//                       WR_link: pairs (write of from, read of to),
//                       RR_link: pairs (read of from, read of to)
// Link weights are scaled by the products of the array extents computed by
// calculateSizes. With GROUP_BY_REQUEST the links are first accumulated in
// GroupItem tables and flushed at the end; otherwise every link is added
// directly.
// Returns true when, for WW_link or WR_link, some examined dimension pair has
// a write on either side; the result is always false for RR_link.
static bool addToGraph(DIST::GraphCSR<int, double, attrType> &G,
                       DIST::Arrays<int> &allArrays,
                       const ArrayInfo *from, DIST::Array *fromSymb,
                       const ArrayInfo *to, DIST::Array *toSymb, const links linkType)
{
    bool loopHasWrite = false;

#if GROUP_BY_REQUEST
    typedef map<pair<DIST::Array*, DIST::Array*>, GroupItem> GroupedLinks;
    GroupedLinks ww_links;
    GroupedLinks wr_links;
    GroupedLinks rr_links;

    const auto groupKey = make_pair(fromSymb, toSymb);
    // finds the group of (fromSymb, toSymb) in `links`, creating it on first use
    auto groupIn = [&](GroupedLinks &links) -> GroupItem&
    {
        auto it = links.find(groupKey);
        if (it == links.end())
            it = links.insert(it, make_pair(groupKey, GroupItem(fromSymb->GetDimSize(), toSymb->GetDimSize())));
        return it->second;
    };
#endif

    vector<int> sizesFrom;
    vector<int> sizesTo;
    const double allFrom = calculateSizes(fromSymb->GetSizes(), sizesFrom);
    const double allTo = calculateSizes(toSymb->GetSizes(), sizesTo);

    if (linkType == WW_link || linkType == WR_link)
    {
        // W-W and W-R links share the traversal and differ only in the
        // accesses taken on the `to` side and in the weight formula
        const bool isWW = (linkType == WW_link);

        for (int dimFrom = 0; dimFrom < from->getDimSize(); ++dimFrom)
        {
            for (int dimTo = 0; dimTo < to->getDimSize(); ++dimTo)
            {
                const auto &fromWrites = from->writeOps[dimFrom].coefficients;
                const auto &fromReads = from->readOps[dimFrom].coefficients;
                const auto &toWrites = to->writeOps[dimTo].coefficients;
                const auto &toReads = to->readOps[dimTo].coefficients;

                if (fromWrites.size() != 0 || toWrites.size() != 0)
                    loopHasWrite = true;

                const bool fromAccessed = fromWrites.size() != 0 || fromReads.size() != 0;
                const bool toAccessed = toWrites.size() != 0 || toReads.size() != 0;
                if (!(fromAccessed && toAccessed))
                    continue;

                const auto &toOps = isWW ? toWrites : toReads;
                for (auto &writeFrom : fromWrites)
                {
                    for (auto &opTo : toOps)
                    {
                        const double weight = isWW ? opTo.second * allTo + writeFrom.second * allFrom
                                                   : opTo.second * allTo;
#if GROUP_BY_REQUEST
                        groupIn(isWW ? ww_links : wr_links).AddToGroup(dimFrom, dimTo, make_pair(writeFrom.first, opTo.first), weight);
#else
                        AddArrayAccess(G, allArrays, fromSymb, toSymb, make_pair(dimFrom, dimTo), weight, make_pair(writeFrom.first, opTo.first), linkType);
#endif
                    }
                }
            }
        }
    }
    else if (linkType == RR_link)
    {
        // add R-R, if no W
        for (int dimFrom = 0; dimFrom < from->getDimSize(); ++dimFrom)
        {
            for (int dimTo = 0; dimTo < to->getDimSize(); ++dimTo)
            {
                const auto &fromReads = from->readOps[dimFrom].coefficients;
                const auto &toReads = to->readOps[dimTo].coefficients;
                if (fromReads.size() == 0 || toReads.size() == 0)
                    continue;

                for (auto &readFrom : fromReads)
                {
                    for (auto &readTo : toReads)
                    {
                        const double weight = readTo.second * std::max(allTo, allFrom);
#if GROUP_BY_REQUEST
                        groupIn(rr_links).AddToGroup(dimFrom, dimTo, make_pair(readFrom.first, readTo.first), weight);
#else
                        AddArrayAccess(G, allArrays, fromSymb, toSymb, make_pair(dimFrom, dimTo), weight, make_pair(readFrom.first, readTo.first), RR_link);
#endif
                    }
                }
            }
        }
    }

#if GROUP_BY_REQUEST
    addGroup(G, allArrays, ww_links, WW_link);
    addGroup(G, allArrays, wr_links, WR_link);
    addGroup(G, allArrays, rr_links, RR_link);
#endif
    return loopHasWrite;
}
//TODO: check for recursion!!
void getRealArrayRefs(DIST::Array* addTo, DIST::Array* curr,
set<DIST::Array*>& realArrayRefs,
const map<DIST::Array*, set<DIST::Array*>>& arrayLinksByFuncCalls)
{
auto itLink = arrayLinksByFuncCalls.find(curr);
if (itLink == arrayLinksByFuncCalls.end())
realArrayRefs.insert(curr);
else
for (auto& link : itLink->second)
getRealArrayRefs(addTo, link, realArrayRefs, arrayLinksByFuncCalls);
}
// Collects `curr` and every array reachable from it through
// `arrayLinksByFuncCalls` into `allArrayRefs`. Unlike getRealArrayRefs the
// intermediate arrays are kept too. A linked array that is already in the
// result set is not expanded again.
//   addTo - only forwarded to the recursive calls
void getAllArrayRefs(DIST::Array *addTo, DIST::Array *curr,
                     set<DIST::Array*> &allArrayRefs,
                     const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
{
    auto itLink = arrayLinksByFuncCalls.find(curr);
    allArrayRefs.insert(curr);

    if (itLink == arrayLinksByFuncCalls.end())
        return;

    for (auto &link : itLink->second)
    {
        if (allArrayRefs.count(link) != 0)
            continue;
        getAllArrayRefs(addTo, link, allArrayRefs, arrayLinksByFuncCalls);
    }
}
#define DEB_GRAPH 0
// Adds links of one type between all arrays accessed in a loop.
//   currAccessesV - (array, access info) of every array used in the loop
//   allArrays     - array registry; every processed array is added to it
//   realArrayRefs - for every accessed array, the arrays it really refers to
//   graph         - graph that receives the links
//   linkType      - kind of links to add
// For WR_link every ordered pair of different arrays is processed, for the
// other link types every unordered pair. In addition, when an accessed array
// refers to several real arrays, those real arrays are linked with each other
// by WW links built from a copy of the access info in which reads are also
// treated as writes.
// Returns true if any addToGraph call for `linkType` reported a write.
static bool processLinks(const vector<pair<DIST::Array*, ArrayInfo*>> &currAccessesV,
                         DIST::Arrays<int> &allArrays, map<DIST::Array*, set<DIST::Array*>> &realArrayRefs,
                         DIST::GraphCSR<int, double, attrType> &graph,
                         const links linkType)
{
    bool has_Wr_Ww_edges = false;
    int countAdd = 0;
    const int accessCount = (int)currAccessesV.size();

    for (int srcIdx = 0; srcIdx < accessCount; ++srcIdx)
    {
        DIST::Array *srcArray = currAccessesV[srcIdx].first;
        const ArrayInfo &srcInfo = *currAccessesV[srcIdx].second;
        allArrays.AddArrayToGraph(srcArray);

        // W-R links are directed, so the whole list is scanned for them
        const int firstDst = (linkType == WR_link) ? 0 : srcIdx + 1;
        for (auto &fromSymb : realArrayRefs[srcArray])
        {
            for (int dstIdx = firstDst; dstIdx < accessCount; ++dstIdx)
            {
                if (dstIdx == srcIdx)
                    continue;

                DIST::Array *dstArray = currAccessesV[dstIdx].first;
                const ArrayInfo &dstInfo = *(currAccessesV[dstIdx].second);
                allArrays.AddArrayToGraph(dstArray);

                for (auto &toSymb : realArrayRefs[dstArray])
                {
                    if (addToGraph(graph, allArrays, &srcInfo, fromSymb, &dstInfo, toSymb, linkType))
                        has_Wr_Ww_edges = true;
                    ++countAdd;
                }
            }
        }

        const set<DIST::Array*> &realRefsSet = realArrayRefs[srcArray];
        if (realRefsSet.size() <= 1)
            continue;

        const vector<DIST::Array*> realRefs(realRefsSet.begin(), realRefsSet.end());
        ArrayInfo unitedCopy = srcInfo;

        // copy read operations to write if empty
        // (insert() keeps a coefficient that the writes already have)
        for (size_t dim = 0; dim < unitedCopy.readOps.size(); ++dim)
            for (auto &readCoef : unitedCopy.readOps[dim].coefficients)
                unitedCopy.writeOps[dim].coefficients.insert(readCoef);

        for (size_t k1 = 0; k1 < realRefs.size(); ++k1)
        {
            for (size_t k2 = k1 + 1; k2 < realRefs.size(); ++k2)
            {
                addToGraph(graph, allArrays, &unitedCopy, realRefs[k1], &unitedCopy, realRefs[k2], WW_link);
                ++countAdd;
            }
        }
    }

#if DEB_GRAPH
    __spf_print(DEB_GRAPH, "added count = %d\n", countAdd);
#endif
    return has_Wr_Ww_edges;
}
// Ordering predicate: entries are ordered by the line number of their loop.
static bool sortByLine(const pair<LoopGraph*, vector<pair<DIST::Array*, ArrayInfo*>>>& l, const pair<LoopGraph*, vector<pair<DIST::Array*, ArrayInfo*>>>& r)
{
    const LoopGraph *leftLoop = l.first;
    const LoopGraph *rightLoop = r.first;
    return leftLoop->lineNum < rightLoop->lineNum;
}
// Ordering predicate: entries are ordered by the result of GetName() of
// their array.
static bool sortByName(const pair<DIST::Array*, ArrayInfo*>& l, const pair<DIST::Array*, ArrayInfo*>& r)
{
    DIST::Array *leftArray = l.first;
    DIST::Array *rightArray = r.first;
    return leftArray->GetName() < rightArray->GetName();
}
// Adds the array accesses of every loop to a distribution graph.
// Loops are processed in the order of their line numbers and the arrays of a
// loop in the order of their names. Loops without a parallel region and loops
// that are not FOR loops are skipped.
// The links go to the graph of the loop's region when
// sharedMemoryParallelization is 0 and to the loop's own graph otherwise.
// W-W and W-R links are always processed; R-R links only if neither of them
// reported a write.
void addToDistributionGraph(const map<LoopGraph*, map<DIST::Array*, ArrayInfo*>> &loopInfo,
                            const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
{
    typedef vector<pair<DIST::Array*, ArrayInfo*>> AccessList;

    vector<pair<LoopGraph*, AccessList>> sortedInfo;
    for (auto &loopAccess : loopInfo)
    {
        AccessList byName(loopAccess.second.begin(), loopAccess.second.end());
        sort(byName.begin(), byName.end(), sortByName);
        sortedInfo.push_back(make_pair(loopAccess.first, byName));
    }
    sort(sortedInfo.begin(), sortedInfo.end(), sortByLine);

#if 0
    for (auto& loop : sortedInfo)
    {
        printf("info for loop %d %s\n", loop.first->lineNum, loop.first->fileName.c_str());
        for (auto& elem : loop.second)
        {
            printf(" for Array %s\n", elem.first->GetName().c_str());
            elem.second->printInfo();
        }
    }
#endif

    for (auto &loopAccess : sortedInfo)
    {
        createNeededException();

        LoopGraph *loop = loopAccess.first;
        ParallelRegion *currReg = loop->region;
        if (currReg == NULL)
        {
            __spf_print(1, "Skip loop on line %d - no parallel region for this loop\n", loop->lineNum);
            continue;
        }
        if (!loop->isFor)
            continue;

        DIST::GraphCSR<int, double, attrType> &G = currReg->GetGraphToModify();
        DIST::GraphCSR<int, double, attrType> &loopGraph = loop->getGraphToModify();
        __spf_print(DEB_GRAPH, "added to loop %d %s\n", loop->lineNum, loop->fileName.c_str());
        DIST::Arrays<int> &allArrays = currReg->GetAllArraysToModify();

        const AccessList &currAccessesV = loopAccess.second;
        map<DIST::Array*, set<DIST::Array*>> realArrayRefs;
        for (auto &access : currAccessesV)
            getRealArrayRefs(access.first, access.first, realArrayRefs[access.first], arrayLinksByFuncCalls);

        DIST::GraphCSR<int, double, attrType> &target = (sharedMemoryParallelization == 0) ? G : loopGraph;

        const bool hasWwEdges = processLinks(currAccessesV, allArrays, realArrayRefs, target, WW_link);
        const bool hasWrEdges = processLinks(currAccessesV, allArrays, realArrayRefs, target, WR_link);
        const bool noWrites = !hasWwEdges && !hasWrEdges;

        bool hasRrEdges = false;
        if (noWrites)
            hasRrEdges = processLinks(currAccessesV, allArrays, realArrayRefs, target, RR_link);

        // nothing was reported by any link type: at least register the arrays
        if (sharedMemoryParallelization && noWrites && !hasRrEdges)
        {
            for (auto &refs : realArrayRefs)
                for (auto &array : refs.second)
                    allArrays.AddArrayToGraph(array);
        }

#if 0
        {
            char fName[256];
            sprintf(fName, "_graph_reg%d_%s.txt", loop->lineNum, loop->fileName.c_str());
            loopGraph.CreateGraphWiz(fName, vector<tuple<int, int, attrType>>(), allArrays, true);
        }
#endif
    }
}
#undef DEB_GRAPH
static bool addToDistributionGraph(const LoopGraph *loopInfo, const string &inFunction, int nesting)
{
ParallelRegion *currReg = loopInfo->region;
if (currReg == NULL || loopInfo->hasLimitsToParallel())
{
__spf_print(1, "Skip loop on line %d\n", loopInfo->lineNum);
return false;
}
if (loopInfo->perfectLoop < nesting || nesting < 1)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
bool checkFlag = true;
const LoopGraph* check = loopInfo;
for (int z = 0; z < nesting; ++z, check->children.size() ? check = check->children[0] : check)
checkFlag = checkFlag && check->withoutDistributedArrays;
if (checkFlag == false)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
DIST::GraphCSR<int, double, attrType> &G = currReg->GetGraphToModify();
DIST::Arrays<int> &allArrays = currReg->GetAllArraysToModify();
string fullLoopName = loopInfo->genLoopArrayName(inFunction);
string loopName = fullLoopName;
vector<string> regs;
regs.push_back(currReg->GetName());
DIST::Array *loopArray = new DIST::Array(fullLoopName, loopName, nesting, getUniqArrayId(), loopInfo->fileName,
loopInfo->lineNum, make_pair(DIST::l_LOCAL, inFunction), NULL, false, false, false, regs, 0);
if (loopInfo->calculatedCountOfIters == 0)
{
const LoopGraph* updateInfo = loopInfo;
vector<pair<Expression*, Expression*>> toAdd;
for (int z = 0; z < nesting; ++z, updateInfo->children.size() ? updateInfo = updateInfo->children[0] : updateInfo)
{
if (updateInfo->startEndExpr.first && updateInfo->startEndExpr.second)
{
if (updateInfo->stepVal < 0)
toAdd.push_back(make_pair(updateInfo->startEndExpr.second, updateInfo->startEndExpr.first));
else
toAdd.push_back(updateInfo->startEndExpr);
}
}
if (toAdd.size())
loopArray->SetSizesExpr(toAdd);
}
const LoopGraph* updateInfo = loopInfo;
for (int z = 0; z < nesting; ++z, updateInfo->children.size() ? updateInfo = updateInfo->children[0] : updateInfo)
loopArray->ExtendDimSize(z, (updateInfo->stepVal < 0) ? make_pair(updateInfo->endVal, updateInfo->startVal) : make_pair(updateInfo->startVal, updateInfo->endVal));
loopArray->SetLoopArray(true);
allArrays.AddArrayToGraph(loopArray);
return true;
}
// Processes loops that do not use distributed arrays.
// A loop qualifies when it is marked withoutDistributedArrays, belongs to a
// region, has no limits to parallelization and has a positive line number;
// for a loop that does not qualify the search continues in its children.
//   isDistribute == true  - the loop is registered as a loop array
//                           (addToDistributionGraph);
//   isDistribute == false - parallel directives are created for the nest
//                           from synthetic access information in which
//                           level z writes dimension z of the loop array.
void selectFreeLoopsForParallelization(const vector<LoopGraph*>& loops, const string& funcName, bool isDistribute,
                                       const vector<ParallelRegion*>& regions, vector<Messages>& messagesForFile)
{
    auto isFree = [](LoopGraph *loop)
    {
        return loop->withoutDistributedArrays && loop->region && !loop->hasLimitsToParallel() && loop->lineNum > 0;
    };

    for (auto &loopRef : loops)
    {
        if (!isFree(loopRef))
        {
            selectFreeLoopsForParallelization(loopRef->children, funcName, isDistribute, regions, messagesForFile);
            continue;
        }

        // count the qualifying levels of the perfect nest
        int nesting = 0;
        LoopGraph *level = loopRef;
        for (int z = 0; z < loopRef->perfectLoop; ++z)
        {
            if (isFree(level))
                ++nesting;
            if (level->children.size())
                level = level->children[0];
        }

        if (isDistribute)
        {
            addToDistributionGraph(loopRef, funcName, nesting);
            continue;
        }

        auto region = loopRef->region;
        auto allArrays = region->GetAllArrays();
        string fullLoopName = loopRef->genLoopArrayName(funcName);
        auto loopArray = allArrays.GetArrayByName(fullLoopName);

        // sized up front: pointers to its elements are stored below
        vector<ArrayInfo> tmpArrayInfo(nesting);
        map<LoopGraph*, map<DIST::Array*, ArrayInfo*>> convertedLoopInfo;

        LoopGraph *nestLevel = loopRef;
        for (int z = 0; z < nesting; ++z)
        {
            ArrayInfo &curr = tmpArrayInfo[z];
            curr.setDimSize(nesting);

            ArrayOp tmpOp(make_pair(make_pair(1, 0), 1.0));
            curr.writeOps[z] = tmpOp;

            map<DIST::Array*, ArrayInfo*> tmpAdd;
            tmpAdd.insert(make_pair(loopArray, &curr));
            convertedLoopInfo.insert(make_pair(nestLevel, tmpAdd));

            if (nestLevel->children.size())
                nestLevel = nestLevel->children[0];
        }
        createParallelDirectives(convertedLoopInfo, regions, map<DIST::Array*, set<DIST::Array*>>(), messagesForFile);
    }
}
// Writes `countOfBlanks` groups of `sizeOfBlank` spaces to `file`.
// Nothing is written when either number is not positive.
static void printBlanks(FILE *file, const int sizeOfBlank, const int countOfBlanks)
{
    if (sizeOfBlank <= 0)
        return;

    const string blank(sizeOfBlank, ' ');
    for (int k = 0; k < countOfBlanks; ++k)
        fputs(blank.c_str(), file);
}
// Prints one level of the loop tree and, recursively, all deeper levels.
//   file     - output stream
//   childs   - loops of the current level
//   lvl      - indentation depth, two spaces per level
//   withRegs - when true, loops without a region are not printed themselves;
//              their children are still visited
// For every printed loop one line with its attributes is written, followed by
// one line per call made in the loop.
static void printLoopGraphLvl(FILE *file, const vector<LoopGraph*> &childs, const int lvl, bool withRegs = false)
{
    for (LoopGraph *loop : childs)
    {
        const bool hidden = withRegs && loop->region == NULL;
        if (!hidden)
        {
            printBlanks(file, 2, lvl);
            fprintf(file, "FOR on line %d -- %d", loop->lineNum, loop->lineNumAfterLoop);

            if (loop->perfectLoop > 1)
                fprintf(file, " [PERFECT]");
            if (loop->hasGoto)
                fprintf(file, " [HAS GOTO]");
            if (loop->hasPrints)
                fprintf(file, " [HAS I/O OPS]");
            if (loop->region)
                fprintf(file, " [REGION %s]", loop->region->GetName().c_str());
            if (loop->userDvmDirective)
                fprintf(file, " [USER DVM]");

            fprintf(file, " [IT = %d / MULT = %f]", loop->countOfIters, loop->countOfIterNested);
            fprintf(file, "\n");

            for (const auto &call : loop->calls)
            {
                printBlanks(file, 2, lvl);
                fprintf(file, "CALL %s [%d]\n", call.first.c_str(), call.second);
            }
        }
        printLoopGraphLvl(file, loop->children, lvl + 1, withRegs);
    }
}
// Writes the loop trees of all files to the text file `fileName`.
//   loopGraph - top-level loops, keyed by source file name
//   withRegs  - forwarded to printLoopGraphLvl
// Returns 0 on success and -1 if the output file can not be opened.
int printLoopGraph(const char *fileName, const map<string, vector<LoopGraph*>> &loopGraph, bool withRegs)
{
    FILE *out = fopen(fileName, "w");
    if (out == NULL)
    {
        __spf_print(1, "can not open file %s\n", fileName);
        return -1;
    }

    for (const auto &byFile : loopGraph)
    {
        fprintf(out, "*** FILE %s\n", byFile.first.c_str());
        printLoopGraphLvl(out, byFile.second, 1, withRegs);
        fprintf(out, "\n");
    }

    fclose(out);
    return 0;
}
// Looks for FOR loops inside parallel regions whose iteration count is 0.
//   loops        - loops to check; children are checked recursively, but
//                  only below loops that belong to a region
//   currMessages - receives one NOTE per distinct message text
//   isNotOkey    - receives the region of every such loop
//   uniqMessages - texts already reported, used to suppress duplicates
static void isAllOk(const vector<LoopGraph*> &loops, vector<Messages> &currMessages, set<void*> &isNotOkey, set<wstring> &uniqMessages)
{
    for (LoopGraph *loop : loops)
    {
        if (!loop->region)
            continue;

        if (loop->countOfIters == 0 && loop->isFor)
        {
            wstring bufE, bufR;
            __spf_printToLongBuf(bufE, L" Can not calculate count of iterations for this loop, information about iterations in all loops in parallel regions '%s' will be ignored",
                                 to_wstring(loop->region->GetName()).c_str());

            // insert() reports through .second whether the text is new
            if (uniqMessages.insert(bufE).second)
            {
                __spf_printToLongBuf(bufR, R48, to_wstring(loop->region->GetName()).c_str());
                currMessages.push_back(Messages(NOTE, loop->lineNum, bufR, bufE, 1016));
                __spf_print(1, " Can not calculate count of iterations for loop on line %d, information about iterations in all loops in parallel regions '%s' will be ignored\n", loop->lineNum, loop->region->GetName().c_str());
            }
            isNotOkey.insert(loop->region);
        }

        isAllOk(loop->children, currMessages, isNotOkey, uniqMessages);
    }
}
// Replaces a non-positive iteration count by the default value 2 for every
// loop whose region is listed in `isNotOkey`. Children are visited only below
// loops that belong to a region.
static void setToDefaultCountIter(vector<LoopGraph*> &loops, const set<void*> &isNotOkey)
{
    for (LoopGraph *loop : loops)
    {
        if (!loop->region)
            continue;

        const bool regionIsBad = isNotOkey.count(loop->region) != 0;
        if (regionIsBad && loop->countOfIters <= 0)
            loop->countOfIters = 2;

        setToDefaultCountIter(loop->children, isNotOkey);
    }
}
// Computes countOfIterNested for every loop of the tree: the loop's own
// iteration count multiplied by `allCount`, the value accumulated for the
// enclosing loops. The result is passed on to the children.
static void multiplyCountIter(vector<LoopGraph*> &loops, const double allCount)
{
    for (LoopGraph *loop : loops)
    {
        loop->countOfIterNested = loop->countOfIters * allCount;
        multiplyCountIter(loop->children, loop->countOfIterNested);
    }
}
// Adds `coef` to the interprocedural coefficient of every loop in `loops`
// and of all their nested children. A loop seen for the first time starts
// from the coefficient 0.
static void recAddToChildren(vector<LoopGraph*> &loops, const double coef, map<LoopGraph*, double> &interprocCoefs)
{
    for (LoopGraph *loop : loops)
    {
        // operator[] creates a missing entry with the value 0.0
        interprocCoefs[loop] += coef;
        recAddToChildren(loop->children, coef, interprocCoefs);
    }
}
// Interprocedural part of the iteration count: for every loop the product of
// its iteration count and `allCount` is added to the coefficients of the
// loops of the functions called from it (funcChildren, with their nested
// children), and the walk continues through funcChildren with that product.
static void multiplyCountIterIP(vector<LoopGraph*> &loops, const double allCount, map<LoopGraph*, double> &interprocCoefs)
{
    for (LoopGraph *loop : loops)
    {
        const double callWeight = loop->countOfIters * allCount;
        recAddToChildren(loop->funcChildren, callWeight, interprocCoefs);
        multiplyCountIterIP(loop->funcChildren, callWeight, interprocCoefs);
    }
}
// Fills funcChildren of every loop of the tree: for each distinct function
// called from a loop and known in `mapFunc`, the loops of that function are
// appended to the loop's funcChildren.
//   allLoops - only forwarded to the recursive calls
static void fillInterprocLinks(const map<string, FuncInfo*> &mapFunc, vector<LoopGraph*> &loops, const map<string, vector<LoopGraph*>> &allLoops)
{
    for (LoopGraph *loop : loops)
    {
        // every called function is taken once, in name order
        set<string> calledNames;
        for (auto &call : loop->calls)
            calledNames.insert(call.first);

        for (auto &name : calledNames)
        {
            auto funcIt = mapFunc.find(name);
            if (funcIt == mapFunc.end())
                continue;

            FuncInfo *callee = funcIt->second;
            for (auto &loopInFunc : callee->loopsInFunc)
                loop->funcChildren.push_back(loopInFunc);
        }

        fillInterprocLinks(mapFunc, loop->children, allLoops);
    }
}
// Fills the back links: every loop is appended to funcParents of each of its
// funcChildren. The tree is walked through `children`.
static void fillInterprocLinks(vector<LoopGraph*>& loops)
{
    for (LoopGraph *loop : loops)
    {
        for (LoopGraph *calleeLoop : loop->funcChildren)
            calleeLoop->funcParents.push_back(loop);

        fillInterprocLinks(loop->children);
    }
}
// Checks and completes the iteration counts of all loops.
// Steps:
//   1. build the interprocedural links between loops (funcChildren and
//      funcParents);
//   2. report FOR loops in regions with an unknown iteration count and, for
//      the affected regions, replace non-positive counts by the default;
//   3. compute countOfIterNested inside every loop tree;
//   4. multiply countOfIterNested of loops reached through function calls by
//      the coefficient accumulated from the calling loops. The accumulation
//      starts from the top-level loops that are not a child or funcChild of
//      any loop.
void checkCountOfIter(map<string, vector<LoopGraph*>> &loopGraph, const map<string, vector<FuncInfo*>> &allFuncInfo, map<string, vector<Messages>> &SPF_messages)
{
    set<void*> isNotOkey;

    map<string, FuncInfo*> mapFunc;
    createMapOfFunc(allFuncInfo, mapFunc);

    for (auto &loopsInFile : loopGraph)
        fillInterprocLinks(mapFunc, loopsInFile.second, loopGraph);
    for (auto &loopsInFile : loopGraph)
        fillInterprocLinks(loopsInFile.second);

    for (auto &loopsInFile : loopGraph)
    {
        set<wstring> uniqMessages;
        // operator[] creates the message list of the file when it is missing
        isAllOk(loopsInFile.second, SPF_messages[loopsInFile.first], isNotOkey, uniqMessages);
    }

    if (isNotOkey.size() != 0)
        for (auto &loopsInFile : loopGraph)
            setToDefaultCountIter(loopsInFile.second, isNotOkey);

    for (auto &loopsInFile : loopGraph)
        multiplyCountIter(loopsInFile.second, 1.0);

    // linkTo: every loop reachable from a top-level loop through at least one
    // `children` or `funcChildren` link
    set<LoopGraph*> linkTo;
    vector<LoopGraph*> pending;
    auto addLinked = [&](LoopGraph *loop)
    {
        if (linkTo.insert(loop).second)
            pending.push_back(loop);
    };
    auto addAllLinkedFrom = [&](LoopGraph *loop)
    {
        for (auto &ch : loop->children)
            addLinked(ch);
        for (auto &ch : loop->funcChildren)
            addLinked(ch);
    };

    for (auto &loopsInFile : loopGraph)
        for (auto &loop : loopsInFile.second)
            addAllLinkedFrom(loop);

    while (!pending.empty())
    {
        LoopGraph *next = pending.back();
        pending.pop_back();
        addAllLinkedFrom(next);
    }

    // top-level loops nobody links to are the roots of the accumulation
    set<LoopGraph*> dontLink;
    for (auto &loopsInFile : loopGraph)
        for (auto &loop : loopsInFile.second)
            if (linkTo.count(loop) == 0)
                dontLink.insert(loop);

    map<LoopGraph*, double> interprocCoefs;
    vector<LoopGraph*> roots(dontLink.begin(), dontLink.end());
    multiplyCountIterIP(roots, 1.0, interprocCoefs);

    for (auto &coef : interprocCoefs)
        coef.first->countOfIterNested *= coef.second;
}
static void updateLoopIoAndStopsByFuncCalls(vector<LoopGraph*> &loopGraph, map<string, FuncInfo*> mapFunc)
{
for (auto &loop : loopGraph)
{
vector<pair<pair<string, int>, set<string>>> funNames;
for (auto &call : loop->calls)
{
string currF = call.first;
set<string> recCalls;
recCalls.insert(currF);
bool changed = true;
while (changed)
{
changed = false;
set<string> local = recCalls;
for (auto &elem : local)
{
auto itF = mapFunc.find(elem);
if (itF != mapFunc.end())
{
for (auto &toAdd : itF->second->callsFrom)
{
if (recCalls.find(toAdd) == recCalls.end())
{
recCalls.insert(toAdd);
changed = true;
}
}
}
}
}
funNames.push_back(make_pair(call, recCalls));
}
if (funNames.size())
{
for (auto &calls : funNames)
{
const int lineInLoop = calls.first.second;
for (auto &call : calls.second)
{
auto itF = mapFunc.find(call);
if (itF != mapFunc.end())
{
if (itF->second->linesOfIO.size() != 0)
{
loop->hasPrints = true;
loop->linesOfIO.insert(lineInLoop);
}
if (itF->second->linesOfStop.size() != 0)
{
loop->hasStops = true;
loop->linesOfStop.insert(lineInLoop);
}
}
}
}
}
updateLoopIoAndStopsByFuncCalls(loop->children, mapFunc);
}
}
// Entry point: builds the name -> function table from `allFuncInfo` and
// updates the I/O and stop flags of the loops of every file.
void updateLoopIoAndStopsByFuncCalls(map<string, vector<LoopGraph*>> &loopGraph, const map<string, vector<FuncInfo*>> &allFuncInfo)
{
    map<string, FuncInfo*> functionsByName;
    createMapOfFunc(allFuncInfo, functionsByName);

    for (auto &loopsOfFile : loopGraph)
    {
        vector<LoopGraph*> &topLoops = loopsOfFile.second;
        updateLoopIoAndStopsByFuncCalls(topLoops, functionsByName);
    }
}
static void checkArraysMapping(vector<LoopGraph*> &loopList, map<DIST::Array*, vector<int>> flagUse, vector<Messages> &messages, const int topLine, set<DIST::Array*> &checked);
// Accounts the writes of one loop and continues with its children.
//   flagUse - per array and dimension, the number of loops on the current
//             path that write the dimension with a coefficient whose first
//             component is not zero. Taken by value on purpose: the changes
//             made here are seen by the children of this loop only.
//   checked - receives every array that is met for the first time on a path
//   messages, topLine - forwarded to checkArraysMapping
static void fillFromLoop(LoopGraph *loop, map<DIST::Array*, vector<int>> flagUse, vector<Messages> &messages, const int topLine, set<DIST::Array*> &checked)
{
    for (auto &write_op : loop->writeOps)
    {
        DIST::Array *array = write_op.first;

        auto useIt = flagUse.find(array);
        if (useIt == flagUse.end())
        {
            // first write to this array on the path: all counters start at 0
            useIt = flagUse.insert(useIt, make_pair(array, vector<int>(array->GetDimSize(), 0)));
            checked.insert(array);
        }
        vector<int> &dimUse = useIt->second;

        for (size_t dim = 0; dim < write_op.second.size(); ++dim)
        {
            // a dimension is counted at most once per loop
            for (auto &coef : write_op.second[dim].coefficients)
            {
                if (coef.first.first != 0)
                {
                    dimUse[dim]++;
                    break;
                }
            }
        }
    }

    checkArraysMapping(loop->children, flagUse, messages, topLine, checked);
}
// Continues the walk started by fillFromLoop.
// With a non-empty `loopList` every loop of the list is processed with its
// own copy of `flagUse`. With an empty list (the end of a path) every array
// dimension whose counter is greater than 1 is deprecated, unless it is
// deprecated already, and a NOTE for line `topLine` is added to `messages`.
static void checkArraysMapping(vector<LoopGraph*> &loopList, map<DIST::Array*, vector<int>> flagUse,
                               vector<Messages> &messages, const int topLine, set<DIST::Array*> &checked)
{
    if (loopList.size() > 0)
    {
        for (auto &loop : loopList)
            fillFromLoop(loop, flagUse, messages, topLine, checked);
        return;
    }

    for (auto &arrayUse : flagUse)
    {
        DIST::Array *array = arrayUse.first;
        const vector<int> &dimUse = arrayUse.second;

        for (int z = 0; z < (int)dimUse.size(); ++z)
        {
            if (dimUse[z] <= 1)
                continue;
            if (array->IsDimDepracated(z))
                continue;

            std::wstring bufw, bufR;
            __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed due to different writes to %d dimension, this dimension will deprecated",
                                 to_wstring(array->GetShortName()).c_str(), z + 1);
            __spf_printToLongBuf(bufR, R85, z + 1,to_wstring(array->GetShortName()).c_str());
            messages.push_back(Messages(NOTE, topLine, bufR, bufw, 1047));
            array->DeprecateDimension(z);
        }
    }
}
//TODO: need to improve interproc analysis
// Checks how arrays are written in nested loops.
// Every top-level loop that has children is walked with fillFromLoop, which
// deprecates array dimensions written on several levels of one path.
// Afterwards every checked array with all dimensions deprecated gets the
// distribute flag DIST::SPF_PRIV and a NOTE at each of its declarations.
//   arrayLinksByFuncCalls - not used by this function
void checkArraysMapping(const map<string, vector<LoopGraph*>> &loopGraph, map<string, vector<Messages>> &SPF_messages,
                        const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
{
    set<DIST::Array*> checked;

    for (auto &loopByFile : loopGraph)
    {
        auto &messages = getObjectForFileFromMap(loopByFile.first.c_str(), SPF_messages);
        for (auto &loop : loopByFile.second)
        {
            if (loop->children.size() == 0)
                continue;

            // every top-level loop starts with empty counters
            map<DIST::Array*, vector<int>> flagUse;
            fillFromLoop(loop, flagUse, messages, loop->lineNum, checked);
        }
    }

    for (auto &array : checked)
    {
        if (!array->IsAllDeprecated())
            continue;

        wstring bufw, bufR;
        __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed due to all dimensions will deprecated", to_wstring(array->GetShortName()).c_str());
        __spf_printToLongBuf(bufR, R86, to_wstring(array->GetShortName()).c_str());

        for (auto &decl : array->GetDeclInfo())
            getObjectForFileFromMap(decl.first.c_str(), SPF_messages).push_back(Messages(NOTE, decl.second, bufR, bufw, 1047));

        array->SetDistributeFlag(DIST::SPF_PRIV);
    }
}
// Returns true as soon as any operation in 'allOps' has a coefficient entry whose key has
// a non-zero first component; returns false when no such entry exists (including when
// 'allOps' is empty).
static bool isMapped(const vector<ArrayOp> &allOps)
{
    for (auto &op : allOps)
        for (auto &coef : op.coefficients)
            if (coef.first.first != 0)
                return true;

    return false;
}
// Recursive worker: for the loops of region 'reg' decides which arrays must not be distributed.
//
// Per loop (loops of other regions are skipped, their children are not visited):
//  - a loop without calls only forwards the analysis to its children;
//  - a loop with calls is analysed only if none of its limitation flags is set and it
//    has used arrays; otherwise nothing is done for it;
//  - if none of the loop's read/write operations is mapped (see isMapped()), the analysis
//    goes on to the children;
//  - otherwise the tree number (from 'trees') that is most frequent among the mapped
//    arrays is chosen, and every mapped array or array used by a called function that
//    does not belong to this tree gets a NOTE (code 1047) at the loop line and is flagged
//    as DIST::SPF_PRIV.
static void filterArrayInCSRGraph(vector<LoopGraph*> &loops, const map<string, FuncInfo*> &mapFuncInfo, const ParallelRegion *reg,
                                  const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls, const map<DIST::Array*, int> &trees,
                                  map<string, vector<Messages>> &messages)
{
    for (auto &loop : loops)
    {
        if (loop->region != reg)
            continue;

        if (loop->calls.empty())
        {
            filterArrayInCSRGraph(loop->children, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
            continue;
        }

        const bool hasLimitation = loop->hasGoto || loop->hasPrints || loop->hasStops || loop->hasUnknownArrayAssigns ||
                                   loop->hasNonRectangularBounds || loop->hasIndirectAccess || loop->hasWritesToNonDistribute || loop->hasDifferentAlignRules;
        if (hasLimitation || loop->usedArrays.empty())
            continue;

        // real array references behind every array used in the loop
        set<DIST::Array*> realRefs;
        for (auto &used : loop->usedArrays)
            getRealArrayRefs(used, used, realRefs, arrayLinksByFuncCalls);

        // real references of the arrays that have at least one mapped read or write
        set<DIST::Array*> wasMapped;
        for (auto &readEntry : loop->readOps)
        {
            set<DIST::Array*> readRefs;
            getRealArrayRefs(readEntry.first, readEntry.first, readRefs, arrayLinksByFuncCalls);
            if (isMapped(readEntry.second.first))
                wasMapped.insert(readRefs.begin(), readRefs.end());
        }
        for (auto &writeEntry : loop->writeOps)
        {
            set<DIST::Array*> writeRefs;
            getRealArrayRefs(writeEntry.first, writeEntry.first, writeRefs, arrayLinksByFuncCalls);
            if (isMapped(writeEntry.second))
                wasMapped.insert(writeRefs.begin(), writeRefs.end());
        }

        if (wasMapped.empty())
        {
            filterArrayInCSRGraph(loop->children, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
            continue;
        }

        set<DIST::Array*> deprecated;
        map<int, int> treeNumCount;

        //filter by graph loop's arrays
        //TODO
        // first pass: reject mapped arrays without a valid tree, count the trees of the rest
        for (auto &array : realRefs)
        {
            if (wasMapped.count(array) == 0)
                continue;

            const auto itTree = trees.find(array);
            if (itTree != trees.end() && itTree->second >= 0)
            {
                ++treeNumCount[itTree->second];
                continue;
            }

            wstring bufw, bufR;
            __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(array->GetShortName()).c_str());
            __spf_printToLongBuf(bufR, R87, to_wstring(array->GetShortName()).c_str());
            getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
            deprecated.insert(array);
            array->SetDistributeFlag(DIST::SPF_PRIV);
        }

        if (treeNumCount.empty())
            continue;

        // the most frequent tree wins; on equal counts the smallest tree number is kept
        auto dominant = treeNumCount.begin();
        for (auto itCount = treeNumCount.begin(); itCount != treeNumCount.end(); ++itCount)
            if (itCount->second > dominant->second)
                dominant = itCount;
        const int treeNum = dominant->first;

        // second pass: reject mapped arrays that are not in the chosen tree
        // NOTE(review): arrays rejected by the first pass also match this condition and
        // get a second note here - confirm this is intended
        for (auto &array : realRefs)
        {
            if (wasMapped.count(array) == 0)
                continue;

            const auto itTree = trees.find(array);
            if (itTree != trees.end() && itTree->second == treeNum)
                continue;

            wstring bufw, bufR;
            __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(array->GetShortName()).c_str());
            __spf_printToLongBuf(bufR, R88, to_wstring(array->GetShortName()).c_str());
            getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
            deprecated.insert(array);
            array->SetDistributeFlag(DIST::SPF_PRIV);
        }

        // arrays used by the functions called from the loop (only functions known to mapFuncInfo)
        set<DIST::Array*> inCalls;
        for (auto &call : loop->calls)
        {
            const auto itFunc = mapFuncInfo.find(call.first);
            if (itFunc == mapFuncInfo.end())
                continue;

            const auto &calleeArrays = itFunc->second->allUsedArrays;
            inCalls.insert(calleeArrays.begin(), calleeArrays.end());
        }

        // third pass: arrays seen only through calls must belong to the chosen tree as well
        for (auto &inCall : inCalls)
        {
            if (realRefs.count(inCall) != 0 || deprecated.count(inCall) != 0)
                continue;

            const auto itTree = trees.find(inCall);
            if (itTree != trees.end() && itTree->second == treeNum)
                continue;

            wstring bufw, bufR;
            __spf_printToLongBuf(bufw, L" Array '%s' can not be distributed", to_wstring(inCall->GetShortName()).c_str());
            __spf_printToLongBuf(bufR, R89, to_wstring(inCall->GetShortName()).c_str());
            getObjectForFileFromMap(loop->fileName.c_str(), messages).push_back(Messages(NOTE, loop->lineNum, bufR, bufw, 1047));
            deprecated.insert(inCall);
            inCall->SetDistributeFlag(DIST::SPF_PRIV);
        }
    }
}
// Entry point of the array filtering for region 'reg'.
// Only arrays that are not loop arrays, not templates and not located in l_PARAMETER are
// counted; with at most one such array the function returns without doing anything.
// Otherwise the array trees of the region graph are collected, every counted array that
// has no tree yet receives a new tree number of its own, and the recursive worker is
// started for the loops of each file.
void filterArrayInCSRGraph(map<string, vector<LoopGraph*>> &loopGraph, map<string, vector<FuncInfo*>> &allFuncs,
                           ParallelRegion *reg, const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls,
                           map<string, vector<Messages>> &messages)
{
    auto arrays = reg->GetAllArrays().GetArrays();

    // selects the arrays that take part in the filtering
    const auto isCandidate = [](DIST::Array *array)
    {
        return !array->IsLoopArray() && !array->IsTemplate() && array->GetLocation().first != DIST::l_PARAMETER;
    };

    int candidates = 0;
    for (auto &array : arrays)
        if (isCandidate(array))
            ++candidates;
    if (candidates <= 1)
        return;

    map<DIST::Array*, int> trees;
    reg->GetGraphToModify().FindAllArraysTrees(trees, reg->GetAllArrays());

    map<string, FuncInfo*> mapFuncInfo;
    createMapOfFunc(allFuncs, mapFuncInfo);

    // new tree numbers continue after the number of trees found so far
    int nextTreeNum = trees.size();
    for (auto &array : arrays)
    {
        if (!isCandidate(array) || trees.find(array) != trees.end())
            continue;
        trees[array] = nextTreeNum;
        ++nextTreeNum;
    }

    if (trees.empty())
        return;

    for (auto &byFile : loopGraph)
        filterArrayInCSRGraph(byFile.second, mapFuncInfo, reg, arrayLinksByFuncCalls, trees, messages);
}
void LoopGraph::reduceAccessGraph()
{
for (auto& ch : children)
ch->reduceAccessGraph();
checkNull(region, convertFileName(__FILE__).c_str(), __LINE__);
if (accessGraph.GetNumberOfV() != 0)
DIST::createOptimalDistribution<int, double, attrType>(accessGraph, reducedAccessGraph, region->GetAllArrays(), region->GetId(), false);
}
void LoopGraph::createVirtualTemplateLinks(const map<DIST::Array*, set<DIST::Array*>>& arrayLinksByFuncCalls, map<string, vector<Messages>>& SPF_messages, bool isMpiProgram)
{
if (region == NULL)
{
for (auto& ch : children)
ch->createVirtualTemplateLinks(arrayLinksByFuncCalls, SPF_messages, isMpiProgram);
return;
}
auto allArrays = region->GetAllArrays();
__spf_print(1, "*** FOR LOOP on line %d and file '%s':\n", lineNum, fileName.c_str());
#if 0
{
char fName[256];
sprintf(fName, "_graph_reg%d_%s.txt", lineNum, fileName.c_str());
accessGraph.CreateGraphWiz(fName, vector<tuple<int, int, attrType>>(), allArrays, true);
}
#endif
set<DIST::Array*> canNotMapped;
createDistributionDirs(reducedAccessGraph, allArrays, dataDirectives, SPF_messages, arrayLinksByFuncCalls, isMpiProgram, usedArrays.size() ? usedArrays : usedArraysAll);
createAlignDirs(reducedAccessGraph, allArrays, dataDirectives, (uint64_t)this, arrayLinksByFuncCalls, SPF_messages, &canNotMapped, usedArrays);
for (auto& elem : canNotMapped)
if (usedArraysWrite.find(elem) != usedArraysWrite.end())
hasUnknownArrayAssigns = true;
auto result = dataDirectives.GenAlignsRules();
for (int i = 0; i < result.size(); ++i)
__spf_print(1, " %s\n", result[i].c_str());
#if 0
//if (lineNum == 56 && fileName == "exchange_6.f")
{
char fName[256];
sprintf(fName, "_graph_reduced_with_templ_reg%d_%s.txt", lineNum, fileName.c_str());
reducedAccessGraph.CreateGraphWiz(fName, vector<tuple<int, int, attrType>>(), allArrays, true);
}
#endif
for (auto& ch : children)
ch->createVirtualTemplateLinks(arrayLinksByFuncCalls, SPF_messages, isMpiProgram);
}