// Loop-graph declarations: LoopType, InductiveVariables, LoopGraph and the free
// functions that work on collections of LoopGraph nodes.
//
// NOTE(review): the text below looks damaged and is reproduced unchanged:
//   * the first four #include directives have no target;
//   * template argument lists are missing (e.g. "std::set&", "std::map>&",
//     "std::vector> redistributeRules");
//   * line breaks were lost, so each "//" comment now hides the rest of its line.
// The missing types cannot be recovered from this text alone. Restore the file from
// version control instead of retyping them.
//
// getRealArrayRefs / getAllArrayRefs
//   Declared here without a body. isArrayTemplatesTheSame calls getRealArrayRefs with
//   the same array passed as both addTo and curr.
//
// InductiveVariables
//   Stores one main variable name (mainVar) and a set of names (allVars).
//   addVar inserts into allVars only. addMainVar overwrites mainVar and also inserts
//   the name into allVars. replaceMainVar erases the current mainVar from allVars and
//   then calls addMainVar. The two-argument constructor stores its arguments as given;
//   it does not insert mainVar into allVars. Both getters return copies.
//
// LoopGraph
//   Construction: line numbers are set to -1, every has*/without* flag and
//   inCanonicalFrom to false, every pointer member to NULL, perfectLoop, countOfIters,
//   calculatedCountOfIters and the start/end/step values to 0, countOfIterNested to 1,
//   executionTimeInSec to -1.0, inDvmhRegion to 0 and loopType to LoopType::NONE.
//
//   Destruction: deletes directive, directiveForLoop and every element of children,
//   then clears calls, readOpsArray, readOps, writeOps, hasConflicts,
//   acrossOutAttribute and both access graphs. oldDirective is not deleted here.
//
//   setForSwap / getForSwap / clearForSwap: store, read and reset needToSwapWith;
//   clearForSwap first recurses into children.
//
//   hasLimitsToParallel / hasLimitsToSplit / hasLimitsToCombine: OR together the
//   conditions listed in each body.
//   NOTE(review): hasLimitsToParallel tests hasAccessToSubArray and hasSubstringRefs,
//   but addConflictMessages has no message for either; addConflictMessages tests
//   !inCanonicalFrom, but hasLimitsToParallel does not. Confirm this is intended.
//
//   addConflictMessages: does nothing for a NULL vector. Otherwise appends one NOTE
//   message per condition that holds, each with trailing argument 3006, positioned at
//   altLineNum when that is positive and at lineNum otherwise.
//
//   setNewRedistributeRules: replaces redistributeRules on this node and, recursively,
//   on every descendant.
//   getRedistributeRule: linear search; returns .second of the first entry whose
//   .first equals arrayT, or NULL when there is none.
//   hasRedistribute: true when redistributeRules is not empty.
//
//   recalculateParallelDirective: collects directiveForLoop from this node and from
//   the chain children[0], children[0]->children[0], ... (perfectLoop entries in
//   total), then combines them left to right with operator+ until the first null
//   entry. Intermediate results are deleted; the first operand (this node's
//   directiveForLoop) is not. The previous directive is moved to oldDirective and the
//   combined result becomes directive, which is also returned.
//   NOTE(review): baseDirs[0] is read unconditionally, so perfectLoop must be >= 1
//   here although the constructor sets it to 0, and each node on the chain except the
//   last must have a child. Confirm callers guarantee both.
//   NOTE(review): when nothing is combined the result is directiveForLoop itself, so
//   directive and directiveForLoop point at the same object and the destructor would
//   delete it twice unless other code resets one of them. Verify.
//   NOTE(review): each call overwrites oldDirective without deleting its old value.
//
//   restoreDirective: when oldDirective is set, deletes directive and makes
//   oldDirective the current directive; then recurses into children.
//   NOTE(review): oldDirective is not reset afterwards, so a second call deletes the
//   restored directive and leaves directive dangling. Confirm it runs at most once
//   per recalculation.
//
//   setRegionToChilds / setWithOutDistrFlagToFalse / propagateUserDvmDir /
//   propagateDvmhRegion: push region, withoutDistributedArrays = false,
//   userDvmDirective (only into children that have none) and inDvmhRegion down to all
//   descendants. The node they are called on is not modified.
//
//   genLoopArrayName: returns funcName + "_loop_" + the decimal lineNum.
//   getAllArraysInLoop: a copy of readOpsArray plus every key of writeOps.
//   removeNonDistrArrays: keeps in usedArrays and usedArraysWrite only the elements
//   whose GetDistributeFlagVal() equals DIST::DISTR, clears readOpsArray, readOps,
//   writeOps and hasConflicts, then recurses into children.
//   removeGraphData: clears both access graphs on this node and on all descendants.
//
//   isArrayTemplatesTheSame: returns true at once when sharedMemoryParallelization
//   (declared outside this text) is non-zero. Otherwise it resolves the keys of
//   writeOps through getRealArrayRefs, resolves usedArrays the same way, and collects
//   the non-null GetTemplateArray(regId) results of those resolved arrays that were
//   also found among the written ones. With usedArrays non-empty the result is false
//   unless exactly one template was collected; in that case sameTemplate receives it.
//   sameTemplate is left untouched on every other path.
//
//   getGraphToModify / getGraph return accessGraph; getDataDirToModify / getDataDir
//   return dataDirectives. isFor / isWhile / isImplicit compare loopType; loopSymbol
//   returns loopSymbols.getMainVar().
//
//   Members declared without a body here (recalculatePerfect, clearUserDirectives,
//   hasParallelLoopsInChList, reduceAccessGraph, createVirtualTemplateLinks,
//   hasParalleDirectiveBefore, analyzeParallelDirs, getRealStat) are defined elsewhere.
//
//   NOTE(review): LoopGraph deletes raw pointers in its destructor but declares no
//   copy constructor or copy assignment, so copying a node would lead to double
//   deletion. Confirm nodes are only ever handled through pointers.
//
// The functions declared after LoopGraph have no bodies in this file.
#pragma once #include #include #include #include #include "errors.h" #include "types.h" #include "../Distribution/DvmhDirective.h" #include "../Distribution/Distribution.h" struct DistrVariant; struct ParallelDirective; struct ParallelRegion; class Statement; struct FuncInfo; namespace Distribution { class Array; } namespace DIST = Distribution; void getRealArrayRefs(DIST::Array* addTo, DIST::Array* curr, std::set& realArrayRefs, const std::map>& arrayLinksByFuncCalls); void getAllArrayRefs(DIST::Array* addTo, DIST::Array* curr, std::set& realArrayRefs, const std::map>& arrayLinksByFuncCalls); enum class LoopType { NONE, FOR, WHILE, IMPLICIT }; struct InductiveVariables { private: std::string mainVar; std::set allVars; public: InductiveVariables() { } explicit InductiveVariables(const std::string& mainVar, const std::set& allVars) : mainVar(mainVar), allVars(allVars) { }; std::string getMainVar() const { return mainVar; } std::set getAllVars() const { return allVars; } void addVar(const std::string& var) { allVars.insert(var); } void addMainVar(const std::string& var) { mainVar = var; allVars.insert(var); } void replaceMainVar(const std::string& var) { allVars.erase(mainVar); addMainVar(var); } }; struct LoopGraph { private: std::vector> redistributeRules; LoopGraph* needToSwapWith; //for local directive creating in MPI mode DIST::GraphCSR accessGraph; DIST::GraphCSR reducedAccessGraph; DataDirective dataDirectives; // public: LoopGraph() { lineNumAfterLoop = lineNum = altLineNum = -1; perfectLoop = 0; hasGoto = false; hasPrints = false; hasUnknownArrayDep = false; hasUnknownScalarDep = false; hasUnknownArrayAssigns = false; hasNonRectangularBounds = false; hasIndirectAccess = false; withoutDistributedArrays = false; hasWritesToNonDistribute = false; hasUnknownDistributedMap = false; hasDifferentAlignRules = false; hasNonPureProcedures = false; hasDvmIntervals = false; hasStops = false; directive = NULL; oldDirective = NULL; directiveForLoop = NULL; region = NULL; 
needToSwapWith = NULL; countOfIters = 0; countOfIterNested = 1; loop = NULL; parent = NULL; userDvmDirective = NULL; startVal = endVal = stepVal = 0; calculatedCountOfIters = 0; executionTimeInSec = -1.0; inDvmhRegion = 0; loopType = LoopType::NONE; inCanonicalFrom = false; hasAccessToSubArray = false; hasSubstringRefs = false; } ~LoopGraph() { needToSwapWith = NULL; if (directive != NULL) delete directive; if (directiveForLoop != NULL) delete directiveForLoop; for (int i = 0; i < children.size(); ++i) delete children[i]; calls.clear(); readOpsArray.clear(); readOps.clear(); writeOps.clear(); hasConflicts.clear(); acrossOutAttribute.clear(); accessGraph.ClearGraphCSR(); reducedAccessGraph.ClearGraphCSR(); } void setForSwap(LoopGraph* with) { needToSwapWith = with; } LoopGraph* getForSwap() const { return needToSwapWith; } void clearForSwap() { for (auto& ch : children) ch->clearForSwap(); needToSwapWith = NULL; } bool hasLimitsToParallel() const { return hasUnknownArrayDep || hasUnknownScalarDep || hasGoto || hasPrints || (hasConflicts.size() != 0) || hasStops || hasNonPureProcedures || hasUnknownArrayAssigns || hasNonRectangularBounds || hasIndirectAccess || hasWritesToNonDistribute || hasDifferentAlignRules || hasDvmIntervals || !isFor() || lastprivateScalars.size() || hasAccessToSubArray || hasSubstringRefs; } bool hasLimitsToSplit() const { return hasGoto || hasStops || !isFor() || hasPrints; } bool hasLimitsToCombine() const { return hasGoto || hasStops || !isFor() || hasPrints || linesOfCycle.size(); } void addConflictMessages(std::vector *messages) { if (messages == NULL) return; const int line = altLineNum > 0 ? 
altLineNum : lineNum; if (hasUnknownArrayDep) messages->push_back(Messages(NOTE, line, R113, L"unknown array dependency prevents parallelization of this loop", 3006)); if (hasUnknownScalarDep) messages->push_back(Messages(NOTE, line, R114, L"unknown scalar dependency prevents parallelization of this loop", 3006)); if (hasGoto) messages->push_back(Messages(NOTE, line, R115, L"internal/external moves via GOTO or EXIT operations prevent parallelization of this loop", 3006)); if (hasPrints) messages->push_back(Messages(NOTE, line, R116, L"IO operations prevent parallelization of this loop", 3006)); if (hasStops) messages->push_back(Messages(NOTE, line, R117, L"stop operations prevent parallelization of this loop", 3006)); if (hasConflicts.size() != 0) messages->push_back(Messages(NOTE, line, R118, L"conflict writes operations prevent parallelization of this loop", 3006)); if (hasUnknownArrayAssigns) messages->push_back(Messages(NOTE, line, R119, L"unknown array reference for writes prevent parallelization of this loop", 3006)); if (hasNonRectangularBounds) messages->push_back(Messages(NOTE, line, R144, L"non rectangular bounds prevent parallelization of this loop", 3006)); if (hasIndirectAccess) messages->push_back(Messages(NOTE, line, R120, L"indirect access by distributed array prevents parallelization of this loop", 3006)); if (hasWritesToNonDistribute) messages->push_back(Messages(NOTE, line, R121, L"writes to non distributed array prevents parallelization of this loop", 3006)); if (hasDifferentAlignRules) messages->push_back(Messages(NOTE, line, R122, L"different aligns between writes to distributed array prevents parallelization of this loop", 3006)); if (hasNonPureProcedures) messages->push_back(Messages(NOTE, line, R123, L"non pure procedures prevent parallelization of this loop", 3006)); if (hasDvmIntervals) messages->push_back(Messages(NOTE, line, R145, L"DVM intervals prevent parallelization of this loop", 3006)); if (!isFor() || !inCanonicalFrom) 
messages->push_back(Messages(NOTE, line, R178, L"This type of loop is not supported by the system", 3006)); if (lastprivateScalars.size()) messages->push_back(Messages(NOTE, line, R199, L"lastprivate scalar dependency prevents parallelization of this loop", 3006)); } void setNewRedistributeRules(const std::vector> &newRedistributeRules) { // set to top and for all childs redistributeRules = newRedistributeRules; for (int i = 0; i < children.size(); ++i) children[i]->setNewRedistributeRules(newRedistributeRules); } DistrVariant* getRedistributeRule(const DIST::Array *arrayT) const { DistrVariant *retVal = NULL; for (int i = 0; i < redistributeRules.size(); ++i) { if (redistributeRules[i].first == arrayT) { retVal = redistributeRules[i].second; break; } } return retVal; } bool hasRedistribute() const { return redistributeRules.size(); } ParallelDirective* recalculateParallelDirective() { std::vector baseDirs(perfectLoop); LoopGraph *next = this; for (int z = 0; z < perfectLoop; ++z) { baseDirs[z] = next->directiveForLoop; if (z != perfectLoop - 1) next = next->children[0]; } ParallelDirective *parDirective = baseDirs[0]; for (int z = 1; z < baseDirs.size() && baseDirs[z]; ++z) { ParallelDirective *old = parDirective; parDirective = *parDirective + *baseDirs[z]; if (z != 1) delete old; } oldDirective = directive; directive = parDirective; return directive; } void restoreDirective() { if (oldDirective) { delete directive; directive = oldDirective; } for (int i = 0; i < children.size(); ++i) children[i]->restoreDirective(); } void setRegionToChilds() { for (auto &loop : children) { loop->region = region; loop->setRegionToChilds(); } } void recalculatePerfect(); void setWithOutDistrFlagToFalse() { for (auto &loop : children) { loop->withoutDistributedArrays = false; loop->setWithOutDistrFlagToFalse(); } } void propagateUserDvmDir() { for (auto &loop : children) { if (loop->userDvmDirective == NULL) loop->userDvmDirective = userDvmDirective; loop->propagateUserDvmDir(); } 
} void propagateDvmhRegion(const int flag) { for (auto& loop : children) { loop->inDvmhRegion = flag; loop->propagateDvmhRegion(flag); } } std::string genLoopArrayName(const std::string &funcName) const { return funcName + "_loop_" + std::to_string(lineNum); } std::set getAllArraysInLoop() { std::set retVal(readOpsArray); for (auto &elem : writeOps) retVal.insert(elem.first); return retVal; } void removeNonDistrArrays() { std::set newUsedArrays; for (auto &elem : usedArrays) if (elem->GetDistributeFlagVal() == DIST::DISTR) newUsedArrays.insert(elem); usedArrays = newUsedArrays; std::set newUsedArraysW; for (auto &elem : usedArraysWrite) if (elem->GetDistributeFlagVal() == DIST::DISTR) newUsedArraysW.insert(elem); usedArraysWrite = newUsedArraysW; readOpsArray.clear(); readOps.clear(); writeOps.clear(); hasConflicts.clear(); for (auto &ch : children) ch->removeNonDistrArrays(); } void removeGraphData() { accessGraph.ClearGraphCSR(); reducedAccessGraph.ClearGraphCSR(); for (auto& ch : children) ch->removeGraphData(); } void clearUserDirectives(); bool isArrayTemplatesTheSame(DIST::Array*& sameTemplate, const uint64_t regId, const std::map>& arrayLinksByFuncCalls) { if (sharedMemoryParallelization != 0) return true; std::set usedForRegAccess; for (auto& array : writeOps) { std::set realArrayRefs; getRealArrayRefs(array.first, array.first, realArrayRefs, arrayLinksByFuncCalls); for (auto& realArr : realArrayRefs) usedForRegAccess.insert(realArr); } //read operations can be REMOTE_ACCESS /*for (auto& array : readOps) { std::set realArrayRefs; getRealArrayRefs(array.first, array.first, realArrayRefs, arrayLinksByFuncCalls); for (auto& realArr : realArrayRefs) usedForRegAccess.insert(realArr); }*/ std::set usedTemplates; for (auto& array : usedArrays) { std::set realArrayRefs; getRealArrayRefs(array, array, realArrayRefs, arrayLinksByFuncCalls); for (auto& realArr : realArrayRefs) { if (usedForRegAccess.find(realArr) == usedForRegAccess.end()) continue; auto templ = 
realArr->GetTemplateArray(regId); //TODO: what about NULL? if (templ) usedTemplates.insert(templ); } } if (usedArrays.size()) { if (usedTemplates.size() == 0 || usedTemplates.size() > 1) return false; else sameTemplate = *usedTemplates.begin(); } return true; } bool hasParallelLoopsInChList(); DIST::GraphCSR& getGraphToModify() { return accessGraph; } const DIST::GraphCSR& getGraph() const { return accessGraph; } DataDirective& getDataDirToModify() { return dataDirectives; } const DataDirective& getDataDir() const { return dataDirectives; } void reduceAccessGraph(); void createVirtualTemplateLinks(const std::map>& arrayLinksByFuncCalls, std::map>& SPF_messages, bool isMpiProgram = false); bool hasParalleDirectiveBefore(); void analyzeParallelDirs(); void* getRealStat(const char* file) const; bool isFor() const { return loopType == LoopType::FOR; } bool isWhile() const { return loopType == LoopType::WHILE; } bool isImplicit() const { return loopType == LoopType::IMPLICIT; } std::string loopSymbol() const { return loopSymbols.getMainVar(); } public: int lineNum; int altLineNum; int lineNumAfterLoop; std::string fileName; int perfectLoop; int countOfIters; // calculated total with nested loops double countOfIterNested; double executionTimeInSec; int calculatedCountOfIters; // calculated for current loop int startVal, endVal, stepVal; std::tuple startEndStepVals; InductiveVariables loopSymbols; std::pair startEndExpr; bool hasGoto; std::vector linesOfInternalGoTo; std::vector linesOfExternalGoTo; std::vector linesOfCycle; bool hasPrints; std::set linesOfIO; bool hasStops; std::set linesOfStop; bool hasUnknownScalarDep; std::vector linesOfScalarDep; bool hasUnknownArrayDep; bool hasUnknownArrayAssigns; bool hasNonRectangularBounds; bool hasIndirectAccess; bool hasWritesToNonDistribute; bool withoutDistributedArrays; bool hasUnknownDistributedMap; bool hasDifferentAlignRules; bool hasNonPureProcedures; bool hasDvmIntervals; // make sense only for NODIST regime bool 
hasAccessToSubArray; bool hasSubstringRefs; LoopType loopType; bool inCanonicalFrom; std::vector children; std::vector funcChildren; LoopGraph *parent; std::vector funcParents; // PAIR std::vector> calls; std::set lastprivateScalars; std::set privateScalars; std::map> readOpsForLoop; std::map> writeOpsForLoop; // agregated read and write operations by arrays std::set readOpsArray; std::map, std::vector>> readOps; std::map> writeOps; std::map> remoteRegularReads; std::map hasConflicts; std::set acrossOutAttribute; ParallelDirective *directive; // united directive for nested loops ParallelDirective *oldDirective; // save old directive for reverse ParallelDirective *directiveForLoop; // part of directive for loop Statement *userDvmDirective; // user's DVM PARALLEL directive ParallelRegion *region; Statement *loop; std::set usedArrays;// without NON DIST std::set usedArraysAll; std::set usedArraysWrite; // without NON DIST std::set usedArraysWriteAll; int inDvmhRegion; // 0 -unknown, -1 - no, 1 - yes }; void processLoopInformationForFunction(std::map>& loopInfo); void addToDistributionGraph(const std::map>& loopInfo, const std::map>& arrayLinksByFuncCalls); void selectFreeLoopsForParallelization(const std::vector& loops, const std::string& funcName, bool isDistribute, const std::vector& regions, std::vector& messagesForFile); int printLoopGraph(const char* fileName, const std::map>& loopGraph, bool withRegs = false); void checkCountOfIter(std::map>& loopGraph, const std::map>& allFuncInfo, std::map>& SPF_messages); void createMapLoopGraph(const std::vector& loops, std::map& mapGraph); void updateLoopIoAndStopsByFuncCalls(std::map>& loopGraph, const std::map>& allFuncInfo); void checkArraysMapping(const std::map>& loopGraph, std::map>& SPF_messages, const std::map>& arrayLinksByFuncCalls); void filterArrayInCSRGraph(std::map>& loopGraph, std::map>& allFuncs, ParallelRegion* reg, const std::map>& arrayLinksByFuncCalls, std::map>& messages); void 
swapLoopsForParallel(std::map>& loopGraph, std::map>& SPF_messages, const int rev);