// MapToArray: record of the array a loop is aligned with -- the array itself (arrayRef), the
// aligned dimension (dimentionPos), a flag telling whether regular writes were usable (hasWrite),
// the access pair used in that dimension (mainAccess) and the underAcross flag. The constructor
// initialises only underAcross (to false); the remaining fields must be set by the user.
//
// createDirectiveForLoop(currentLoop, mainArray, acrossOutArrays) (continues on the next line):
// allocates a new ParallelDirective for currentLoop, fills langType/line/col/file, calls
// fillInfoFromDirectives() on it, and then erases from shadowRenew the first entry whose second
// name component equals the array name of each remoteAccess entry. The loop symbol is pushed to
// directive->parallel and, when mainArray.arrayRef is not NULL, one `on` entry is pushed per
// dimension: (loopSymbol, mainAccess) at mainArray.dimentionPos and ("*", (0, 0)) elsewhere.
// The directive is then stored in currentLoop->directive.
#include "../Utils/leak_detector.h" #include #include #include #include #include #include #include #include "../ParallelizationRegions/ParRegions.h" #include "../Distribution/Arrays.h" #include "../Transformations/loop_transform.h" #include "../Utils/errors.h" #include "directive_parser.h" #include "directive_creator.h" #define PRINT_PROF_INFO 1 #define PRINT_DIR_RESULT 0 using std::vector; using std::pair; using std::tuple; using std::map; using std::set; using std::make_pair; using std::make_tuple; using std::get; using std::string; using std::wstring; struct MapToArray { public: MapToArray() : underAcross(false) { } public: DIST::Array *arrayRef; int dimentionPos; int hasWrite; std::pair mainAccess; bool underAcross; }; static LoopGraph* createDirectiveForLoop(LoopGraph *currentLoop, MapToArray &mainArray, const set &acrossOutArrays) { const int pos = mainArray.dimentionPos; const pair &mainAccess = mainArray.mainAccess; ParallelDirective *directive = new ParallelDirective(); #if __SPF directive->langType = LANG_F; #else directive->langType = LANG_C; #endif directive->line = currentLoop->lineNum; directive->col = 0; directive->file = currentLoop->fileName; fillInfoFromDirectives(currentLoop, directive); //remote from SHADOW all arrays from REMOTE for (auto it = directive->remoteAccess.begin(); it != directive->remoteAccess.end(); ++it) { string arrayN = it->first.first.second; for (int k = 0; k < directive->shadowRenew.size(); ++k) { if (directive->shadowRenew[k].first.second == arrayN) { directive->shadowRenew.erase(directive->shadowRenew.begin() + k); break; } } } directive->parallel.push_back(currentLoop->loopSymbol); directive->arrayRef = mainArray.arrayRef; DIST::Array *tmp = mainArray.arrayRef; if (tmp != NULL) { for (int i = 0; i < tmp->GetDimSize(); ++i) { if (i == pos) directive->on.push_back(make_pair(currentLoop->loopSymbol, mainAccess)); else directive->on.push_back(make_pair("*", make_pair(0, 0))); } } currentLoop->directive = directive; 
// Remainder of createDirectiveForLoop: unless sharedMemoryParallelization is set, every array in
// currentLoop->readOpsArray that is listed neither in shadowRenew nor in across gets a shadowRenew
// entry with a (0, 0) width for each of its dimensions. NOTE: the locals are named the other way
// round -- `shortName` holds GetName() and `orignName` holds GetShortName(); the lookup key is
// (GetShortName(), GetName()). acrossOutArrays is then merged into
// currentLoop->acrossOutAttribute and currentLoop is returned.
//
// findShadowArraysOrCreate(shadow, across, symb): returns NULL when `across` already has an entry
// whose first name component equals symb->GetShortName(); otherwise returns a pointer to the
// matching `shadow` entry, appending an empty one keyed (GetShortName(), GetName()) when none
// exists. The pointer refers to an element of `shadow`, so it stays valid only until that vector
// is modified again. `existInAcross` is never read and the `break` after `return NULL` is
// unreachable.
//
// addShadowFromAnalysis(dir, currAccesses) (continues on the next line): widens dir->shadowRenew
// from the read coefficients of each accessed array. For a read whose second coefficient is
// non-zero, a negative value contributes its magnitude as the left width and a positive one as the
// right width of that dimension.
if(!sharedMemoryParallelization) { for (auto& read : currentLoop->readOpsArray) { const string shortName = read->GetName(); const string orignName = read->GetShortName(); pair key = make_pair(orignName, shortName); bool found = false; for (int i = 0; i < directive->shadowRenew.size(); ++i) { if (directive->shadowRenew[i].first == key) { found = true; break; } } if (found == false) { for (int i = 0; i < directive->across.size(); ++i) { if (directive->across[i].first == key) { found = true; break; } } if (found == false) { directive->shadowRenew.push_back(make_pair(key, vector>())); const DIST::Array *arrayRef = read; for (int i = 0; i < arrayRef->GetDimSize(); ++i) directive->shadowRenew.back().second.push_back(make_pair(0, 0)); } } } } if (currentLoop->directive) currentLoop->acrossOutAttribute.insert(acrossOutArrays.begin(), acrossOutArrays.end()); return currentLoop; } static pair, vector>>* findShadowArraysOrCreate(vector, vector>>> &shadow, const vector, vector>>> &across, DIST::Array *symb) { const string &arrayName = symb->GetShortName(); bool existInAcross = false; for (int i = 0; i < across.size(); ++i) { if (across[i].first.first == arrayName) { return NULL; break; } } pair, vector>> *toAdd = NULL; bool cond = false; for (int i = 0; i < shadow.size(); ++i) { if (shadow[i].first.first == arrayName) { toAdd = &shadow[i]; cond = true; break; } } if (!cond) { shadow.push_back(make_pair(make_pair(arrayName, symb->GetName()), vector>())); toAdd = &shadow.back(); } return toAdd; } void addShadowFromAnalysis(ParallelDirective *dir, const map &currAccesses) { vector, vector>>> &shadow = dir->shadowRenew; const vector, vector>>> &across = dir->across; for (auto &access : currAccesses) { const int dimSize = access.first->GetDimSize(); const ArrayInfo &currInfo = *(access.second); pair, vector>> *toAdd = NULL; bool needBreak = false; for (int idx = 0; idx < dimSize; ++idx) { for (auto &reads : currInfo.readOps[idx].coefficients) { auto &readPair = reads.first; if 
// Remainder of addShadowFromAnalysis: the shadow entry is looked up lazily on the first non-zero
// offset; if findShadowArraysOrCreate() returns NULL (array present in dir->across) the array is
// abandoned via needBreak. An entry with fewer than dimSize widths is resized and all of its
// widths are reset to (0, 0) before the per-dimension maxima are taken.
//
// findRegularReads(currInfo, arrayUniqKey, i, maxDim, mainArray, itersCount, arrayLinksByFuncCalls):
// counts how often each read coefficient pair occurs in dimension i and remembers the most
// frequent one (the first in map order on ties, because the comparison is strict). For every real
// array reference collected by getRealArrayRefs() it computes the extent of dimension i as
// sizes[i].second - sizes[i].first + 1; if itersCount is non-zero and exceeds that extent,
// mainArray is reset (arrayRef = NULL, dimentionPos = -1) and the function returns.
// Otherwise mainArray becomes (arrayUniqKey, i, most frequent access) when maxDim is still -1 or
// arrayUniqKey has more dimensions than maxDim; maxDim is updated in those cases.
(readPair.second != 0) { int left = 0, right = 0; if (readPair.second < 0) left = -readPair.second; else right = readPair.second; if (toAdd == NULL) { toAdd = findShadowArraysOrCreate(shadow, across, access.first); if (toAdd == NULL) { needBreak = true; break; } } if (toAdd->second.size() < dimSize) { toAdd->second.resize(dimSize); for (int z = 0; z < dimSize; ++z) toAdd->second[z] = make_pair(0, 0); } toAdd->second[idx].first = std::max(toAdd->second[idx].first, left); toAdd->second[idx].second = std::max(toAdd->second[idx].second, right); } } if (needBreak) break; } } } static void findRegularReads(const ArrayInfo &currInfo, DIST::Array *arrayUniqKey, const int i, int &maxDim, MapToArray &mainArray, const int itersCount, const map>& arrayLinksByFuncCalls) { map, int> countAcc; for (auto &reads : currInfo.readOps[i].coefficients) { auto it = countAcc.find(reads.first); if (it == countAcc.end()) countAcc.insert(it, make_pair(reads.first, 1)); else it->second++; } int maxVal = 0; int maxPos = 0; int k = 0; for (auto it = countAcc.begin(); it != countAcc.end(); ++it, ++k) { if (maxVal < it->second) { maxVal = it->second; maxPos = k; } } set allUniqKeys; getRealArrayRefs(arrayUniqKey, arrayUniqKey, allUniqKeys, arrayLinksByFuncCalls); for (auto& newKey : allUniqKeys) { auto& sizes = newKey->GetSizes(); const int currSize = sizes[i].second - sizes[i].first + 1; if (itersCount != 0 && itersCount > currSize) { mainArray.arrayRef = NULL; mainArray.dimentionPos = -1; return; } bool needToUpdate = true; if (maxDim == -1) maxDim = arrayUniqKey->GetDimSize(); else { const int currDim = arrayUniqKey->GetDimSize(); if (maxDim < currDim) maxDim = currDim; else needToUpdate = false; } if (needToUpdate) { mainArray.arrayRef = arrayUniqKey; mainArray.dimentionPos = i; k = 0; for (auto it = countAcc.begin(); it != countAcc.end(); ++it, ++k) { if (k == maxPos) { mainArray.mainAccess = it->first; break; } } } } } // no regular writes on loop, try to find regular reads static void 
// findMainArrayFromRead(currAccesses, mainArray, itersCount, arrayLinksByFuncCalls): re-keys
// currAccesses by GetName() into a std::map (printInternalError() is called on a duplicate name),
// so arrays are visited in name order, then calls findRegularReads() for every dimension that has
// read coefficients.
//
// fillArrays(loopInfo, uniqNames): adds to uniqNames the second name component of every entry
// produced by fillAcrossInfoFromDirectives() for loopInfo.
//
// fillArraysWithAcrossStatus(loopInfo, uniqNames): applies fillArrays() to loopInfo, then walks
// down through children[0] while the current loop has perfectLoop > 1, and up through the parents
// while a parent exists and has perfectLoop > 1.
//
// getShadowsAcross(array, pos, acrossInfo): returns the pair at index pos of the first acrossInfo
// entry whose first name component equals `array`, or (0, 0) when there is none.
//
// isUnderAcrossDir(array, acrossInfo): true when acrossInfo has an entry whose first name
// component equals `array`.
//
// checkForConflict(currAccesses, currentLoop, arrayWriteAcc, acrossInfo, acrossOutArrays)
// (signature starts at the end of this line): scans the array accesses of a loop, returns whether
// they conflict, and fills arrayWriteAcc / acrossOutArrays for arrays with regular writes.
findMainArrayFromRead(const map &currAccesses, MapToArray &mainArray, const int itersCount, const map>& arrayLinksByFuncCalls) { map> currAccessesSorted; for (auto& elem : currAccesses) { const string key = elem.first->GetName(); auto it = currAccessesSorted.find(key); if (it != currAccessesSorted.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); currAccessesSorted[key] = elem; } int maxDim = -1; for (auto &itArray : currAccessesSorted) { const ArrayInfo &currInfo = *(itArray.second.second); for (int i = 0; i < currInfo.getDimSize(); ++i) if (currInfo.readOps[i].coefficients.size() != 0) findRegularReads(currInfo, itArray.second.first, i, maxDim, mainArray, itersCount, arrayLinksByFuncCalls); } } static void fillArrays(LoopGraph *loopInfo, set &uniqNames) { vector, vector>>> acrossInfo; fillAcrossInfoFromDirectives(loopInfo, acrossInfo); for (int i = 0; i < acrossInfo.size(); ++i) uniqNames.insert(acrossInfo[i].first.second); } static void fillArraysWithAcrossStatus(LoopGraph *loopInfo, set &uniqNames) { fillArrays(loopInfo, uniqNames); LoopGraph *curr = loopInfo; while (curr->perfectLoop > 1) { curr = curr->children[0]; fillArrays(curr, uniqNames); } LoopGraph *prev = loopInfo->parent; while (prev && prev->perfectLoop > 1) { fillArrays(prev, uniqNames); prev = prev->parent; } } static inline pair getShadowsAcross(const string &array, const int pos, const vector, vector>>> &acrossInfo) { pair shadows = make_pair(0, 0); for (int i = 0; i < acrossInfo.size(); ++i) { if (acrossInfo[i].first.first == array) { shadows = acrossInfo[i].second[pos]; break; } } return shadows; } static inline bool isUnderAcrossDir(const string &array, const vector, vector>>> &acrossInfo) { bool underAcrossDir = false; for (int i = 0; i < acrossInfo.size(); ++i) { if (acrossInfo[i].first.first == array) { underAcrossDir = true; break; } } return underAcrossDir; } bool checkForConflict(const map &currAccesses, const LoopGraph *currentLoop, map>, DIST::ArrayComparator> 
// Body of checkForConflict, evaluated per accessed array (entries with a NULL key are skipped):
//  * write coefficients in more than one dimension -> conflict; no usable write dimension ->
//    move on to the next array (the read check at the end is skipped because of the `continue`);
//  * several distinct write accesses in the written dimension -> conflict unless the array is
//    under ACROSS. Under ACROSS the min/max of the second coefficient is found, the across pair is
//    read with getShadowsAcross(), and the stored access gets
//    second = shiftMax - pair.second * step for a positive step, else shiftMin - pair.first * step.
//    Its first coefficient is taken from the access having that second coefficient or, failing
//    that, from the only first coefficient seen. printInternalError() is called when the step is 0
//    or the first coefficient is still 0. The array is also inserted into acrossOutArrays;
//  * a single distinct write access is stored in arrayWriteAcc unchanged;
//  * finally, read coefficients in more than one dimension together with
//    lastPosRead != lastPosWrite and no ACROSS -> conflict.
// Returns true if any conflict was detected.
//
// createParallelDirectives(loopInfos, regions, arrayLinksByFuncCalls, messages) starts on the
// second line of this span: for every loop in loopInfos it looks for the array and dimension the
// loop is mapped onto and creates a directive for it. Loops for which getRegionByLine() returns
// NULL or that have userDvmDirective set are skipped. With sharedMemoryParallelization the loop
// pointer value is used as regId instead of the region id.
&arrayWriteAcc, const vector, vector>>> &acrossInfo, set &acrossOutArrays) { bool hasConflict = false; for (auto &itArray : currAccesses) { if(!itArray.first) continue; // skip fictitious array access in free loop const ArrayInfo &currInfo = *(itArray.second); const string &arrayName = itArray.first->GetShortName(); int countOfWriteDims = 0; int lastPosWrite = -1; for (int i = 0; i < currInfo.getDimSize(); ++i) { if (currInfo.writeOps[i].coefficients.size() != 0) { lastPosWrite = i; countOfWriteDims++; } } if (countOfWriteDims > 1) { __spf_print(PRINT_DIR_RESULT, " array %s was found for loop on line %d\n", arrayName.c_str(), currentLoop->lineNum); __spf_print(PRINT_DIR_RESULT, " conflicted writes\n"); hasConflict = true; lastPosWrite = -1; } if (lastPosWrite == -1) { __spf_print(PRINT_DIR_RESULT, " no regular writes for array %s on loop\n", arrayName.c_str()); continue; } else { set> uniqAccess; const ArrayOp &acceses = currInfo.writeOps[lastPosWrite]; for (auto &elem : acceses.coefficients) uniqAccess.insert(elem.first); bool underAcross = isUnderAcrossDir(arrayName.c_str(), acrossInfo); if (uniqAccess.size() > 1) { if (!underAcross) { __spf_print(PRINT_DIR_RESULT, " conflicted writes\n"); hasConflict = true; continue; } else { int loopStep = currentLoop->stepVal; if (loopStep == 0) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); int shiftMin, shiftMax; bool init = false; for (auto &access : uniqAccess) { if (!init) { shiftMin = shiftMax = access.second; init = true; } else { shiftMin = std::min(shiftMin, access.second); shiftMax = std::max(shiftMax, access.second); } } pair needed = { 0, 0 };// = (loopStep > 0) ? 
make_pair(0, shiftMin) : make_pair(0, shiftMax); pair shiftSize = getShadowsAcross(arrayName.c_str(), lastPosWrite, acrossInfo); if (loopStep > 0) needed.second = shiftMax - shiftSize.second * loopStep; else needed.second = shiftMin - shiftSize.first * loopStep; set firstCoeffs; for (auto &access : uniqAccess) { firstCoeffs.insert(access.first); if (access.second == needed.second) { needed.first = access.first; break; } } if (needed.first == 0) if (firstCoeffs.size() == 1) needed.first = *firstCoeffs.begin(); if (needed.first == 0) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); arrayWriteAcc.insert(make_pair(itArray.first, make_pair(lastPosWrite, needed))); acrossOutArrays.insert(itArray.first); } } else arrayWriteAcc.insert(make_pair(itArray.first, make_pair(lastPosWrite, *uniqAccess.begin()))); } int countOfReadDims = 0; int lastPosRead = -1; for (int i = 0; i < currInfo.getDimSize(); ++i) { if (currInfo.readOps[i].coefficients.size() != 0) { lastPosRead = i; countOfReadDims++; } } if (countOfReadDims > 1 && lastPosRead != lastPosWrite && !isUnderAcrossDir(arrayName, acrossInfo)) { __spf_print(PRINT_DIR_RESULT, " dependencies between read and write\n"); hasConflict = true; continue; } } return hasConflict; } void createParallelDirectives(const map> &loopInfos, const vector& regions, const map> &arrayLinksByFuncCalls, vector &messages) { for (auto &loopInfo : loopInfos) { LoopGraph* currLoop = loopInfo.first; ParallelRegion *currReg = getRegionByLine(regions, currLoop->fileName.c_str(), currLoop->lineNum); if (currReg == NULL || currLoop->userDvmDirective != NULL) { __spf_print(PRINT_PROF_INFO, "Skip loop on file %s and line %d\n", currLoop->fileName.c_str(), currLoop->lineNum); continue; } const int itersCount = currLoop->calculatedCountOfIters; uint64_t regId = currReg->GetId(); if (sharedMemoryParallelization) regId = (uint64_t)currLoop; const DIST::Arrays &allArrays = currReg->GetAllArrays(); vector, vector>>> acrossInfo; 
// createParallelDirectives, main body for one loop:
//  1. checkForConflict() fills arrayWriteAcc; if it reports a conflict nothing more is done for
//     the loop.
//  2. Without sharedMemoryParallelization the main array is chosen from the written arrays:
//     a single written array is taken directly. With several, only arrays named in the ACROSS set
//     are considered when that set is not empty; the smallest dimension count whose written extent
//     is >= itersCount (or any, when itersCount is 0) is found, and among the arrays with that
//     dimension count the one with the largest written extent is selected. If none qualifies,
//     hasWrite is cleared.
//  3. Every pair of considered arrays is checked for identical alignment: the written dimensions
//     must correspond in findLinksBetweenArrays(), and DIST::Fx() of the access with the template
//     rule must give the same result for both. A mismatch is printed, pushed to `messages` with
//     code 3011, sets currLoop->hasDifferentAlignRules and sets hasConflict.
//  4. With no written array at all hasWrite is cleared; whenever hasWrite is false,
//     findMainArrayFromRead() tries to choose the main array from reads.
//  5. If a main array and dimension were found (or sharedMemoryParallelization is set), the loop
//     has no limits to parallelisation, and lineNum > 0 (or lineNum < 0 with altLineNum > 0), the
//     main array is replaced by its template -- unless in shared-memory mode, under ACROSS, or a
//     loop array -- with mainAccess mapped through DIST::Fx() and dimentionPos through the template
//     links; createDirectiveForLoop() is then called (the call is completed on the next line).
// NOTE(review): allRules[0], allLinks[0] and *templateLink.begin() are used without an emptiness
// check -- confirm that at least one real reference always has a template at this point.
// The expression statement `mainArray.mainAccess;` has no effect.
fillAcrossInfoFromDirectives(currLoop, acrossInfo); bool hasConflict = false; // uniqKey -> pair> ///write acceses /// map>, DIST::ArrayComparator> arrayWriteAcc; set acrossOutArrays; __spf_print(PRINT_DIR_RESULT, " Loop on line %d:\n", currLoop->lineNum); const map &currAccesses = loopInfo.second; // find conflict and fill arrayWriteAcc hasConflict = checkForConflict(currAccesses, currLoop, arrayWriteAcc, acrossInfo, acrossOutArrays); if (hasConflict) __spf_print(PRINT_DIR_RESULT, " has conflict\n"); else { MapToArray mainArray; mainArray.arrayRef = NULL; mainArray.dimentionPos = -1; mainArray.hasWrite = true; mainArray.mainAccess; if(!sharedMemoryParallelization) { set uniqNamesWithAcross; fillArraysWithAcrossStatus(currLoop, uniqNamesWithAcross); if (arrayWriteAcc.size() == 1) { mainArray.arrayRef = arrayWriteAcc.begin()->first; mainArray.dimentionPos = arrayWriteAcc.begin()->second.first; mainArray.mainAccess = arrayWriteAcc.begin()->second.second; if (uniqNamesWithAcross.size()) mainArray.underAcross = true; } else if (arrayWriteAcc.size() > 1) { if (uniqNamesWithAcross.size()) mainArray.underAcross = true; int posDim = -1; int minDim = 999999; int k = 0; vector>>> accesses; map>> sameDims; for (auto i = arrayWriteAcc.begin(); i != arrayWriteAcc.end(); ++i, ++k) { const auto array = i->first; const string &uniqName = array->GetName(); //ACROSS arrays have priority state for all nested loops! 
if (uniqNamesWithAcross.size() > 0) if (uniqNamesWithAcross.find(uniqName) == uniqNamesWithAcross.end()) continue; const int currDim = array->GetDimSize(); auto& sizes = array->GetSizes(); const int currSize = sizes[i->second.first].second - sizes[i->second.first].first + 1; sameDims[currDim].insert(make_tuple(uniqName, k, currSize)); if (currDim < minDim) { if (itersCount == 0 || (itersCount != 0) && currSize >= itersCount) { minDim = currDim; posDim = k; } } __spf_print(PRINT_DIR_RESULT, " found writes for array %s -> [%d %d]\n", array->GetShortName().c_str(), i->second.second.first, i->second.second.second); accesses.push_back(make_pair(array->GetName(), i->second)); } if (posDim != -1) { auto itDim = sameDims.find(minDim); if (itDim == sameDims.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); int maxArraySize = -1; for (auto& elem : itDim->second) { if (get<2>(elem) > maxArraySize) { maxArraySize = get<2>(elem); posDim = get<1>(elem); } } } else { mainArray.hasWrite = false; __spf_print(PRINT_DIR_RESULT, " no appropriate regular writes on loop\n"); } vector> realArrayRefs(accesses.size()); for (int i = 0; i < (int)accesses.size(); ++i) { DIST::Array *array = allArrays.GetArrayByName(accesses[i].first); getRealArrayRefs(array, array, realArrayRefs[i], arrayLinksByFuncCalls); } //check the same distribution bool statusOk = true; for (int i = 0; i < (int)accesses.size(); ++i) { for (int k = i + 1; k < (int)accesses.size(); ++k) { DIST::Array *array1 = allArrays.GetArrayByName(accesses[i].first); DIST::Array *array2 = allArrays.GetArrayByName(accesses[k].first); const set &realArrayRefs1 = realArrayRefs[i]; const set &realArrayRefs2 = realArrayRefs[k]; for (auto &refs1 : realArrayRefs1) { for (auto &refs2 : realArrayRefs2) { auto links = findLinksBetweenArrays(refs1, refs2, regId); const int dimFrom = accesses[i].second.first; const int dimTo = accesses[k].second.first; if (dimTo != links[dimFrom]) { __spf_print(1, "arrays '%s' and '%s' have 
different align dimensions for loop on line %d\n --> %d vs %d(%d) \n", array1->GetShortName().c_str(), array2->GetShortName().c_str(), currLoop->lineNum, dimTo, links[dimFrom], dimFrom); statusOk = false; } else { const auto accessFrom = accesses[i].second.second; const auto accessTo = accesses[k].second.second; auto templRule1 = refs1->GetAlignRulesWithTemplate(regId); auto templRule2 = refs2->GetAlignRulesWithTemplate(regId); if (DIST::Fx(accessFrom, templRule1[dimFrom]) != DIST::Fx(accessTo, templRule2[dimTo])) { string format = "arrays '%s' and '%s' have different align rules -- \n -->"; format += "F1 = [%d.%d], x1 = [%d.%d], F2 = [%d.%d], x2 = [%d.%d] \n -->"; format += "F1(x1) = [%d.%d] != F2(x2) = [%d.%d]\n"; __spf_print(1, format.c_str(), array1->GetShortName().c_str(), array2->GetShortName().c_str(), templRule1[dimFrom].first, templRule1[dimFrom].second, accessFrom.first, accessFrom.second, templRule2[dimTo].first, templRule2[dimTo].second, accessTo.first, accessTo.second, DIST::Fx(accessFrom, templRule1[dimFrom]).first, DIST::Fx(accessFrom, templRule1[dimFrom]).second, DIST::Fx(accessTo, templRule2[dimTo]).first, DIST::Fx(accessTo, templRule2[dimTo]).second); statusOk = false; } } if (!statusOk) { wstring bufE, bufR; __spf_printToLongBuf(bufE, L"arrays '%s' and '%s' have different align rules in this loop according to their write accesses", to_wstring(array1->GetShortName()).c_str(), to_wstring(array2->GetShortName()).c_str()); __spf_printToLongBuf(bufR, R132, to_wstring(array1->GetShortName()).c_str(), to_wstring(array2->GetShortName()).c_str()); messages.push_back(Messages(WARR, currLoop->lineNum, bufR, bufE, 3011)); currLoop->hasDifferentAlignRules = true; break; } } if (!statusOk) break; } if (!statusOk) break; } if (!statusOk) break; } if (statusOk) { k = 0; for (auto array = arrayWriteAcc.begin(); array != arrayWriteAcc.end(); array++, ++k) { if (k == posDim) { mainArray.arrayRef = array->first; mainArray.dimentionPos = array->second.first; 
mainArray.mainAccess = array->second.second; break; } } } else { __spf_print(PRINT_DIR_RESULT, " has conflict writes\n"); hasConflict = true; } } else { mainArray.hasWrite = false; __spf_print(PRINT_DIR_RESULT, " no regular writes on loop\n"); } // fill mainArray if no regular writes found // now OmegaTest is used for searching dependencies if (!mainArray.hasWrite) findMainArrayFromRead(currAccesses, mainArray, itersCount, arrayLinksByFuncCalls); } bool dimPosFound = sharedMemoryParallelization || (mainArray.arrayRef != NULL && mainArray.dimentionPos != -1); if (dimPosFound && !currLoop->hasLimitsToParallel() && (currLoop->lineNum > 0 || (currLoop->lineNum < 0 && currLoop->altLineNum > 0))) { DIST::Array *mainArrayOfLoop = mainArray.arrayRef; pair mainAccess = mainArray.mainAccess; const int dimPos = mainArray.dimentionPos; //change array to template if ACROSS was not found or not loop_array if (!sharedMemoryParallelization && mainArray.underAcross == false && !mainArray.arrayRef->IsLoopArray()) { set realArrayRef; getRealArrayRefs(mainArray.arrayRef, mainArray.arrayRef, realArrayRef, arrayLinksByFuncCalls); set templateLink; vector>> allRules; vector> allLinks; for (auto& array : realArrayRef) { DIST::Array* toAdd = array->GetTemplateArray(regId); if (toAdd) { templateLink.insert(toAdd); allRules.push_back(array->GetAlignRulesWithTemplate(regId)); allLinks.push_back(array->GetLinksWithTemplate(regId)); } } if (!isAllRulesEqual(allRules)) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); /*if (templateLink.size() != 1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); */ const vector> &rules = allRules[0]; const vector &links = allLinks[0]; mainArray.arrayRef = *templateLink.begin(); mainArray.mainAccess = DIST::Fx(mainArray.mainAccess, rules[mainArray.dimentionPos]); mainArray.dimentionPos = links[mainArray.dimentionPos]; } ParallelDirective *parDir = NULL; LoopGraph *loop = createDirectiveForLoop(currLoop, mainArray, 
// End of createParallelDirectives: when a directive was created and sharedMemoryParallelization is
// off, the array chosen before the template substitution is kept in parDir->arrayRef2 and
// parDir->on2 is built for it (or copied from parDir->on when under ACROSS);
// addShadowFromAnalysis() then extends the shadow widths. A copy of the directive is stored in
// loop->directiveForLoop.
//
// propagateTemplateInfo(arrays, regId, arrayLinksByFuncCalls, reducedG, allArrays): for each key
// array for which GetTemplateArray(regId, false) is NULL, obtains the align rules with
// getAlignRuleWithTemplate() and registers every rule that has a non-NULL template through
// AddLinkWithTemplate(); a GetDimNumber() result of -1 triggers printInternalError(). The whole
// pass is repeated as long as any link was added in the previous pass.
//
// findAndResolve(...) begins at the end of this line; its parameter list continues on the next.
acrossOutArrays); parDir = loop->directive; if (parDir != NULL) { if(!sharedMemoryParallelization) { parDir->arrayRef2 = mainArrayOfLoop; if (mainArray.underAcross == false) { for (int i = 0; i < mainArrayOfLoop->GetDimSize(); ++i) { if (i == dimPos) parDir->on2.push_back(make_pair(currLoop->loopSymbol, mainAccess)); else parDir->on2.push_back(make_pair("*", make_pair(0, 0))); } for (int z = 0; z < parDir->on2.size(); ++z) if (parDir->on2[z].first != "*" && parDir->on2[z].second == make_pair(0, 0)) parDir->on2[z].second = mainAccess; } else parDir->on2 = parDir->on; addShadowFromAnalysis(parDir, currAccesses); } loop->directiveForLoop = new ParallelDirective(*loop->directive); } __spf_print(PRINT_DIR_RESULT, " directive created\n"); } } } } extern vector>> getAlignRuleWithTemplate(DIST::Array *array, const map> &arrayLinksByFuncCalls, DIST::GraphCSR &reducedG, const DIST::Arrays &allArrays, const uint64_t regionId); static void propagateTemplateInfo(map>>>> &arrays, const uint64_t regId, const map> &arrayLinksByFuncCalls, DIST::GraphCSR &reducedG, const DIST::Arrays &allArrays) { bool changed = true; while (changed) { changed = false; for (auto &arrayElem: arrays) { auto array = arrayElem.first; if (array->GetTemplateArray(regId, false) == NULL) { vector>> templRule = getAlignRuleWithTemplate(array, arrayLinksByFuncCalls, reducedG, allArrays, regId); int idx = 0; for (auto &elem : templRule) { if (get<0>(elem) == NULL) { idx++; continue; } auto templ = get<0>(elem); auto alignDim = get<1>(elem); auto intRule = get<2>(elem); int dimNum = -1; int err = allArrays.GetDimNumber(get<0>(elem), alignDim, dimNum); if (err == -1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); array->AddLinkWithTemplate(idx, dimNum, templ, intRule, regId); ++idx; changed = true; } } } } } static inline bool findAndResolve(bool &resolved, vector> &updateOn, const map> &dimsNotMatch, const map> &arrayLinksByFuncCalls, DIST::GraphCSR &reducedG, const DIST::Arrays &allArrays, 
// findAndResolve(resolved, updateOn, dimsNotMatch, ..., parDirective, values, deprecateToMatch,
// privates, fromRead = false): tries to replace "*" entries of parDirective->on by expressions
// taken from `values`.
// For each array in dimsNotMatch the align rules of all its real references are fetched; if they
// are not all equal the function returns false. For each flagged dimension that has a value, the
// mapping text is built as "name", "name + k", "name - k", or a bare constant when the name is
// empty. The function returns false when the name is in deprecateToMatch, or when the target
// index in updateOn already holds a different text and fromRead is false.
// Afterwards every filled updateOn entry whose text is not in `privates` is written to
// parDirective->on with the pair (1, 0) (printInternalError() is called if that entry was not
// "*"), `resolved` is set to true, and on2 is updated -- at the same index when arrayRef is not a
// template, otherwise through the template links of arrayRef2 (continues on the next line).
// `ret` is never modified, so the function returns true whenever it does not bail out early;
// `currRule` is read but not used (see the TODO).
const uint64_t regId, ParallelDirective *parDirective, map>>> &values, const set &deprecateToMatch, const set &privates, bool fromRead = false) { bool ret = true; for (auto &elem : dimsNotMatch) { vector>> rule; set realRefs; getRealArrayRefs(elem.first, elem.first, realRefs, arrayLinksByFuncCalls); vector>>> allRules(realRefs.size()); int tmpIdx = 0; for (auto &array : realRefs) reducedG.GetAlignRuleWithTemplate(array, allArrays, allRules[tmpIdx++], regId); if (isAllRulesEqual(allRules)) rule = allRules[0]; else return false; findAndReplaceDimentions(rule, allArrays); for (int i = 0; i < elem.second.size(); ++i) { if (elem.second[i] && values[elem.first][i].first) { const int idx = get<1>(rule[i]); const auto &currRule = get<2>(rule[i]); //TODO: use rule string mapTo = ""; if (values[elem.first][i].second.first != "") { mapTo = values[elem.first][i].second.first; if (values[elem.first][i].second.second != 0) { if (values[elem.first][i].second.second >= 0) mapTo += " + " + std::to_string(values[elem.first][i].second.second); else mapTo += " - " + std::to_string(abs(values[elem.first][i].second.second)); } } else mapTo = std::to_string(values[elem.first][i].second.second); if (deprecateToMatch.find(values[elem.first][i].second.first) != deprecateToMatch.end()) return false; if (updateOn[idx].first) { if (updateOn[idx].second != mapTo && !fromRead) // DIFFERENT VALUES TO MAP return false; } else updateOn[idx] = make_pair(true, mapTo); } } } for (int i = 0; i < updateOn.size(); ++i) { if (updateOn[i].first && privates.find(updateOn[i].second) == privates.end()) { if (parDirective->on[i].first != "*") printInternalError(convertFileName(__FILE__).c_str(), __LINE__); else { parDirective->on[i].first = updateOn[i].second; parDirective->on[i].second = make_pair(1, 0); resolved = true; if (!parDirective->arrayRef->IsTemplate()) { parDirective->on2[i].first = updateOn[i].second; parDirective->on2[i].second = make_pair(1, 0); } else { set realRefsOfPar; 
// End of findAndResolve (template case): the real references of arrayRef2 must share the same
// align rules and the first one must have parDirective->arrayRef as its template; the dimension of
// arrayRef2 linked to template dimension i receives the mapping in on2. printInternalError() is
// called when any of these checks fails, when no linked dimension exists, or when the on2 entry
// is not "*".
//
// tryToResolveUnmatchedDims(dimsNotMatch, loop, regId, parDirective, reducedG, allArrays,
// arrayLinksByFuncCalls, distribution, mapFuncInfo): attempts to fill the dimensions flagged in
// dimsNotMatch using information from the loop body. leftValues / rightValues get one slot per
// dimension of each array and are passed to analyzeLoopBody(); the function returns false if that
// call fails or (check continues on the next line) a flagged dimension has neither a left nor a
// right value.
getRealArrayRefs(parDirective->arrayRef2, parDirective->arrayRef2, realRefsOfPar, arrayLinksByFuncCalls); vector>>> allRules(realRefsOfPar.size()); int tmpIdx = 0; for (auto &array : realRefsOfPar) reducedG.GetAlignRuleWithTemplate(array, allArrays, allRules[tmpIdx++], regId); if (!isAllRulesEqual(allRules)) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); DIST::Array *arrayRef2 = *realRefsOfPar.begin(); auto links = arrayRef2->GetLinksWithTemplate(regId); if (arrayRef2->GetTemplateArray(regId) != parDirective->arrayRef) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); int found = -1; for (int z = 0; z < links.size(); ++z) if (links[z] == i) found = z; if (found == -1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); if (parDirective->on2[found].first != "*") printInternalError(convertFileName(__FILE__).c_str(), __LINE__); parDirective->on2[found].first = updateOn[i].second; parDirective->on2[found].second = make_pair(1, 0); } } } } return ret; } static bool tryToResolveUnmatchedDims(const map> &dimsNotMatch, LoopGraph* loop, const uint64_t regId, ParallelDirective *parDirective, DIST::GraphCSR &reducedG, const DIST::Arrays &allArrays, const map> &arrayLinksByFuncCalls, const vector> &distribution, const map &mapFuncInfo) { bool resolved = false; map>>> leftValues; map>>>> rightValues; for (auto &elem : dimsNotMatch) { leftValues[elem.first] = vector>>(elem.second.size()); std::fill(leftValues[elem.first].begin(), leftValues[elem.first].end(), make_pair(false, make_pair("", 0))); rightValues[elem.first] = vector>>>(elem.second.size()); std::fill(rightValues[elem.first].begin(), rightValues[elem.first].end(), make_pair(false, map>())); } string base = ""; int shiftValue = 0; set countOfLeftBase; if (!analyzeLoopBody(loop, leftValues, rightValues, base, dimsNotMatch, mapFuncInfo)) return false; // check found info for (auto &elem : dimsNotMatch) { for (int idx = 0; idx < elem.second.size(); ++idx) if (elem.second[idx] 
// tryToResolveUnmatchedDims, continued:
//  * the real references of every array in dimsNotMatch must yield exactly one template and one
//    link vector, otherwise printInternalError() is called. If the array and the template differ
//    in dimension count and a template dimension that is not linked to the array has distRule
//    BLOCK, the function returns at once: false if any left value was found, true otherwise;
//  * deprecateToMatch collects the loop symbols of the perfect nest starting at `loop` and, under
//    __SPF, of the enclosing loops (for loops with isFor == false, the identifier on the left of
//    the condition when that is a VAR_REF); `privates` is filled by
//    tryToFindPrivateInAttributes(). Builds without __SPF stop at the #error;
//  * findAndResolve() is run on leftValues and a false result is returned as is. When `base` is
//    not empty, the offsets of all named left values must coincide (printInternalError()
//    otherwise) and that offset is subtracted from both bounds stored under `base` in rightValues;
//  * if nothing was resolved the function returns false (resolution from reads is commented out);
//  * otherwise propagateTemplateInfo() is called and the shadowRenew entries are adjusted
//    (continues on the next line).
// NOTE(review): `dow` returned by isSgWhileStmt() is dereferenced without a NULL check.
&& (!leftValues[elem.first][idx].first && !rightValues[elem.first][idx].first)) // NOT INFO FOUND return false; } //check multiplied Arrays to BLOCK distr of template for (auto &elem : dimsNotMatch) { set realRefs; getRealArrayRefs(elem.first, elem.first, realRefs, arrayLinksByFuncCalls); set templates; set> links; for (auto &realR : realRefs) { templates.insert(realR->GetTemplateArray(regId)); links.insert(realR->GetLinksWithTemplate(regId)); } DIST::Array *templ = NULL; vector alignLinks; if (templates.size() == 1 && links.size() == 1) { templ = *templates.begin(); alignLinks = *links.begin(); } if (!templ) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); if (elem.first->GetDimSize() != templ->GetDimSize()) { const DistrVariant *var = NULL; for (auto &distRule : distribution) { if (distRule.first == templ) { var = distRule.second; break; } } if (!var) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); const set alingLinksSet(alignLinks.begin(), alignLinks.end()); for (int z = 0; z < templ->GetDimSize(); ++z) { if (alingLinksSet.find(z) == alingLinksSet.end()) { if (var->distRule[z] == BLOCK) { //check all accesses to write for (auto &left : leftValues) for (auto &toCheck : left.second) if (toCheck.first) return false; return true; } } } } } vector> updateOn(parDirective->on.size()); std::fill(updateOn.begin(), updateOn.end(), make_pair(false, "")); set deprecateToMatch; int nested = loop->perfectLoop; LoopGraph* tmpL = loop; for (int z = 0; z < nested; ++z) { deprecateToMatch.insert(tmpL->loopSymbol); if (tmpL->children.size()) tmpL = tmpL->children[0]; else if (z != nested - 1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); } set privates; #if __SPF tmpL = loop->parent; while (tmpL) { if (!tmpL->isFor) // TODO: need to add all inductive variables! 
{ SgWhileStmt* dow = isSgWhileStmt(tmpL->loop->GetOriginal()); if (dow->conditional()) { SgExpression* cond = dow->conditional(); if (cond->lhs() && cond->lhs()->variant() == VAR_REF) deprecateToMatch.insert(cond->lhs()->symbol()->identifier()); } } else deprecateToMatch.insert(tmpL->loopSymbol); tmpL = tmpL->parent; } tryToFindPrivateInAttributes(loop->loop->GetOriginal(), privates); #else #error 'TODO - fill privates for this loop and check all inductive variables' #endif //try to resolve from write operations bool ok = findAndResolve(resolved, updateOn, dimsNotMatch, arrayLinksByFuncCalls, reducedG, allArrays, regId, parDirective, leftValues, deprecateToMatch, privates); if (!ok) return false; else { //shift right splited values if (base != "") { for (auto& left : leftValues) { for (int z = 0; z < left.second.size(); ++z) { if (left.second[z].first) { if (left.second[z].second.first != "") { countOfLeftBase.insert(left.second[z].second.second); shiftValue = left.second[z].second.second; } } } } if (countOfLeftBase.size() != 1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); for (auto &right : rightValues) { for (int z = 0; z < right.second.size(); ++z) { if (right.second[z].first) { auto itB = right.second[z].second.find(base); if (itB != right.second[z].second.end()) { itB->second.first -= shiftValue; itB->second.second -= shiftValue; } } } } } } //try to resolve from read operations if (!resolved) { return false; /*map>>> values2; for (auto &elem : rightValues) for (auto &vElem : elem.second) values2[elem.first].push_back(make_pair(vElem.first, vElem.second.first)); ok = findAndResolve(resolved, updateOn, dimsNotMatch, arrayLinksByFuncCalls, reducedG, allArrays, regId, parDirective, values2, privates, true); if (!ok) return false;*/ } if (resolved) { propagateTemplateInfo(rightValues, regId, arrayLinksByFuncCalls, reducedG, allArrays); for (auto &rVal : rightValues) { auto &rArray = rVal.first; for (auto &shadows : parDirective->shadowRenew) { 
// End of tryToResolveUnmatchedDims: for the first shadowRenew entry whose first name component
// equals the short name of an array in rightValues, an offset (foundVal) is determined for every
// dimension that has a left or right value. It is subtracted from both bounds stored under `base`
// for that dimension, the bounds are clamped so that the range contains 0, and the shadow widths
// of the dimension are raised to cover it. Returns `resolved`.
//
// createLinksBetweenArrays(links, dist, arrayLinksByFuncCalls, reducedG, allArrays, regionId):
// for every key array of `links`, computes findLinksBetweenArrays() from each of its real
// references to `dist` and stores the result when isAllRulesEqual() accepts them. Returns false
// if `dist` is NULL or the check fails for some array (processing stops at that array).
//
// checkCorrectness(dir, distribution, reducedG, allArrays, arrayLinksByFuncCalls, allArraysInLoop,
// messages, loopLine, dimsNotMatch, regionId) begins at the end of this line: it checks that the
// BLOCK-distributed dimensions of the arrays used in the loop are not mapped to "*" in dir.on and
// records offending dimensions in dimsNotMatch.
if (shadows.first.first == rArray->GetShortName()) { const auto &leftPartVal = leftValues[rArray]; for (int i = 0; i < leftPartVal.size(); ++i) { if (leftPartVal[i].first || rVal.second[i].first) { int foundVal = 0; if (leftPartVal[i].first) { if (leftPartVal[i].second.first == base && shiftValue == leftPartVal[i].second.second) foundVal = 0; else foundVal = leftPartVal[i].second.second - shiftValue; } else { auto rules = rArray->GetAlignRulesWithTemplate(regId); auto links = rArray->GetLinksWithTemplate(regId); if (links[i] == -1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); const auto &currRule = rules[i]; if (base == "") foundVal = std::stoi(parDirective->on[links[i]].first) + currRule.second; else foundVal = currRule.second; } auto itSh = rVal.second[i].second.find(base); if (itSh != rVal.second[i].second.end()) // shadow { auto shadowElem = itSh->second; shadowElem.first -= foundVal; shadowElem.second -= foundVal; if (shadowElem.first > 0) shadowElem.first = 0; if (shadowElem.second < 0) shadowElem.second = 0; shadows.second[i].first = std::max(shadows.second[i].first, abs(shadowElem.first)); shadows.second[i].second = std::max(shadows.second[i].second, shadowElem.second); } //else remote } } break; } } } } return resolved; } static bool createLinksBetweenArrays(map> &links, DIST::Array *dist, const std::map> &arrayLinksByFuncCalls, DIST::GraphCSR &reducedG, DIST::Arrays &allArrays, const uint64_t regionId) { bool ok = true; if (dist == NULL) return false; for (auto &array : links) { set realArrayRef; getRealArrayRefs(array.first, array.first, realArrayRef, arrayLinksByFuncCalls); vector> AllLinks(realArrayRef.size()); int currL = 0; for (auto &array : realArrayRef) AllLinks[currL++] = findLinksBetweenArrays(array, dist, regionId); if (isAllRulesEqual(AllLinks)) array.second = AllLinks[0]; else ok = false; if (ok == false) break; } return ok; } static bool checkCorrectness(const ParallelDirective &dir, const vector> &distribution, 
// checkCorrectness, continued:
//  * returns true immediately when dir.arrayRef or dir.arrayRef2 is a loop array;
//  * if dir.arrayRef2 is itself listed in `distribution`, its rule is used with identity links.
//    Otherwise the distributed arrays are tried in order: links from the real references of
//    arrayRef2 must agree (else message 3007 is added and printInternalError() is called); for
//    the first array with a link other than -1 a temporary rule is derived for arrayRef2
//    (newDistArray, deleted before each return below), with NONE for unlinked dimensions.
//    A link vector whose size differs from arrayRef2's dimension count also adds message 3007 and
//    calls printInternalError(), as does finding no linked distributed array at all;
//  * links from every array of the loop to the template are built with
//    createLinksBetweenArrays(); on failure false is returned. Links to dir.arrayRef are copied
//    from those when arrayRef is a template, otherwise built by a second call;
//  * when arrayRef2 != arrayRef, each BLOCK dimension of the main array whose `on` entry
//    (through `links`) is "*" is flagged in dimsNotMatch and makes the result false.
DIST::GraphCSR &reducedG, DIST::Arrays &allArrays, const std::map> &arrayLinksByFuncCalls, const set &allArraysInLoop, vector &messages, const int loopLine, map> &dimsNotMatch, const uint64_t regionId) { const pair *distArray = NULL; pair *newDistArray = NULL; map> arrayLinksWithTmpl; map> arrayLinksWithDirArray; const DistrVariant *distRuleTempl = NULL; for (auto &array : allArraysInLoop) arrayLinksWithDirArray[array] = arrayLinksWithTmpl[array] = vector(); vector links; bool ok = true; if (dir.arrayRef->IsLoopArray() || dir.arrayRef2->IsLoopArray()) return ok; for (int i = 0; i < distribution.size(); ++i) { if (dir.arrayRef2 == distribution[i].first) { distArray = &distribution[i]; for (int z = 0; z < distArray->first->GetDimSize(); ++z) links.push_back(z); distRuleTempl = distribution[i].second; break; } } if (!distArray) { bool found = false; for (int i = 0; i < distribution.size(); ++i) { DIST::Array *currDistArray = distribution[i].first; set realArrayRef; getRealArrayRefs(dir.arrayRef2, dir.arrayRef2, realArrayRef, arrayLinksByFuncCalls); vector> AllLinks(realArrayRef.size()); int currL = 0; for (auto &array : realArrayRef) AllLinks[currL++] = findLinksBetweenArrays(array, currDistArray, regionId); if (isAllRulesEqual(AllLinks)) links = AllLinks[0]; else { wstring bufE, bufR; __spf_printToLongBuf(bufE, L"Can not create distributed link"); __spf_printToLongBuf(bufR, R127); messages.push_back(Messages(ERROR, loopLine, bufR, bufE, 3007)); printInternalError(convertFileName(__FILE__).c_str(), __LINE__); } for (int k = 0; k < links.size(); ++k) { if (links[k] != -1) { found = true; break; } } if (found) { if (dir.arrayRef2->GetDimSize() != links.size()) { __spf_print(1, "Can not create distributed link for array '%s': dim size of this array is '%d' and it is not equal '%d'\n", dir.arrayRef2->GetShortName().c_str(), dir.arrayRef2->GetDimSize(), (int)links.size()); wstring bufE, bufR; __spf_printToLongBuf(bufE, L"Can not create distributed link for array '%s': dim 
size of this array is '%d' and it is not equal '%d'", to_wstring(dir.arrayRef2->GetShortName()).c_str(), dir.arrayRef2->GetDimSize(), (int)links.size()); __spf_printToLongBuf(bufR, R126, to_wstring(dir.arrayRef2->GetShortName()).c_str(), dir.arrayRef2->GetDimSize(), (int)links.size()); messages.push_back(Messages(ERROR, loopLine, bufR, bufE, 3007)); printInternalError(convertFileName(__FILE__).c_str(), __LINE__); } vector derivedRule(dir.arrayRef2->GetDimSize()); for (int z = 0; z < links.size(); ++z) { if (links[z] != -1) derivedRule[z] = distribution[i].second->distRule[links[z]]; else derivedRule[z] = dist::NONE; } newDistArray = new pair(); newDistArray->first = dir.arrayRef2; DistrVariant *tmp = new DistrVariant(derivedRule); newDistArray->second = tmp; distArray = newDistArray; distRuleTempl = distribution[i].second; break; } } if (found == false) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); } auto templArray = dir.arrayRef; if (templArray->IsTemplate() == false) templArray = dir.arrayRef->GetTemplateArray(regionId, false); ok = createLinksBetweenArrays(arrayLinksWithTmpl, templArray, arrayLinksByFuncCalls, reducedG, allArrays, regionId); if (ok == false) { if (newDistArray) { delete newDistArray->second; delete newDistArray; } return ok; } if (dir.arrayRef->IsTemplate()) arrayLinksWithDirArray = arrayLinksWithTmpl; else ok = ok && createLinksBetweenArrays(arrayLinksWithDirArray, dir.arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId); // check main array if (dir.arrayRef2 != dir.arrayRef) { const vector &rule = distArray->second->distRule; DIST::Array* key = distArray->first; dimsNotMatch[key] = vector(rule.size()); auto it = dimsNotMatch.find(key); std::fill(it->second.begin(), it->second.end(), false); for (int i = 0; i < rule.size(); ++i) { if (rule[i] == dist::BLOCK) { if (dir.on[links[i]].first == "*") { ok = false; it->second[i] = true; } } } } for (auto &array : arrayLinksWithTmpl) { auto dirArrayRef = 
arrayLinksWithDirArray[array.first]; if (array.first != dir.arrayRef2 && array.first != dir.arrayRef || dir.arrayRef2 == dir.arrayRef) // ACROSS { vector derivedRule(array.first->GetDimSize()); DIST::Array* key = array.first; dimsNotMatch[key] = vector(array.first->GetDimSize()); auto it = dimsNotMatch.find(key); std::fill(it->second.begin(), it->second.end(), false); for (int z = 0; z < array.second.size(); ++z) { if (array.second[z] != -1) derivedRule[z] = distRuleTempl->distRule[array.second[z]]; else derivedRule[z] = dist::NONE; } for (int i = 0; i < derivedRule.size(); ++i) { if (derivedRule[i] == dist::BLOCK) { if (dir.on[dirArrayRef[i]].first == "*") { ok = false; it->second[i] = true; } } } } } if (newDistArray) { delete newDistArray->second; delete newDistArray; } return ok; } static bool isOnlyTopPerfect(LoopGraph *current, const vector> &distribution) { for (auto &elem : distribution) { if (elem.first == current->directive->arrayRef) { bool allNone = true; for (auto &dist : elem.second->distRule) { if (dist != NONE) { allNone = false; break; } } if (allNone) return true; } } LoopGraph *next = current; for (int i = 0; i < current->perfectLoop - 1; ++i) next = next->children[0]; if (next->children.size() == 0) return true; else // return false; { while (next->children.size() != 0) { if (next->children.size() > 1) return false; else { next = next->children[0]; bool condition = next->directive != NULL; if (condition) condition = next->directive->arrayRef != NULL; if (condition) { bool found = false; for (int k = 0; k < distribution.size(); ++k) { if (distribution[k].first == next->directive->arrayRef) { int dimPos = -1; for (int p = 0; p < next->directiveForLoop->on.size(); ++p) { if (next->directiveForLoop->on[p].first == next->directiveForLoop->parallel[0]) { dimPos = p; break; } } if (dimPos == -1) { found = true; //continue; break; } if (distribution[k].second->distRule[dimPos] != NONE) return false; found = true; break; } } if (!found) return false; } 
else return false; } } return true; } } static void constructRules(vector>& outRules, const vector>& distribution, LoopGraph* loop) { outRules = distribution; for (auto& rule : outRules) { const DistrVariant* redistRules = loop->getRedistributeRule(rule.first); if (redistRules) rule.second = redistRules; } } static bool matchParallelAndDist(const pair& currDist, const ParallelDirective* currParDir, vector& saveDistr, const map>& arrayLinksByFuncCalls, const uint64_t regionId) { DIST::Array* parallelOn = currParDir->arrayRef; set realRefs; getRealArrayRefs(parallelOn, parallelOn, realRefs, arrayLinksByFuncCalls); if (realRefs.size() == 1) parallelOn = *realRefs.begin(); else { int maxC = -1; vector linkWithTempl; parallelOn = NULL; for (auto& refOn : realRefs) { linkWithTempl = refOn->GetLinksWithTemplate(regionId); int z = 0; for (int k = 0; k < linkWithTempl.size(); ++k) if (linkWithTempl[k] != -1) z++; if (maxC < z) { maxC = z; parallelOn = refOn; } } checkNull(parallelOn, convertFileName(__FILE__).c_str(), __LINE__); } const vector>>& ruleOn = currParDir->on; DIST::Array* templArray = currDist.first; //return true if need to skeep if (parallelOn->IsTemplate()) { if (parallelOn != templArray) return true; } else if (parallelOn->GetTemplateArray(regionId) != templArray) return true; vector touched(templArray->GetDimSize()); std::fill(touched.begin(), touched.end(), false); saveDistr.resize(templArray->GetDimSize(), false); bool conflict = false; vector linkWithTempl; if (parallelOn->IsTemplate()) for (int i = 0; i < templArray->GetDimSize(); ++i) linkWithTempl.push_back(i); else linkWithTempl = parallelOn->GetLinksWithTemplate(regionId); for (int i = 0; i < parallelOn->GetDimSize(); ++i) { // link does not exist if (linkWithTempl[i] == -1) continue; touched[linkWithTempl[i]] = true; if (ruleOn[i].first != "*") { if (currDist.second->distRule[linkWithTempl[i]] == dist::BLOCK) saveDistr[linkWithTempl[i]] = true; } else { if 
(currDist.second->distRule[linkWithTempl[i]] == dist::BLOCK) conflict = true; } } for (int i = 0; i < touched.size(); ++i) { if (!touched[i]) { if (currDist.second->distRule[i] == dist::BLOCK) conflict = true; } } if (conflict) { int countOfBlockToNone = 0; for (int z = 0; z < saveDistr.size(); ++z) { if (saveDistr[z] == false && currDist.second->distRule[z] == dist::BLOCK) countOfBlockToNone++; } vector idxCandidates; for (int z = 0; z < currParDir->parallel.size(); ++z) { const string currLetter = currParDir->parallel[z]; for (int k = 0; k < currParDir->on.size(); ++k) { if (currParDir->on[k].first == currLetter) { if (currDist.second->distRule[linkWithTempl[k]] == dist::NONE) idxCandidates.push_back(linkWithTempl[k]); break; } } } if (idxCandidates.size()) for (int k = 0; k < std::min(countOfBlockToNone, (int)idxCandidates.size()); ++k) saveDistr[idxCandidates[k]] = true; } return !conflict; } static vector genRedistributeDirective(File* file, const vector> distribution, const LoopGraph* current, const ParallelDirective* currParDir, const uint64_t regionId, vector>& redistributeRules, const map>& arrayLinksByFuncCalls) { vector selectedIdxOfDistr; for (int i = 0; i < distribution.size(); ++i) { vector saveDistr; bool result = matchParallelAndDist(distribution[i], currParDir, saveDistr, arrayLinksByFuncCalls, regionId); if (result) continue; selectedIdxOfDistr.push_back(i); vector newRedistributeRule; for (int k = 0; k < saveDistr.size(); ++k) { if (saveDistr[k]) newRedistributeRule.push_back(BLOCK); else newRedistributeRule.push_back(NONE); } if (saveDistr.size() != 0) { DistrVariant* newRules = new DistrVariant(newRedistributeRule); redistributeRules.push_back(make_pair(distribution[i].first, newRules)); } } return selectedIdxOfDistr; } static bool ifRuleNull(const DistrVariant* currVar) { for (auto& elem : currVar->distRule) if (elem == BLOCK) return false; return true; } static bool addRedistributionDirs(File* file, const vector>& distribution, vector& 
toInsert, LoopGraph* current, const map& loopGraph, ParallelDirective* currParDir, const uint64_t regionId, vector& messages, const map>& arrayLinksByFuncCalls, const DIST::Array* sameAlignTemplate) { vector> redistributeRules; const vector& redistrDirs = genRedistributeDirective(file, distribution, current, currParDir, regionId, redistributeRules, arrayLinksByFuncCalls); bool needToSkip = true; for (int z = 0; z < redistrDirs.size(); ++z) { if (ifRuleNull(redistributeRules[z].second)) { LoopGraph* cp = current->parent; int nestedLvl = 1; while (cp) { const int line = cp->lineNum; auto itL = loopGraph.find(line); if (itL == loopGraph.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); if (itL->second->directiveForLoop != NULL || itL->second->directive != NULL) return needToSkip; nestedLvl = itL->second->perfectLoop; if (nestedLvl > 1) return needToSkip; cp = cp->parent; } } } needToSkip = false; if (sharedMemoryParallelization) return false; // Realign with global template clones for (int z = 0; z < redistrDirs.size(); ++z) { const int idx = redistrDirs[z]; const auto redistrRule = redistributeRules[z].second->distRule; const string newTemplate = distribution[idx].first->AddTemplateClone(redistrRule); const auto lines = make_pair(current->lineNum, current->lineNumAfterLoop); checkNull(sameAlignTemplate, convertFileName(__FILE__).c_str(), __LINE__); set usedArrays; //apply filter by template for (auto& array : current->usedArrays) { if (array->IsNotDistribute()) continue; auto realRef = getRealArrayRef(array, regionId, arrayLinksByFuncCalls); if (realRef->GetTemplateArray(regionId) == sameAlignTemplate) usedArrays.insert(array); } if (usedArrays.size() == 0) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); const auto& toRealign = createRealignRules(current->loop, regionId, file, newTemplate, arrayLinksByFuncCalls, usedArrays, lines); //before loop for (auto& rule : toRealign.first) toInsert.push_back(rule); //after loop for (auto& 
rule : toRealign.second) toInsert.push_back(rule); if (toRealign.first.size()) currParDir->cloneOfTemplate = newTemplate; } current->setNewRedistributeRules(redistributeRules); return needToSkip; } void selectParallelDirectiveForVariant(File* file, ParallelRegion* currParReg, DIST::GraphCSR& reducedG, DIST::Arrays& allArrays, const vector& loopGraph, const map& mapLoopsInFile, const map& mapFuncInfo, const vector>& distribution, vector& toInsert, const uint64_t regionId, const map>& arrayLinksByFuncCalls, const map& depInfoForLoopGraph, vector& messages) { for (int i = 0; i < loopGraph.size(); ++i) { LoopGraph* loop = loopGraph[i]; const bool hasDirective = loop->directive; const bool noLimits = loop->hasLimitsToParallel() == false; const bool isMyRegion = loop->region == currParReg; const bool noUserDir = loop->userDvmDirective == NULL; DIST::Array* sameAlignTemplate = NULL; const bool sameAligns = sharedMemoryParallelization ? true : loop->isArrayTemplatesTheSame(sameAlignTemplate, regionId, arrayLinksByFuncCalls); bool freeLoopDistr = true; if (!sharedMemoryParallelization && hasDirective && loop->directive->arrayRef2->IsLoopArray()) { bool ok = false; for (auto& elem : distribution) { if (elem.first == loop->directive->arrayRef) { bool allNone = true; for (auto& rule : elem.second->distRule) if (rule != dist::NONE) allNone = false; if (allNone) freeLoopDistr = false; ok = true; break; } } if (!ok) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); } if (hasDirective && noLimits && isMyRegion && noUserDir && (sameAlignTemplate || sameAligns) && freeLoopDistr) { if (loop->perfectLoop >= 1) { ParallelDirective* parDirective = loop->directive; parDirective->cloneOfTemplate = ""; //try to unite loops if (createNestedLoops(loop, depInfoForLoopGraph, mapFuncInfo, messages)) parDirective = loop->recalculateParallelDirective(); if(!sharedMemoryParallelization) { bool topCheck = isOnlyTopPerfect(loop, distribution); bool needToContinue = false; if 
(topCheck) { // -> dims not mached map> dimsNotMatch; if (!checkCorrectness(*parDirective, distribution, reducedG, allArrays, arrayLinksByFuncCalls, loop->getAllArraysInLoop(), messages, loop->lineNum, dimsNotMatch, regionId)) { if (!tryToResolveUnmatchedDims(dimsNotMatch, loop, regionId, parDirective, reducedG, allArrays, arrayLinksByFuncCalls, distribution, mapFuncInfo)) needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate); } } else needToContinue = addRedistributionDirs(file, distribution, toInsert, loop, mapLoopsInFile, parDirective, regionId, messages, arrayLinksByFuncCalls, sameAlignTemplate); if (needToContinue) continue; } vector> newRules; if(!sharedMemoryParallelization) constructRules(newRules, distribution, loop); Directive* dirImpl = parDirective->genDirective(file, newRules, loop, reducedG, allArrays, regionId, arrayLinksByFuncCalls); #if __SPF //move label before loop if (!sharedMemoryParallelization && loop->hasRedistribute()) { auto prev = loop->loop->lexPrev(); if (!prev) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); moveLabelBefore(prev, loop->loop); } else if(loop->lineNum > 0) moveLabelBefore(loop->loop); // check correctness if (loop->lineNum < 0) { if (loop->altLineNum == -1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); SgStatement* local = NULL; local = SgStatement::getStatementByFileAndLine(loop->loop->fileName(), loop->lineNum); if (local == NULL) local = SgStatement::getStatementByFileAndLine(loop->loop->fileName(), loop->altLineNum); checkNull(local, convertFileName(__FILE__).c_str(), __LINE__); } #endif toInsert.push_back(dirImpl); } } else //TODO: add checker for indexing in this loop { if (loopGraph[i]->children.size() != 0) selectParallelDirectiveForVariant(file, currParReg, reducedG, allArrays, loopGraph[i]->children, mapLoopsInFile, mapFuncInfo, distribution, toInsert, regionId, 
arrayLinksByFuncCalls, depInfoForLoopGraph, messages); } } } static bool hasParallelDir(const LoopGraph* loop, const map>& createdDirectives) { auto byFile = createdDirectives.find(loop->fileName); if (byFile == createdDirectives.end()) return false; if (byFile->second.find(loop->lineNum) == byFile->second.end()) return false; else return true; } //TODO: check for rec calls static void addForRemove(const vector& loops, const map>& createdDirectives, map>& toRem) { for (auto& loop : loops) { if (hasParallelDir(loop, createdDirectives)) toRem[loop->fileName].insert(loop->lineNum); addForRemove(loop->children, createdDirectives, toRem); addForRemove(loop->funcChildren, createdDirectives, toRem); } } static void filterParallelDirectives(const vector& loopsByFile, const map>& createdDirectives, map>& toRem) { if (loopsByFile.size() == 0) return; auto dirsInCurrF = createdDirectives.find(loopsByFile[0]->fileName); if (dirsInCurrF == createdDirectives.end()) return; set dirsForLoop; for (auto& elem : dirsInCurrF->second) dirsForLoop.insert(elem->line); map> dirsForLoopByFile; for (auto& byFile : createdDirectives) for (auto& elem : byFile.second) dirsForLoopByFile[byFile.first].insert(elem->line); for (auto& loop : loopsByFile) { auto it = dirsForLoop.find(loop->lineNum); if (it != dirsForLoop.end()) //remove all dirs from funcChildren addForRemove(loop->funcChildren, dirsForLoopByFile, toRem); filterParallelDirectives(loop->children, createdDirectives, toRem); } } void filterParallelDirectives(const map>& loopGraph, map>& createdDirectives) { map> dirsToRem; for (auto& byFile : loopGraph) filterParallelDirectives(byFile.second, createdDirectives, dirsToRem); if (dirsToRem.size() == 0) return; map> newCreatedDirectives; set toDel; for (auto& byFile : createdDirectives) { auto byFileRem = dirsToRem.find(byFile.first); if (byFileRem == dirsToRem.end()) newCreatedDirectives[byFile.first] = byFile.second; else { for (auto& elem : byFile.second) { auto it = 
byFileRem->second.find(elem->line); if (it == byFileRem->second.end()) newCreatedDirectives[byFile.first].push_back(elem); else toDel.insert(elem); } } } createdDirectives = newCreatedDirectives; for (auto& del : toDel) delete del; } DIST::Array* getRealArrayRef(DIST::Array* in, const uint64_t regId, const map>& arrayLinksByFuncCalls) { set out; getRealArrayRefs(in, in, out, arrayLinksByFuncCalls); set>> rules; set> links; set templ; for (auto& array : out) { rules.insert(array->GetAlignRulesWithTemplate(regId)); links.insert(array->GetLinksWithTemplate(regId)); templ.insert(array->GetTemplateArray(regId)); } if (templ.size() != 1 || links.size() != 1 || rules.size() != 1) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); return *out.begin(); } //TODO: it works only for C void shiftAlignRulesForTemplates(const set& arrays, const uint64_t regId, DataDirective& dataDirectives, const map>& arrayLinksByFuncCalls) { map> templatesLink; for (auto& elem : arrays) if (elem->IsTemplate()) templatesLink[elem] = set(); if (templatesLink.size() == 0) return; for (auto& elem : arrays) { if (!elem->IsTemplate() && !elem->IsLoopArray() && !elem->IsNotDistribute()) { set realArrayRef; getRealArrayRefs(elem, elem, realArrayRef, arrayLinksByFuncCalls); for (auto& array : realArrayRef) { auto link = array->GetTemplateArray(regId); if (link) { if (templatesLink.find(link) != templatesLink.end()) templatesLink[link].insert(array); } } } } map, AlignRule*> alignInfo; for (auto& info : dataDirectives.alignRules) alignInfo[make_pair(info.alignArray, info.alignWith)] = &info; for (auto& templ : templatesLink) { int dimN = 0; auto sizes = templ.first->GetSizes(); for (auto& size : sizes) { if (size.first < 0) { //TODO: it works only for C, low bound of all array's dims is 0 int maxShift = -size.first; size.first = 0; size.second += maxShift; for (auto& linkArray : templ.second) { auto link = linkArray->GetLinksWithTemplate(regId); auto rule = 
linkArray->GetAlignRulesWithTemplate(regId); for (int z = 0; z < link.size(); ++z) { if (link[z] == dimN) { auto newRule = make_pair(rule[z].first, rule[z].second + maxShift); linkArray->AddLinkWithTemplate(z, dimN, templ.first, newRule, regId); auto itInfo = alignInfo.find(make_pair(linkArray, templ.first)); if (itInfo == alignInfo.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); for (int k = 0; k < itInfo->second->alignRuleWith.size(); ++k) { if (itInfo->second->alignRuleWith[k].first == dimN) { itInfo->second->alignRuleWith[k].second = newRule; break; } } break; } } } } ++dimN; } templ.first->SetSizes(sizes); } } static vector, vector>>> createFullShadowSpec(const vector, vector>>> &shadowBase, const vector>> &shadowShifts) { vector, vector>>> result(shadowBase); for (int z = 0; z < result.size(); ++z) { for (int p = 0; p < result[z].second.size(); ++p) { result[z].second[p].first += shadowShifts[z][p].first; result[z].second[p].second += shadowShifts[z][p].second; } } return result; } static void createShadowSpec(const vector &loopGraph, vector, vector>>>> &shadowSpecs) { for (int i = 0; i < loopGraph.size(); ++i) { createShadowSpec(loopGraph[i]->children, shadowSpecs); if (loopGraph[i]->directive == NULL) continue; if (loopGraph[i]->directive->shadowRenew.size() == 0 && loopGraph[i]->directive->across.size() == 0) continue; if (loopGraph[i]->directive->shadowRenew.size() != loopGraph[i]->directive->shadowRenewShifts.size()) continue; if (loopGraph[i]->directive->across.size() != loopGraph[i]->directive->acrossShifts.size()) continue; vector, vector>>> fullShadow = createFullShadowSpec(loopGraph[i]->directive->shadowRenew, loopGraph[i]->directive->shadowRenewShifts); vector, vector>>> fullAcross = createFullShadowSpec(loopGraph[i]->directive->across, loopGraph[i]->directive->acrossShifts); shadowSpecs.push_back(fullShadow); shadowSpecs.push_back(fullAcross); } } void createShadowSpec(const map>& loopGraph, const map>& 
arrayLinksByFuncCalls, const set& forArrays) { vector, vector>>>> shadowSpecsAll; for (auto& loopsByFile : loopGraph) if (loopsByFile.second.size() > 0) createShadowSpec(loopsByFile.second, shadowSpecsAll); map dynamicArraysStr; for (auto& array : forArrays) dynamicArraysStr[array->GetName()] = array; for (auto& spec : shadowSpecsAll) { for (int i = 0; i < spec.size(); ++i) { auto dynArray = dynamicArraysStr.find(spec[i].first.second); if (dynArray == dynamicArraysStr.end()) continue; set realArrayRefs; getRealArrayRefs(dynArray->second, dynArray->second, realArrayRefs, arrayLinksByFuncCalls); for (auto& array : realArrayRefs) array->ExtendShadowSpec(spec[i].second); } } } #undef PRINT_PROF_INFO #undef PRINT_DIR_RESULT