#include "../Utils/leak_detector.h"

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "DvmhDirective.h"
#include "../Distribution/Array.h"
#include "../Distribution/Arrays.h"
#include "../Distribution/GraphCSR.h"
#include "../Utils/errors.h"
#include "../Utils/utils.h"
#include "../GraphCall/graph_calls_func.h"

using std::vector;
using std::tuple;
using std::get;
using std::string;
using std::pair;
using std::set;
using std::map;
using std::set_union;
using std::make_pair;
using std::min;
using std::max;

// NOTE(review): the template arguments in this file were reconstructed from how the
// values are used below; confirm them against the declarations in DvmhDirective.h.

// Inserts every element of 'first' and of 'second' into 'result'.
// Elements already present in 'result' are kept.
template<typename setT>
static void uniteSets(const set<setT> &first, const set<setT> &second, set<setT> &result)
{
    result.insert(first.begin(), first.end());
    result.insert(second.begin(), second.end());
}

// Inserts the union of two maps into 'result'.
// The union is built over (key, value) pairs, so when both maps hold the same key
// with different values both pairs appear in sorted order and the smaller pair is
// inserted first; map::insert then ignores the later one.
template<typename keyT, typename valT>
static void uniteSets(const map<keyT, valT> &first, const map<keyT, valT> &second, map<keyT, valT> &result)
{
    vector<pair<keyT, valT>> unitedVector(first.size() + second.size());
    auto last = set_union(first.begin(), first.end(), second.begin(), second.end(), unitedVector.begin());
    unitedVector.resize(last - unitedVector.begin());

    result.insert(unitedVector.begin(), unitedVector.end());
}

// Merges the reduction lists of two directives into 'result'.
// For a key that is in 'result' and in 'second', the value sets are united into the
// entry of 'result'; a key that is only in 'second' is copied over.
// 'first' is looked up by key, so 'result' does not have to be an exact copy of it.
template<typename keyT, typename valT>
static void uniteReduction(const map<keyT, valT> &first, const map<keyT, valT> &second, map<keyT, valT> &result)
{
    for (auto &redList : result)
    {
        const auto inSecond = second.find(redList.first);
        if (inSecond == second.end())
            continue;

        const auto inFirst = first.find(redList.first);
        if (inFirst != first.end())
            uniteSets(inFirst->second, inSecond->second, redList.second);
        else
            uniteSets(valT(), inSecond->second, redList.second);
    }

    // insert() leaves already existing keys untouched
    for (const auto &redList : second)
        result.insert(redList);
}

// Combines two ON rules element by element.
// A "*" entry yields to the non-"*" entry of the other rule; when both are "*" only
// the name is set and the coefficients stay value-initialized. When both are
// non-"*", the entry of 'first' is taken.
// An entry missing in 'second' is treated as "*".
template<typename vTuples>
static vTuples uniteOnRules(const vTuples &first, const vTuples &second)
{
    vTuples result(first.size());
    for (size_t i = 0; i < first.size(); ++i)
    {
        const bool firstStar = first[i].first == "*";
        const bool secondStar = (i >= second.size()) || second[i].first == "*";

        if (firstStar && secondStar)
            result[i].first = "*";
        else if (firstStar)
            result[i] = second[i];
        else
            result[i] = first[i];
    }
    return result;
}

// Returns true when two ON rules can not be united: their sizes differ or
// some position is non-"*" in both of them.
template<typename vTuples>
static bool hasConflictUniteOnRules(const vTuples& first, const vTuples& second)
{
    if (first.size() != second.size())
        return true;

    for (size_t i = 0; i < first.size(); ++i)
        if (first[i].first != "*" && second[i].first != "*")
            return true;
    return false;
}

// Unites two parallel directives into a newly allocated one (the caller owns it).
// The directives are united when they refer to the same array or, in shared memory
// mode, when their ON rules do not conflict; otherwise a copy of 'left' is returned.
ParallelDirective* operator+(const ParallelDirective &left, const ParallelDirective &right)
{
    const ParallelDirective *first = &left;
    const ParallelDirective *second = &right;

    checkNull(first, convertFileName(__FILE__).c_str(), __LINE__);
    checkNull(second, convertFileName(__FILE__).c_str(), __LINE__);

    bool condition = first->arrayRef == second->arrayRef;
    if (sharedMemoryParallelization)
        condition = !hasConflictUniteOnRules(first->on, second->on) && !hasConflictUniteOnRules(first->on2, second->on2);

    if (!condition)
        return new ParallelDirective(left);

    if (first->on.size() != second->on.size())
    {
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }

    ParallelDirective *result = new ParallelDirective();

    result->langType = first->langType;
    result->file = first->file;
    result->line = first->line;
    result->col = first->col;

    result->arrayRef = first->arrayRef;
    result->arrayRef2 = first->arrayRef2;

    // loop variables of 'first' followed by the ones of 'second'
    result->parallel = first->parallel;
    result->parallel.insert(result->parallel.end(), second->parallel.begin(), second->parallel.end());

    result->on = uniteOnRules(first->on, second->on);
    result->on2 = uniteOnRules(first->on2, second->on2);

    uniteVectors(first->across, second->across, result->across);
    uniteVectors(first->shadowRenew, second->shadowRenew, result->shadowRenew);

    uniteSets(first->privates, second->privates, result->privates);
    uniteSets(first->remoteAccess, second->remoteAccess, result->remoteAccess);

    result->reduction = first->reduction;
    result->reductionLoc = first->reductionLoc;

    uniteReduction(first->reduction, second->reduction, result->reduction);
    uniteReduction(first->reductionLoc, second->reductionLoc, result->reductionLoc);

    return result;
}

// Looks for the first entry of 'rule' whose second tuple field equals 'alingDim'.
// On success stores (alingDim, third field of that entry) into 'retVal' and returns 0,
// otherwise returns -1 and leaves 'retVal' untouched.
static inline int findRule(const int alingDim, const vector<tuple<DIST::Array*, int, pair<int, int>>> &rule,
                           pair<int, pair<int, int>> &retVal)
{
    for (const auto &elem : rule)
    {
        if (get<1>(elem) == alingDim)
        {
            retVal = make_pair(alingDim, get<2>(elem));
            return 0;
        }
    }
    return -1;
}

// Tells whether dimension 'dimN' of the shadow rule is not distributed:
// it has no array in its rule, or the matching entry of 'distribution' has
// dist::NONE for its dimension, or the parallel ON rule has "*" there.
// Returns false when 'dimN' is outside of 'ruleForShadow'.
static inline bool isNonDistributedDim(const vector<tuple<DIST::Array*, int, pair<int, int>>> &ruleForOn,
                                       const vector<tuple<DIST::Array*, int, pair<int, int>>> &ruleForShadow,
                                       const int dimN,
                                       const vector<pair<DIST::Array*, const DistrVariant*>> &distribution,
                                       const vector<pair<string, pair<int, int>>> &parallelOnRule)
{
    if (dimN < 0 || (int)ruleForShadow.size() <= dimN)
        return false;

    if (get<0>(ruleForShadow[dimN]) == nullptr)
        return true;

    //check for distributed in declaration or in redistr. rules
    const tuple<DIST::Array*, int, pair<int, int>> &toCheck = ruleForShadow[dimN];
    const int dimInTempl = get<1>(toCheck);

    for (auto &templ : distribution)
    {
        if (templ.first != get<0>(toCheck))
            continue;

        // the dimension is range checked before it is used as an index
        if (dimInTempl >= 0 && dimInTempl < (int)templ.second->distRule.size())
            if (templ.second->distRule[dimInTempl] == dist::NONE)
                return true;
    }

    //check for distributed in loop
    if (dimInTempl >= 0 && dimInTempl < (int)parallelOnRule.size())
        if (parallelOnRule[dimInTempl].first == "*")
            return true;

    return false;
}

// Returns the align rule of 'array' with its template.
// The rule is requested from 'reducedG' for every real array reference reported by
// getRealArrayRefs(); all the rules have to be equal, otherwise (or when there are
// no references at all) an internal error is raised.
vector<tuple<DIST::Array*, int, pair<int, int>>>
getAlignRuleWithTemplate(DIST::Array *array, const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls,
                         DIST::GraphCSR<int, double, attrType> &reducedG,
                         const DIST::Arrays<int> &allArrays,
                         const uint64_t regionId)
{
    vector<tuple<DIST::Array*, int, pair<int, int>>> retVal;

    set<DIST::Array*> realRefs;
    getRealArrayRefs(array, array, realRefs, arrayLinksByFuncCalls);

    vector<vector<tuple<DIST::Array*, int, pair<int, int>>>> allRuleForShadow(realRefs.size());
    int idx = 0;
    for (auto &realRef : realRefs)
        reducedG.GetAlignRuleWithTemplate(realRef, allArrays, allRuleForShadow[idx++], regionId);

    if (allRuleForShadow.empty())
    {
        // nothing to take the rule from
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }
    else if (realRefs.size() == 1)
        retVal = allRuleForShadow[0];
    else
    {
        if (isAllRulesEqual(allRuleForShadow) == false)
        {
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }
        else
            retVal = allRuleForShadow[0];
    }

    return retVal;
}

// Returns true when some access of 'refsInLoop' has a non zero shift recorded in
// 'shiftsByAccess' in more than one dimension of 'currArray'.
// Raises an internal error when an access has fewer entries than the array has dimensions.
bool needCorner(const DIST::Array* currArray, const vector<map<pair<int, int>, int>>& shiftsByAccess,
                const vector<vector<pair<int, int>>>& refsInLoop)
{
    const int dimSize = currArray->GetDimSize();

    for (auto& access : refsInLoop)
    {
        if ((int)access.size() < dimSize)
        {
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }

        int countOfShadows = 0;
        for (int i = 0; i < dimSize; ++i)
        {
            // no shifts were recorded for this dimension
            if (i >= (int)shiftsByAccess.size())
                break;

            const pair<int, int>& coef = access[i];
            const auto it = shiftsByAccess[i].find(coef);
            if (it != shiftsByAccess[i].end() && it->second != 0)
                countOfShadows++;
        }

        if (countOfShadows > 1)
            return true;
    }
    return false;
}

// Calculates per dimension corrections ('shifts') for the bounds in 'coeffs.second'
// of 'calcForArray' and returns them as a "low:high,low:high,..." string, or an empty
// string when every corrected bound is zero.
// For each dimension a new map is appended to 'shiftsByAccess' and filled with the
// shift found for every recognized read access; accesses whose rule does not match the
// loop rule are stored into 'remoteRegularReads'.
// NOTE(review): 'shiftsByAccess' is indexed by the dimension number right after
// push_back, so it is presumably expected to be empty on entry - confirm with callers.
static inline string calculateShifts(DIST::GraphCSR<int, double, attrType> &reducedG,
                                     const DIST::Arrays<int> &allArrays,
                                     DIST::Array *arrayRef, DIST::Array *calcForArray,
                                     pair<pair<string, string>, vector<pair<int, int>>> &coeffs,
                                     vector<pair<int, int>> &shifts,
                                     vector<map<pair<int, int>, int>> &shiftsByAccess,
                                     map<DIST::Array*, vector<ArrayOp>>& remoteRegularReads,
                                     const vector<pair<string, pair<int, int>>> &baseOnRule,
                                     const vector<pair<string, pair<int, int>>> &parallelOnRule,
                                     const map<DIST::Array*, pair<vector<ArrayOp>, vector<bool>>> &readOps,
                                     const bool isAcross,
                                     const vector<pair<DIST::Array*, const DistrVariant*>> &distribution,
                                     const uint64_t regionId,
                                     const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls)
{
    // markers for "no shift was found" used by the min/max search below
    const int noMinShift = 9999999;
    const int noMaxShift = -9999999;

    vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForOn, ruleForShadow;
    if (!sharedMemoryParallelization)
    {
        ruleForOn = getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
        ruleForShadow = getAlignRuleWithTemplate(calcForArray, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
    }

    string out = "";

    // check for distributed and not mapped dims -> zero them out ('coeffs.second')
    // 'refs' is used only by the disabled code below; the call is kept as it was
    set<DIST::Array*> refs;
    getRealArrayRefs(calcForArray, calcForArray, refs, arrayLinksByFuncCalls);

    //TODO: need to correct errors
    /*if (!sharedMemoryParallelization)
    {
        for (auto& array : refs)
        {
            DIST::Array* tmpl = array->GetTemplateArray(regionId);
            checkNull(tmpl, convertFileName(__FILE__).c_str(), __LINE__);

            auto align = array->GetLinksWithTemplate(regionId);

            bool found = false;
            for (auto& t : distribution)
            {
                if (t.first == tmpl)
                {
                    found = true;
                    for (int aDim = 0; aDim < align.size(); ++aDim)
                    {
                        int link = align[aDim];
                        if (link != -1)
                        {
                            int tLink = link;
                            if (!arrayRef->IsTemplate())
                            {
                                auto alignMain = arrayRef->GetLinksWithTemplate(regionId);
                                for (int z = 0; z < alignMain.size(); ++z)
                                    if (alignMain[z] == tLink)
                                        tLink = z;
                            }

                            if (t.second->distRule[link] == dist::BLOCK && baseOnRule[aDim].first == "*")
                            {
                                for (int z = 0; z < coeffs.second.size(); ++z)
                                    coeffs.second[z].first = coeffs.second[z].second = 0;
                                return out;
                            }
                        }
                    }
                }
            }
            if (!found)
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }
    }*/

    const pair<vector<ArrayOp>, vector<bool>> *currReadOp = nullptr;
    const auto readIt = readOps.find(calcForArray);
    if (readIt != readOps.end())
        currReadOp = &(readIt->second);

    if (!sharedMemoryParallelization)
    {
        findAndReplaceDimentions(ruleForOn, allArrays);
        findAndReplaceDimentions(ruleForShadow, allArrays);
    }

    const int len = (int)coeffs.second.size();
    vector<pair<int, int>> shift(len);

    // 'shifts' receives one entry per dimension below
    if ((int)shifts.size() < len)
        shifts.resize(len);

    bool allZero = true;
    for (int k = 0; k < len; ++k)
    {
        shiftsByAccess.push_back(map<pair<int, int>, int>());
        shift[k].first = shift[k].second = 0;

        if (k != 0)
            out += ",";

        // calculate correct shifts from readOp info;
        // only dimensions with no unrecognized read operations are processed
        const bool hasReadInfo = currReadOp != nullptr &&
                                 k < (int)currReadOp->second.size() &&
                                 k < (int)currReadOp->first.size() &&
                                 currReadOp->second[k] == false;
        if (hasReadInfo)
        {
            const auto &coefficients = currReadOp->first[k].coefficients;

            if (sharedMemoryParallelization)
            {
                // the shift is the free term of the access itself
                for (auto& coefs : coefficients)
                {
                    const auto currAccess = coefs.first;
                    const int currShift = coefs.first.second;

                    auto itFound = shiftsByAccess[k].find(currAccess);
                    if (itFound == shiftsByAccess[k].end())
                        itFound = shiftsByAccess[k].insert(itFound, make_pair(currAccess, currShift));
                }
            }
            else if (k < (int)ruleForShadow.size() && get<0>(ruleForShadow[k]) != nullptr)
            {
                const pair<int, int> currRuleShadow = get<2>(ruleForShadow[k]);

                pair<int, pair<int, int>> currRuleOn;
                const int err = findRule(get<1>(ruleForShadow[k]), ruleForOn, currRuleOn);
                if (err == 0)
                {
                    if (currRuleOn.first < 0 || currRuleOn.first >= (int)parallelOnRule.size())
                    {
                        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                    }

                    const pair<int, int> loopRule = DIST::Fx(parallelOnRule[currRuleOn.first].second, currRuleOn.second);
                    if (loopRule.first != 0)
                    {
                        int minShift = noMinShift;
                        int maxShift = noMaxShift;

                        for (auto &coefs : coefficients)
                        {
                            const auto currAccess = coefs.first;
                            const auto result = DIST::Fx(currAccess, currRuleShadow);

                            if (result.first == loopRule.first)
                            {
                                const int diff = result.second - loopRule.second;
                                const int absShift = std::abs(diff);
                                const int signShift = diff > 0 ? 1 : -1;
                                // division rounded away from zero
                                const int currShift = signShift * (absShift / currRuleShadow.first + ((absShift % currRuleShadow.first) != 0));

                                minShift = std::min(minShift, currShift);
                                maxShift = std::max(maxShift, currShift);

                                auto itFound = shiftsByAccess[k].find(currAccess);
                                if (itFound == shiftsByAccess[k].end())
                                    itFound = shiftsByAccess[k].insert(itFound, make_pair(currAccess, currShift));
                            }
                            else // remote_access
                            {
                                auto it = remoteRegularReads.find(calcForArray);
                                if (it == remoteRegularReads.end())
                                    it = remoteRegularReads.insert(it, make_pair(calcForArray, vector<ArrayOp>(calcForArray->GetDimSize())));
                                it->second[k].coefficients.insert(coefs);
                            }
                        }

                        // inconsistent -> may be remote will add later...
                        // or SINGLE position
                        if (minShift == noMinShift && maxShift == noMaxShift)
                            minShift = maxShift = 0;

                        const pair<int, int> &bounds = coeffs.second[k];
                        if (minShift == maxShift)
                        {
                            if (minShift == 0)
                            {
                                if (parallelOnRule[currRuleOn.first].first != "SINGLE")
                                {
                                    shift[k].first = -bounds.first;
                                    shift[k].second = -bounds.second;
                                }
                            }
                            else
                            {
                                shift[k].first = -minShift;
                                shift[k].second = minShift;

                                // keep only the non negative side
                                if (shift[k].first > 0 && shift[k].second < 0)
                                    shift[k].second = 0;
                                else if (shift[k].first < 0 && shift[k].second > 0)
                                    shift[k].first = 0;

                                shift[k].first -= bounds.first;
                                shift[k].second -= bounds.second;
                            }
                        }
                        else if (coefficients.size() > 0)
                        {
                            if (minShift > 0 && maxShift > 0)
                            {
                                shift[k].first = 0;
                                shift[k].second = std::abs(maxShift) - bounds.second;
                            }
                            else if (minShift < 0 && maxShift < 0)
                            {
                                shift[k].first = std::abs(minShift) - bounds.first;
                                shift[k].second = 0;
                            }
                            else
                            {
                                shift[k].first = std::abs(minShift) - bounds.first;
                                shift[k].second = std::abs(maxShift) - bounds.second;
                            }
                        }
                    }
                }
            }
        }

        if (!sharedMemoryParallelization)
        {
            // corrected bounds must not become negative
            if (coeffs.second[k].first + shift[k].first < 0)
                shift[k].first = -coeffs.second[k].first;

            if (coeffs.second[k].second + shift[k].second < 0)
                shift[k].second = -coeffs.second[k].second;

            if (isAcross)
            {
                if (coeffs.second[k] == make_pair(0, 0))
                    shift[k] = make_pair(0, 0);
            }
            else if (isNonDistributedDim(ruleForOn, ruleForShadow, k, distribution, parallelOnRule))
            {
                shift[k].first = -coeffs.second[k].first;
                shift[k].second = -coeffs.second[k].second;
                shiftsByAccess[k].clear();
            }
        }

        const int lowBound = coeffs.second[k].first + shift[k].first;
        const int highBound = coeffs.second[k].second + shift[k].second;

        char buf[256];
        snprintf(buf, sizeof(buf), "%d:%d", lowBound, highBound);

        shifts[k] = shift[k];

        if (lowBound != 0 || highBound != 0)
            allZero = false;
        out += buf;
    }

    if (allZero)
        return "";
    return out;
}

// Generates the bounds string of 'shadowArray' for this directive by means of
// calculateShifts().
// Outside of shared memory mode the ON rule is first mapped to the dimensions of the
// template (when 'arrayRef' is not a template itself) and every rule entry whose name
// is not one of the 'parallel' loop variables is renamed to "SINGLE".
// With 'isAcross' set the array is remembered in 'arraysInAcross'; without it an empty
// string is returned for arrays that are already there.
string ParallelDirective::genBounds(pair<pair<string, string>, vector<pair<int, int>>> &shadowOp,
                                    vector<pair<int, int>> &shadowOpShift,
                                    DIST::GraphCSR<int, double, attrType> &reducedG,
                                    DIST::Arrays<int> &allArrays, DIST::Array* shadowArray,
                                    map<DIST::Array*, vector<ArrayOp>>& remoteRegularReads,
                                    const map<DIST::Array*, pair<vector<ArrayOp>, vector<bool>>> &readOps,
                                    const bool isAcross,
                                    const uint64_t regionId,
                                    const vector<pair<DIST::Array*, const DistrVariant*>> &distribution,
                                    set<DIST::Array*> &arraysInAcross,
                                    vector<map<pair<int, int>, int>> &shiftsByAccess,
                                    const map<DIST::Array*, set<DIST::Array*>> &arrayLinksByFuncCalls) const
{
    checkNull(shadowArray, convertFileName(__FILE__).c_str(), __LINE__);

    auto on_ext = on;
    if (!sharedMemoryParallelization)
    {
        //replace to template align ::on
        if (arrayRef->IsTemplate() == false)
        {
            vector<tuple<DIST::Array*, int, pair<int, int>>> ruleForRef =
                getAlignRuleWithTemplate(arrayRef, arrayLinksByFuncCalls, reducedG, allArrays, regionId);
            findAndReplaceDimentions(ruleForRef, allArrays);

            // the size is taken from the first rule entry that has an array
            on_ext.clear();
            for (const auto &rule : ruleForRef)
            {
                if (get<0>(rule))
                {
                    on_ext.resize(get<0>(rule)->GetDimSize());
                    break;
                }
            }

            if (on_ext.size() == 0)
            {
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
            }

            std::fill(on_ext.begin(), on_ext.end(), make_pair("*", make_pair(0, 0)));

            for (size_t i = 0; i < ruleForRef.size(); ++i)
            {
                if (!get<0>(ruleForRef[i]))
                    continue;

                const int templDim = get<1>(ruleForRef[i]);
                if (i >= on.size() || templDim < 0 || templDim >= (int)on_ext.size())
                {
                    printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                }
                on_ext[templDim] = on[i];
            }
        }

        //replace single dim to key word 'SINGLE'
        for (auto &rule : on_ext)
        {
            if (rule.first == "*")
                continue;

            if (std::find(parallel.begin(), parallel.end(), rule.first) == parallel.end())
                rule.first = "SINGLE";
        }
    }

    if (isAcross)
        arraysInAcross.insert(shadowArray);
    else if (arraysInAcross.find(shadowArray) != arraysInAcross.end())
        return "";

    return calculateShifts(reducedG, allArrays, arrayRef, shadowArray, shadowOp, shadowOpShift, shiftsByAccess,
                           remoteRegularReads, on, on_ext, readOps, isAcross, distribution, regionId,
                           arrayLinksByFuncCalls);
}

// Appends to 'currdist' every variant made of 'currDist' followed by 'dimNum' rules,
// each of them being dist::NONE or dist::BLOCK (2^dimNum variants, NONE goes first).
// Nothing is generated for a non positive 'dimNum'.
static void genVariants(const int dimNum, vector<dist> &currDist, std::vector<DistrVariant> &currdist)
{
    if (dimNum <= 0)
        return;

    if (dimNum == 1)
    {
        DistrVariant withNone(currDist);
        withNone.distRule.push_back(dist::NONE);

        DistrVariant withBlock(currDist);
        withBlock.distRule.push_back(dist::BLOCK);

        currdist.push_back(withNone);
        currdist.push_back(withBlock);
    }
    else
    {
        vector<dist> next(currDist);

        next.push_back(dist::NONE);
        genVariants(dimNum - 1, next, currdist);

        next.back() = dist::BLOCK;
        genVariants(dimNum - 1, next, currdist);
    }
}

// Appends to 'distrRules' all distribution variants of every array of 'arraysToDist'.
// Dimensions that are deprecated or not mapped are forced to dist::NONE in every variant.
void DataDirective::createDirstributionVariants(const vector<DIST::Array*> &arraysToDist)
{
    for (DIST::Array *array : arraysToDist)
    {
        vector<DistrVariant> variants;
        vector<dist> prefix;

        const int dimSize = array->GetDimSize();
        genVariants(dimSize, prefix, variants);

        //deprecate by dims
        for (auto &variant : variants)
        {
            for (int z = 0; z < dimSize; ++z)
                if (array->IsDimDepracated(z) || !array->IsDimMapped(z))
                    variant.distRule[z] = dist::NONE;
        }

        distrRules.push_back(make_pair(array, variants));
    }
}

// Builds the text of the distribution rule: "(" + comma separated "*" (dist::NONE) or
// "BLOCK" (dist::BLOCK) entries + ")".
// With a non empty 'newOrder', entry i is taken from distRule[newOrder[i]].
string DistrVariantBase::GenRuleBase(const vector<int> &newOrder) const
{
    string retVal = "(";

    const size_t count = distRule.size();
    for (size_t i = 0; i < count; ++i)
    {
        const auto &rule = newOrder.empty() ? distRule[i] : distRule[newOrder[i]];

        if (rule == dist::NONE)
            retVal += "*";
        else if (rule == dist::BLOCK)
            retVal += "BLOCK";

        if (i + 1 != count)
            retVal += ",";
    }

    retVal += ")";
    return retVal;
}

// Returns the text of the chosen distribution variant for each array:
// rules[i] selects a variant of distrRules[i]. The text is the short name of the array
// followed by the rule in the new order of the template dimensions.
// Raises an internal error when there are more indices than arrays or an index is out of range.
vector<string> DataDirective::GenRule(const vector<int> &rules) const
{
    vector<string> retVal;
    if (distrRules.size() < rules.size())
    {
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }

    for (size_t i = 0; i < rules.size(); ++i)
    {
        const auto &variants = distrRules[i].second;
        if (rules[i] >= 0 && rules[i] < (int)variants.size())
        {
            string tmp = distrRules[i].first->GetShortName();
            tmp += variants[rules[i]].GenRuleBase(distrRules[i].first->GetNewTemplateDimsOrder());
            retVal.push_back(tmp);
        }
        else
        {
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }
    }

    return retVal;
}

// Same selection as GenRule(rules), but returns the distRule vectors of the chosen
// variants. The unnamed int parameter only distinguishes the overload.
vector<vector<dist>> DataDirective::GenRule(const vector<int> &rules, int) const
{
    vector<vector<dist>> retVal;
    if (distrRules.size() < rules.size())
    {
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }

    for (size_t i = 0; i < rules.size(); ++i)
    {
        const auto &variants = distrRules[i].second;
        if (rules[i] >= 0 && rules[i] < (int)variants.size())
            retVal.push_back(variants[rules[i]].distRule);
        else
        {
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }
    }

    return retVal;
}

// Returns the text of every align rule, in the order of 'alignRules'.
vector<string> DataDirective::GenAlignsRules() const
{
    vector<string> retVal;
    retVal.reserve(alignRules.size());

    for (const auto &rule : alignRules)
        retVal.push_back(rule.GenRuleBase());
    return retVal;
}

// Builds "ALIGN <array>(<exprs>) WITH <template>(<exprs>)".
// Template dimensions that no entry of 'alignRuleWith' refers to are printed as "*";
// when the template has a new order of dimensions, the expressions are permuted by it.
string AlignRuleBase::GenRuleBase() const
{
    string retVal = "";
    retVal += "ALIGN " + alignArray->GetShortName() + "(";

    for (size_t i = 0; i < alignRule.size(); ++i)
    {
        retVal += genStringExpr(alignNames[i], alignRule[i]);
        if (i + 1 != alignRule.size())
            retVal += ",";
    }
    retVal += ") WITH " + alignWith->GetShortName() + "(";

    const int withDims = alignWith->GetDimSize();
    vector<string> alignEachDim(withDims);
    for (int i = 0; i < withDims; ++i)
        alignEachDim[i] = "*";

    for (size_t i = 0; i < alignRuleWith.size(); ++i)
        if (alignRuleWith[i].first != -1)
            alignEachDim[alignRuleWith[i].first] = genStringExpr(alignNames[i], alignRuleWith[i].second);

    auto newOrder = alignWith->GetNewTemplateDimsOrder();
    if (newOrder.size() != 0)
    {
        // permute from a copy, the destination is overwritten in place
        const vector<string> alignEachDimOld(alignEachDim);
        for (size_t z = 0; z < newOrder.size(); ++z)
            alignEachDim[z] = alignEachDimOld[newOrder[z]];
    }

    for (int i = 0; i < withDims; ++i)
    {
        retVal += alignEachDim[i];
        if (i != withDims - 1)
            retVal += ",";
    }
    retVal += ")";

    return retVal;
}

// Serializes the rule into a '#' separated string: the addresses of both arrays
// ("-1" for a null pointer), then the size and the pairs of 'alignRule', then the size
// and the entries of 'alignRuleWith'.
string AlignRuleBase::toString()
{
    string retVal = "";

    if (alignArray)
        retVal += "#" + std::to_string((long long)alignArray);
    else
        retVal += "#-1";

    if (alignWith)
        retVal += "#" + std::to_string((long long)alignWith);
    else
        retVal += "#-1";

    retVal += "#" + std::to_string(alignRule.size());
    for (const auto &rule : alignRule)
        retVal += "#" + std::to_string(rule.first) + "#" + std::to_string(rule.second);

    retVal += "#" + std::to_string(alignRuleWith.size());
    for (const auto &rule : alignRuleWith)
        retVal += "#" + std::to_string(rule.first)
                + "#" + std::to_string(rule.second.first)
                + "#" + std::to_string(rule.second.second);

    return retVal;
}

// Splits a number into its sign, " + " or " - ", and the decimal text of its absolute value.
// The value is widened before the negation, so the smallest int is handled too.
pair<string, string> convertDigitToPositive(const int digit)
{
    const bool negative = digit < 0;
    const long long magnitude = negative ? -static_cast<long long>(digit) : static_cast<long long>(digit);

    return make_pair(string(negative ? " - " : " + "), std::to_string(magnitude));
}

// Builds the text of the linear expression expr.first * letter + expr.second:
//   (0, 0) -> "*"
//   (1, 0) -> "letter"
//   (a, 0) -> "a * letter"
//   (1, b) -> "letter + b" or "letter - b"
//   (a, b) -> "a * letter + b" or "a * letter - b"
// A negative factor is printed in parentheses, e.g. "(-2) * letter".
string genStringExpr(const string &letter, const pair<int, int> expr)
{
    if (expr.first == 0 && expr.second == 0)
        return "*";

    string retVal;
    if (expr.first == 1)
        retVal = letter;
    else
    {
        pair<string, string> factor = convertDigitToPositive(expr.first);
        if (factor.first == " - ")
            factor.second = "(-" + factor.second + ")";
        retVal = factor.second + string(" * ") + letter;
    }

    if (expr.second != 0)
    {
        const pair<string, string> offset = convertDigitToPositive(expr.second);
        retVal += offset.first + offset.second;
    }

    return retVal;
}

// names of the align variables, one per array dimension
const std::vector<std::string> AlignRuleBase::alignNames = { "iEX1", "iEX2", "iEX3", "iEX4", "iEX5", "iEX6", "iEX7" };