Merge pull request 'Fix MOVE_OPERATORS pass' (#84) from egormayorov into master

This commit was merged in pull request #84.
This commit is contained in:
2026-05-05 18:00:34 +03:00
3 changed files with 306 additions and 335 deletions

View File

@@ -5,125 +5,134 @@
#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <functional>
#include <cctype>
#include "../../Utils/errors.h"
#include "../../Utils/SgUtils.h"
#include "../../GraphCall/graph_calls.h"
#include "../../GraphCall/graph_calls_func.h"
#include "../../CFGraph/CFGraph.h"
#include "../../CFGraph/IR.h"
#include "../../GraphLoop/graph_loops.h"
#include "move_operators.h"
using namespace std;
set<int> loop_tags = {FOR_NODE};
set<int> loop_tags = {FOR_NODE, WHILE_NODE, DO_WHILE_NODE};
set<int> control_tags = {IF_NODE, ELSEIF_NODE, DO_WHILE_NODE, WHILE_NODE, LOGIF_NODE};
set<int> control_end_tags = {CONTROL_END};
static vector<SAPFOR::IR_Block*> findInstructionsFromStatement(SgStatement* st, const vector<SAPFOR::BasicBlock*>& blocks)
static bool isParentStmt(SgStatement* stmt, SgStatement* parent)
{
vector<SAPFOR::IR_Block*> result;
if (!st)
return result;
for (; stmt; stmt = stmt->controlParent())
if (stmt == parent)
return true;
return false;
}
const int stmtId = st->id();
for (auto* bb : blocks)
// Tells whether statement `st` is attributed to the source file `fileName`.
// Returns false for a null statement or when the statement reports no file name;
// otherwise the reported name must match `fileName` exactly.
static bool isStatementInFile(SgStatement* st, const string& fileName)
{
    if (st == nullptr)
        return false;

    const char* ownerFile = st->fileName();
    if (ownerFile == nullptr)
        return false;

    return fileName.compare(ownerFile) == 0;
}
// Gathers the loop statements that belong to one function.
// Starting at `funcStmt`, statements are visited through lexNext() until either the chain ends
// or funcStmt->lastNodeOfStmt() is reached (that last node itself is not inspected).
// A statement is kept when its variant is listed in `loop_tags` and isStatementInFile()
// accepts it for `fileName`. The result preserves visiting order; it is empty for a null `funcStmt`.
static vector<SgStatement*> findLoopStatementsInFunction(SgStatement* funcStmt, const string& fileName)
{
    vector<SgStatement*> found;
    if (funcStmt == nullptr)
        return found;

    SgStatement* const stop = funcStmt->lastNodeOfStmt();
    SgStatement* cur = funcStmt;
    while (cur != nullptr && cur != stop)
    {
        const bool isLoop = loop_tags.find(cur->variant()) != loop_tags.end();
        if (isLoop && isStatementInFile(cur, fileName))
            found.push_back(cur);
        cur = cur->lexNext();
    }
    return found;
}
static SgStatement* firstSignificantOperatorInBlock(const SAPFOR::BasicBlock* bb, const string& fileName)
{
if (!bb)
return nullptr;
for (auto* ir : bb->getInstructions())
{
if (!bb)
if (!ir || !ir->getInstruction())
continue;
for (auto* ir : bb->getInstructions())
{
if (!ir || !ir->getInstruction())
continue;
SgStatement* op = ir->getInstruction()->getOperator();
if (op && op->id() == stmtId)
result.push_back(ir);
}
SgStatement* st = ir->getInstruction()->getOperator();
if (isStatementInFile(st, fileName))
return st;
}
sort(result.begin(), result.end(),
[](const SAPFOR::IR_Block* a, const SAPFOR::IR_Block* b) { return a->getNumber() < b->getNumber(); });
return result;
return nullptr;
}
vector<SAPFOR::BasicBlock*> findFuncBlocksByFuncStatement(SgStatement *st, const map<FuncInfo*, vector<SAPFOR::BasicBlock*>>& FullIR) {
vector<SAPFOR::BasicBlock*> result;
if (!st)
return result;
static SgStatement* lastSignificantOperatorInBlock(const SAPFOR::BasicBlock* bb, const string& fileName)
{
if (!bb)
return nullptr;
Statement* forSt = (Statement*)st;
const string stmtFile = st->fileName();
const int stmtLine = st->lineNumber();
for (auto& func: FullIR) {
if (!func.first || !func.first->funcPointer)
const auto& instructions = bb->getInstructions();
for (auto it = instructions.rbegin(); it != instructions.rend(); ++it)
{
auto* ir = *it;
if (!ir || !ir->getInstruction())
continue;
const string funcFile = func.first->fileName;
const int funcLine = func.first->funcPointer->lineNumber();
// Important: select CFG blocks only for the same file and function header.
if (funcFile == stmtFile && funcLine == stmtLine)
{
result = func.second;
break;
}
SgStatement* st = ir->getInstruction()->getOperator();
if (isStatementInFile(st, fileName))
return st;
}
return result;
return nullptr;
}
map<SgForStmt*, vector<SAPFOR::BasicBlock*>> findAndAnalyzeLoops(SgStatement *st, const vector<SAPFOR::BasicBlock*>& blocks) {
map<SgForStmt*, vector<SAPFOR::BasicBlock*>> result;
SgStatement *lastNode = st->lastNodeOfStmt();
while (st && st != lastNode) {
if (loop_tags.find(st -> variant()) != loop_tags.end()) {
SgForStmt *forSt = (SgForStmt*)st;
SgStatement *loopBody = forSt -> body();
SgStatement *lastLoopNode = st->lastNodeOfStmt();
set<int> blocks_nums;
while (loopBody && loopBody != lastLoopNode) {
vector<SAPFOR::IR_Block*> irBlocks = findInstructionsFromStatement(loopBody, blocks);
if (!irBlocks.empty()) {
SAPFOR::IR_Block* IR = irBlocks.front();
if (IR && IR->getBasicBlock()) {
if (blocks_nums.find(IR -> getBasicBlock() -> getNumber()) == blocks_nums.end()) {
result[forSt].push_back(IR -> getBasicBlock());
blocks_nums.insert(IR -> getBasicBlock() -> getNumber());
}
}
}
loopBody = loopBody -> lexNext();
}
sort(result[forSt].begin(), result[forSt].end());
}
st = st -> lexNext();
}
return result;
// Decides whether basic block `bb` is covered by one of the given loop statements.
// The first and last significant operators of the block (as returned by
// firstSignificantOperatorInBlock / lastSignificantOperatorInBlock for `fileName`) are looked up;
// if either is missing the answer is false. Otherwise the block counts as "in a loop" when some
// non-null entry of `loops` is reported by isParentStmt() as a parent of both operators.
static bool isBasicBlockInAnyLoop(const SAPFOR::BasicBlock* bb, const vector<SgStatement*>& loops, const string& fileName)
{
    SgStatement* const head = firstSignificantOperatorInBlock(bb, fileName);
    SgStatement* const tail = lastSignificantOperatorInBlock(bb, fileName);
    if (head == nullptr || tail == nullptr)
        return false;

    return any_of(loops.begin(), loops.end(),
                  [head, tail](SgStatement* candidate)
                  {
                      return candidate != nullptr
                          && isParentStmt(head, candidate)
                          && isParentStmt(tail, candidate);
                  });
}
vector<SAPFOR::BasicBlock*> findBlocksInLoopsByFullIR(
SgStatement* funcStmt,
static vector<SAPFOR::BasicBlock*> findBlocksInLoopsByFullIR(
SgFile* file,
const map<FuncInfo*, vector<SAPFOR::BasicBlock*>>& FullIR)
{
vector<SAPFOR::BasicBlock*> result;
if (!funcStmt)
if (!file || !file->filename())
return result;
const vector<SAPFOR::BasicBlock*> funcBlocks = findFuncBlocksByFuncStatement(funcStmt, FullIR);
const auto loopsMapping = findAndAnalyzeLoops(funcStmt, funcBlocks);
const string fileName = file->filename();
set<SAPFOR::BasicBlock*> uniq;
for (const auto& kv : loopsMapping)
for (auto* bb : kv.second)
if (bb)
for (const auto& func : FullIR)
{
FuncInfo* funcInfo = func.first;
if (!funcInfo || !funcInfo->funcPointer)
continue;
if (funcInfo->fileName != fileName)
continue;
const vector<SgStatement*> loops = findLoopStatementsInFunction(funcInfo->funcPointer, fileName);
if (loops.empty())
continue;
for (auto* bb : func.second)
if (isBasicBlockInAnyLoop(bb, loops, fileName))
uniq.insert(bb);
}
result.assign(uniq.begin(), uniq.end());
sort(result.begin(), result.end(),
@@ -137,11 +146,10 @@ vector<SAPFOR::BasicBlock*> findBlocksInLoopsByFullIR(
return result;
}
static map<SgStatement*, vector<SgStatement*>> analyzeBasicBlockIntraDependencies(const SAPFOR::BasicBlock* bb)
static map<SgStatement*, set<SgStatement*>> analyzeBasicBlockIntraDependencies(const SAPFOR::BasicBlock* bb)
{
map<SgStatement*, vector<SgStatement*>> result;
if (!bb)
return result;
return {};
auto isCompoundStmt = [](SgStatement* st) -> bool
{
@@ -151,121 +159,154 @@ static map<SgStatement*, vector<SgStatement*>> analyzeBasicBlockIntraDependencie
return loop_tags.count(v) || control_tags.count(v) || control_end_tags.count(v);
};
auto isTrackable = [](const SAPFOR::Argument* a) -> bool
auto normalizeSageString = [](const string& src) -> string
{
if (!a)
return false;
const auto t = a->getType();
return t == SAPFOR::CFG_ARG_TYPE::VAR || t == SAPFOR::CFG_ARG_TYPE::REG;
};
string normalized;
bool prevSpace = false;
auto argKey = [&](const SAPFOR::Argument* a) -> string
{
if (!a)
return string();
return to_string((int)a->getType()) + "#" + to_string((int)a->getMemType()) + "#" + a->getValue();
};
auto memKeyFromInstr = [&](const SAPFOR::Instruction* instr) -> string
{
if (!instr)
return string();
SgExpression* ex = instr->getExpression();
if (!ex)
return string();
auto exprKey = [&](auto&& self, SgExpression* e) -> string
for (char ch : src)
{
if (!e)
return string("_");
if (auto* ar = isSgArrayRefExp(e))
const bool isSpace = std::isspace(static_cast<unsigned char>(ch));
if (isSpace)
{
SgSymbol* sym = ar->symbol() ? OriginalSymbol(ar->symbol()) : nullptr;
string key = string("A(") + (sym ? sym->identifier() : "?");
const int n = ar->numberOfSubscripts();
for (int i = 0; i < n; ++i)
{
key += ",";
key += self(self, ar->subscript(i));
}
key += ")";
return key;
if (!normalized.empty())
prevSpace = true;
continue;
}
if (e->variant() == VAR_REF || e->variant() == CONST_REF)
{
SgSymbol* sym = e->symbol() ? OriginalSymbol(e->symbol()) : nullptr;
return string((e->variant() == VAR_REF) ? "V(" : "C(") + (sym ? sym->identifier() : "?") + ")";
}
if (prevSpace && !normalized.empty())
normalized += ' ';
normalized += ch;
prevSpace = false;
}
if (auto* v = isSgValueExp(e))
{
if (e->variant() == INT_VAL)
return string("I(") + to_string(v->intValue()) + ")";
if (e->variant() == BOOL_VAL)
return string("B(") + (v->boolValue() ? "1" : "0") + ")";
if (e->variant() == CHAR_VAL)
return string("CH(") + string(1, v->charValue()) + ")";
if (e->variant() == FLOAT_VAL)
return string("F(") + (v->floatValue() ? v->floatValue() : "") + ")";
if (e->variant() == DOUBLE_VAL)
return string("D(") + (v->doubleValue() ? v->doubleValue() : "") + ")";
if (e->variant() == STRING_VAL)
return string("S(") + (v->stringValue() ? v->stringValue() : "") + ")";
}
string key = string("N(") + to_string(e->variant());
if (e->lhs())
key += ",L=" + self(self, e->lhs());
if (e->rhs())
key += ",R=" + self(self, e->rhs());
key += ")";
return key;
};
return "MEMEX#" + exprKey(exprKey, ex);
return normalized;
};
auto isBarrier = [&](const SAPFOR::Instruction* instr) -> bool
auto sageExprToString = [&](SgExpression* expr) -> string
{
if (!instr)
return true;
const auto op = instr->getOperation();
switch (op)
if (!expr)
return string();
char* raw = expr->unparse();
return normalizeSageString(raw ? string(raw) : string());
};
auto arrayElementKey = [&](SgArrayRefExp* arrayRef) -> string
{
if (!arrayRef)
return string();
string key;
SgSymbol* sym = arrayRef->symbol() ? OriginalSymbol(arrayRef->symbol()) : nullptr;
if (sym && sym->identifier())
key = sym->identifier();
else
return sageExprToString(arrayRef);
if (key.empty())
return string();
key += "(";
for (int i = 0; i < arrayRef->numberOfSubscripts(); ++i)
{
case SAPFOR::CFG_OP::F_CALL:
case SAPFOR::CFG_OP::IO_PARAM:
case SAPFOR::CFG_OP::DVM_DIR:
case SAPFOR::CFG_OP::SPF_DIR:
case SAPFOR::CFG_OP::POINTER_ASS:
case SAPFOR::CFG_OP::EXIT:
return true;
default:
return false;
if (i)
key += ", ";
key += sageExprToString(arrayRef->subscript(i));
}
key += ")";
return key;
};
function<void(SgExpression*, set<string>&)> collectUsedKeysFromExpression =
[&](SgExpression* expr, set<string>& usedKeys)
{
if (!expr)
return;
if (isSgValueExp(expr) || expr->variant() == CONST_REF)
return;
if (auto* arrayRef = isSgArrayRefExp(expr))
{
const string key = arrayElementKey(arrayRef);
if (!key.empty())
usedKeys.insert(key);
for (int i = 0; i < arrayRef->numberOfSubscripts(); ++i)
collectUsedKeysFromExpression(arrayRef->subscript(i), usedKeys);
return;
}
if (auto* call = isSgFunctionCallExp(expr))
{
for (int i = 0; i < call->numberOfArgs(); ++i)
collectUsedKeysFromExpression(call->arg(i), usedKeys);
return;
}
if (expr->variant() == VAR_REF)
{
SgSymbol* sym = expr->symbol() ? OriginalSymbol(expr->symbol()) : nullptr;
if (sym && sym->identifier())
usedKeys.insert(sym->identifier());
return;
}
collectUsedKeysFromExpression(expr->lhs(), usedKeys);
collectUsedKeysFromExpression(expr->rhs(), usedKeys);
};
auto collectUsedKeysFromArraySubscripts = [&](SgArrayRefExp* arrayRef, set<string>& usedKeys)
{
if (!arrayRef)
return;
for (int i = 0; i < arrayRef->numberOfSubscripts(); ++i)
collectUsedKeysFromExpression(arrayRef->subscript(i), usedKeys);
};
auto addOperatorDependencies = [&](SgStatement* stmt,
const set<string>& usedKeys,
const map<string, SgStatement*>& varDeclarations,
map<SgStatement*, set<SgStatement*>>& operatorsDependencies)
{
if (!stmt)
return;
for (const string& key : usedKeys)
{
auto it = varDeclarations.find(key);
if (it != varDeclarations.end() && it->second && it->second != stmt)
operatorsDependencies[stmt].insert(it->second);
}
};
auto isDef = [&](const SAPFOR::Instruction* instr) -> bool
auto declarationKeyFromLeftPart = [&](SgStatement* stmt) -> string
{
if (!instr)
return false;
SAPFOR::Argument* r = instr->getResult();
if (!isTrackable(r))
return false;
if (!stmt || stmt->variant() != ASSIGN_STAT)
return string();
const auto op = instr->getOperation();
if (op == SAPFOR::CFG_OP::STORE || op == SAPFOR::CFG_OP::REC_REF_STORE)
return false;
SgExpression* lhs = stmt->expr(0);
if (!lhs)
return string();
return true;
if (auto* arrayRef = isSgArrayRefExp(lhs))
return arrayElementKey(arrayRef);
if (lhs->variant() == VAR_REF)
{
SgSymbol* sym = lhs->symbol() ? OriginalSymbol(lhs->symbol()) : nullptr;
if (sym && sym->identifier())
return sym->identifier();
}
return string();
};
// Reaching definitions inside the BasicBlock in straight-line order:
// lastDef[var] = last operator in this block that defined it.
map<string, pair<SgStatement*, const SAPFOR::Argument*>> lastDef;
map<string, pair<SgStatement*, const SAPFOR::Argument*>> lastMemDef;
map<SgStatement*, set<SgStatement*>> depsSets;
vector<SgStatement*> operatorsOrder;
set<SgStatement*> seenOperators;
for (auto* ir : bb->getInstructions())
{
@@ -273,111 +314,38 @@ static map<SgStatement*, vector<SgStatement*>> analyzeBasicBlockIntraDependencie
continue;
const SAPFOR::Instruction* instr = ir->getInstruction();
SgStatement* opStmt = instr->getOperator();
if (!opStmt)
continue;
if (isCompoundStmt(opStmt))
SgStatement* stmt = instr->getOperator();
if (!stmt || isCompoundStmt(stmt))
continue;
if (isBarrier(instr))
{
for (auto it = lastDef.begin(); it != lastDef.end();)
{
const SAPFOR::Argument* a = it->second.second;
if (!a || a->isMemGlobal() || a->isParameter())
it = lastDef.erase(it);
else
++it;
}
for (auto it = lastMemDef.begin(); it != lastMemDef.end();)
{
const SAPFOR::Argument* a = it->second.second;
if (!a || a->isMemGlobal() || a->isParameter())
it = lastMemDef.erase(it);
else
++it;
}
}
if (!result.count(opStmt))
result[opStmt] = {};
auto addDep = [&](SAPFOR::Argument* use)
{
if (!isTrackable(use))
return;
const string k = argKey(use);
auto it = lastDef.find(k);
if (it == lastDef.end())
return;
if (it->second.first && it->second.first != opStmt)
depsSets[opStmt].insert(it->second.first);
};
auto addMemDep = [&](const string& key)
{
if (key.empty())
return;
auto it = lastMemDef.find(key);
if (it == lastMemDef.end())
return;
if (it->second.first && it->second.first != opStmt)
depsSets[opStmt].insert(it->second.first);
};
addDep(instr->getArg1());
addDep(instr->getArg2());
if (instr->getOperation() == SAPFOR::CFG_OP::RANGE)
addDep(instr->getResult());
if (instr->getOperation() == SAPFOR::CFG_OP::STORE || instr->getOperation() == SAPFOR::CFG_OP::REC_REF_STORE)
addDep(instr->getResult());
if (instr->getOperation() == SAPFOR::CFG_OP::LOAD || instr->getOperation() == SAPFOR::CFG_OP::REC_REF_LOAD)
{
const string memKey = memKeyFromInstr(instr);
addMemDep(memKey);
}
if (isDef(instr))
{
const string dk = argKey(instr->getResult());
lastDef[dk] = { opStmt, instr->getResult() };
}
if (instr->getOperation() == SAPFOR::CFG_OP::STORE || instr->getOperation() == SAPFOR::CFG_OP::REC_REF_STORE)
{
const string k = memKeyFromInstr(instr);
SAPFOR::Argument* base = instr->getArg1();
if (!k.empty() && base)
lastMemDef[k] = { opStmt, base };
addMemDep(k);
}
if (seenOperators.insert(stmt).second)
operatorsOrder.push_back(stmt);
}
for (auto& kv : result)
map<string, SgStatement*> varDeclarations;
map<SgStatement*, set<SgStatement*>> operatorsDependencies;
for (SgStatement* stmt : operatorsOrder)
{
SgStatement* op = kv.first;
auto it = depsSets.find(op);
if (it == depsSets.end())
continue;
set<string> usedKeys;
if (stmt)
{
collectUsedKeysFromExpression(stmt->expr(1), usedKeys);
collectUsedKeysFromArraySubscripts(isSgArrayRefExp(stmt->expr(0)), usedKeys);
}
addOperatorDependencies(stmt, usedKeys, varDeclarations, operatorsDependencies);
kv.second.assign(it->second.begin(), it->second.end());
sort(kv.second.begin(), kv.second.end(),
[](SgStatement* a, SgStatement* b)
{
const int la = a ? a->lineNumber() : -1;
const int lb = b ? b->lineNumber() : -1;
if (la != lb)
return la < lb;
return a < b;
});
const string declarationKey = declarationKeyFromLeftPart(stmt);
if (!declarationKey.empty())
varDeclarations[declarationKey] = stmt;
}
return result;
return operatorsDependencies;
}
static bool reorderOperatorsInBasicBlockUsingDeps(SAPFOR::BasicBlock* bb, const char* expectedFile)
static bool reorderOperatorsInBasicBlockUsingDeps(
SAPFOR::BasicBlock* bb,
const map<SgStatement*, set<SgStatement*>>& operatorsDependencies)
{
if (!bb)
return false;
@@ -407,18 +375,6 @@ static bool reorderOperatorsInBasicBlockUsingDeps(SAPFOR::BasicBlock* bb, const
if (ops.size() < 2)
return false;
// Check that analyzed BB is in the same file as the expected file
const char* bbFile = ops.front()->fileName();
if (!bbFile)
bbFile = "(unknown)";
if (expectedFile && strcmp(expectedFile, bbFile) != 0)
return false;
for (auto* st : ops)
{
if (!st || !st->fileName() || strcmp(st->fileName(), bbFile) != 0)
return false;
}
SgStatement* parent = ops.front()->controlParent();
if (!parent)
return false;
@@ -466,52 +422,72 @@ static bool reorderOperatorsInBasicBlockUsingDeps(SAPFOR::BasicBlock* bb, const
return false;
}
// Compute dependencies (inside BB) and build a new order by moving each statement
// as close as possible after its last dependency (if any).
const auto depsMap = analyzeBasicBlockIntraDependencies(bb);
vector<SgStatement*> order = ops;
map<SgStatement*, int> originalIndex;
for (int i = 0; i < (int)ops.size(); ++i)
originalIndex[ops[i]] = i;
auto indexIn = [](const vector<SgStatement*>& v, SgStatement* s) -> int
{
for (int i = 0; i < (int)v.size(); ++i)
if (v[i] == s)
return i;
return -1;
};
map<SgStatement*, vector<SgStatement*>> moveAfter;
set<SgStatement*> moved;
for (SgStatement* s : ops)
for (SgStatement* st : ops)
{
auto itDeps = depsMap.find(s);
if (itDeps == depsMap.end() || itDeps->second.empty())
continue;
int posS = indexIn(order, s);
if (posS < 0)
auto itDeps = operatorsDependencies.find(st);
if (itDeps == operatorsDependencies.end() || itDeps->second.empty())
continue;
SgStatement* lastDep = nullptr;
int lastDepIdx = -1;
for (SgStatement* dep : itDeps->second)
{
const int j = indexIn(order, dep);
if (j >= 0)
lastDepIdx = max(lastDepIdx, j);
auto itIdx = originalIndex.find(dep);
if (itIdx != originalIndex.end() && itIdx->second > lastDepIdx)
{
lastDep = dep;
lastDepIdx = itIdx->second;
}
}
if (lastDepIdx < 0)
if (!lastDep || lastDep == st)
continue;
if (posS == lastDepIdx + 1)
continue;
order.erase(order.begin() + posS);
int lp = lastDepIdx;
if (posS < lastDepIdx)
lp = lastDepIdx - 1;
const int insertAt = lp + 1;
order.insert(order.begin() + insertAt, s);
moveAfter[lastDep].push_back(st);
moved.insert(st);
}
vector<SgStatement*> order;
order.reserve(ops.size());
set<SgStatement*> emitted;
set<SgStatement*> active;
bool invalidOrder = false;
function<void(SgStatement*)> emitStatement = [&](SgStatement* st)
{
if (!st || invalidOrder || emitted.count(st))
return;
if (active.count(st))
{
invalidOrder = true;
return;
}
active.insert(st);
emitted.insert(st);
order.push_back(st);
auto itMovedAfter = moveAfter.find(st);
if (itMovedAfter != moveAfter.end())
for (SgStatement* dependent : itMovedAfter->second)
emitStatement(dependent);
active.erase(st);
};
for (SgStatement* st : ops)
if (!moved.count(st))
emitStatement(st);
if (invalidOrder || order.size() != ops.size())
return false;
bool changed = false;
for (size_t i = 0; i < ops.size(); ++i)
if (ops[i] != order[i])
@@ -585,18 +561,14 @@ void moveOperators(SgFile* file, const map<FuncInfo*, vector<SAPFOR::BasicBlock*
return;
if (SgFile::switchToFile(file->filename()) == -1)
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
const int funcNum = file->numberOfFunctions();
for (int i = 0; i < funcNum; ++i)
{
SgStatement* st = file->functions(i);
const auto loopBlocks = findBlocksInLoopsByFullIR(file, FullIR);
const auto loopBlocks = findBlocksInLoopsByFullIR(st, FullIR);
for (auto* bb : loopBlocks)
{
if (!bb)
continue;
if (reorderOperatorsInBasicBlockUsingDeps(bb, file->filename()))
countOfTransform += 1;
}
for (auto* bb : loopBlocks)
{
if (!bb)
continue;
const auto operatorsDependencies = analyzeBasicBlockIntraDependencies(bb);
if (reorderOperatorsInBasicBlockUsingDeps(bb, operatorsDependencies))
countOfTransform += 1;
}
}

View File

@@ -1,3 +1,2 @@
#pragma once
#define VERSION_SPF "2486"
#define VERSION_SPF "2487"