From 3cd16c9d0fb334ddf18ae694a039d84e3e15ddc9 Mon Sep 17 00:00:00 2001 From: mkoch Date: Tue, 9 Jan 2024 17:28:28 +0300 Subject: [PATCH] extract data flow analysys part from live variable analysis --- sapfor/experts/Sapfor_2017/CMakeLists.txt | 7 + .../CFGraph/DataFlow/backward_data_flow.h | 108 ++++++ .../_src/CFGraph/DataFlow/data_flow.h | 161 +++++++++ .../_src/CFGraph/live_variable_analysis.cpp | 308 +++++------------- 4 files changed, 356 insertions(+), 228 deletions(-) create mode 100644 sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/backward_data_flow.h create mode 100644 sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/data_flow.h diff --git a/sapfor/experts/Sapfor_2017/CMakeLists.txt b/sapfor/experts/Sapfor_2017/CMakeLists.txt index fd246f3..0ad33fe 100644 --- a/sapfor/experts/Sapfor_2017/CMakeLists.txt +++ b/sapfor/experts/Sapfor_2017/CMakeLists.txt @@ -218,6 +218,11 @@ set(CFG _src/CFGraph/IR.cpp _src/CFGraph/private_variables_analysis.h ) +set(DATA_FLOW + _src/CFGraph/DataFlow/data_flow.h + _src/CFGraph/DataFlow/backward_data_flow.h + ) + set(CREATE_INTER_T _src/CreateInterTree/CreateInterTree.cpp _src/CreateInterTree/CreateInterTree.h) @@ -377,6 +382,7 @@ set(ZLIB ${zlib_sources}/src/adler32.c set(SOURCE_EXE ${CFG} + ${DATA_FLOW} ${CREATE_INTER_T} ${DIRA} ${DISTR} @@ -406,6 +412,7 @@ set(SOURCE_EXE add_executable(Sapfor_F ${SOURCE_EXE}) source_group (CFGraph FILES ${CFG}) +source_group (CFGraph\\DataFlow FILES ${DATA_FLOW}) source_group (Transformations\\ExpressionSubstitution FILES ${EXPR_TRANSFORM}) source_group (Transformations\\CheckPoints FILES ${TR_CP}) diff --git a/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/backward_data_flow.h b/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/backward_data_flow.h new file mode 100644 index 0000000..dd0b3c3 --- /dev/null +++ b/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/backward_data_flow.h @@ -0,0 +1,108 @@ +#pragma once +#include "data_flow.h" + +#include +#include +#include + +#include "../../Utils/SgUtils.h" +#include "../CFGraph.h" +#include "../IR.h" +#include "../RD_subst.h" + +template +class BackwardDataFlowAnalysis : public DataFlowAnalysis { + std::vector reorderSequence(const std::vector& blocks, + const std::set back_edge_sources); +public: + void fit(const std::vector& blocks); +}; + +// minimizes the number of blocks beween the ends of back edges +template +std::vector BackwardDataFlowAnalysis::reorderSequence(const std::vector& blocks, + const std::set back_edge_sources) +{ + std::vector res = { }; + + auto blocks_end = blocks.rend(); + for (auto it = blocks.rbegin(); it < blocks_end; it++) + { + SAPFOR::BasicBlock* curr = *it; + auto res_end = res.end(); + auto inserter = res.begin(); + if (back_edge_sources.count(curr) == 0) + { + auto curr_next_begin = curr->getNext().begin(); + auto curr_next_end = curr->getNext().end(); + while (inserter < res_end && std::find(curr_next_begin, curr_next_end, *inserter) == curr_next_end) + inserter++; + } + + res.insert(inserter, curr); + } + + return res; +} + +template +void BackwardDataFlowAnalysis::fit(const std::vector& blocks) +{ + std::set> back_edges = {}; + + bool returned = false; + std::map> back_edges_by_src; + + auto blocks_sorted = sortCfgNodes(blocks, &back_edges); + + std::set back_edge_sources; + + for (auto& edge : back_edges) + { + back_edges_by_src[edge.first].insert(edge.second); + back_edge_sources.insert(edge.first); + } + + back_edges.clear(); + + blocks_sorted = reorderSequence(blocks_sorted, back_edge_sources); + back_edge_sources.clear(); + + std::reverse(blocks_sorted.begin(), blocks_sorted.end()); + + nodes.clear(); + std::map node_by_block; + + for (auto block : blocks_sorted) + { + NodeType* node = createNode(block); + nodes.push_back(node); + node_by_block[block] = node; + } + + int nodes_size = nodes.size(); + + for (int i = 0; i < nodes_size; i++) + { + NodeType* node = nodes[i]; + + auto back_edges_by_src_it = back_edges_by_src.find(node->getBlock()); + if (back_edges_by_src_it != back_edges_by_src.end()) + { + // This node is a source for back edge + for (auto dest : back_edges_by_src_it->second) + { + auto node_by_block_it = node_by_block.find(dest); + if (node_by_block_it != node_by_block.end()) + node_by_block_it->second->getRollback().insert(i); + } + } + + for (auto next : node->getBlock()->getNext()) + { + auto node_by_block_it = node_by_block.find(next); + if (node_by_block_it != node_by_block.end()) + node->getPrevBlocks().insert(node_by_block_it->second); + } + } +} \ No newline at end of file diff --git a/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/data_flow.h b/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/data_flow.h new file mode 100644 index 0000000..d4ac744 --- /dev/null +++ b/sapfor/experts/Sapfor_2017/_src/CFGraph/DataFlow/data_flow.h @@ -0,0 +1,161 @@ +#pragma once +#include +#include + +#include "../../Utils/SgUtils.h" +#include "../CFGraph.h" +#include "../IR.h" + +enum +{ + CNT_NOTINIT = 0 +}; + +template +class DataFlowAnalysisNode { + int in_cnt = CNT_NOTINIT, out_cnt = CNT_NOTINIT; + + std::set rollback; + std::set ignore_rollback; + + std::set*> prev_blocks; + + SAPFOR::BasicBlock* bb; +public: + DataFlowAnalysisNode(); + + void doStep(); + + virtual DataType getIn() = 0; + virtual DataType getOut() = 0; + + virtual bool addIn(const DataType& data) = 0; + virtual bool addOut(const DataType& data) = 0; + + virtual bool forwardData(const DataType& data) = 0; + + bool newerThan(const DataFlowAnalysisNode* block) const { return out_cnt > block->in_cnt; } + + int getInCnt() { return in_cnt; } + int getOutCnt() { return out_cnt; } + + void setInCnt(int cnt) { in_cnt = cnt; } + void setOutCnt(int cnt) { out_cnt = cnt; } + + std::set& getRollback() { return rollback; } + std::set& getIgnoreRollback() { return ignore_rollback; } + + std::set*>& getPrevBlocks() { return prev_blocks; } + + SAPFOR::BasicBlock* getBlock() { return bb; } + void setBlock(SAPFOR::BasicBlock* b) { bb = b; } +}; + +template +class DataFlowAnalysis { +protected: + std::vector nodes; + + virtual NodeType* createNode(SAPFOR::BasicBlock* block) = 0; +public: + virtual void fit(const std::vector& blocks) = 0; + void analyze(); + + const std::vector& getNodes() { return nodes; } + + ~DataFlowAnalysis(); +}; + +template +DataFlowAnalysisNode::DataFlowAnalysisNode() { + getRollback() = {}; + getIgnoreRollback() = {}; + prev_blocks = {}; +} + +template +void DataFlowAnalysisNode::doStep() +{ + int in_max_cnt = CNT_NOTINIT, out_max_cnt = CNT_NOTINIT; + for (auto next : prev_blocks) + { + if (in_cnt < next->out_cnt) + { + for (const auto& byOut : next->getOut()) + { + bool inserted = addIn({ byOut }); + + if (inserted) + { + if (next->out_cnt > in_max_cnt) + in_max_cnt = next->out_cnt; + + inserted = forwardData({ byOut }); + + if (inserted && next->out_cnt > out_max_cnt) + out_max_cnt = next->out_cnt; + } + } + } + } + + bool was_notinit = (out_cnt == CNT_NOTINIT); + + if (out_max_cnt != CNT_NOTINIT) + out_cnt = out_max_cnt; + + if (in_max_cnt != CNT_NOTINIT) + in_cnt = in_max_cnt; + + // TODO: fix counter overflow + if (was_notinit) + { + out_cnt++; + in_cnt++; + } +} + +template +void DataFlowAnalysis::analyze() { + auto curr = 0; + auto stop = nodes.size(); + + while (curr != stop) + { + auto curr_bb = nodes[curr]; + curr_bb->doStep(); + + const auto& jumps = curr_bb->getRollback(); + if (jumps.size() != 0) + { + auto& ignored_jumps = curr_bb->getIgnoreRollback(); + + bool jump = false; + for (const auto& jump_to : jumps) + { + if (ignored_jumps.insert(jump_to).second && curr_bb->newerThan(nodes[jump_to])) + { + jump = true; + curr = jump_to; + break; + } + } + + if (!jump) + curr_bb->getIgnoreRollback().clear(); + else + continue; + } + + curr++; + } +} + +template +DataFlowAnalysis::~DataFlowAnalysis() +{ + for (DataFlowAnalysisNode* node : nodes) + delete node; + + nodes.clear(); +} \ No newline at end of file diff --git a/sapfor/experts/Sapfor_2017/_src/CFGraph/live_variable_analysis.cpp b/sapfor/experts/Sapfor_2017/_src/CFGraph/live_variable_analysis.cpp index 4b40ff9..3452e8e 100644 --- a/sapfor/experts/Sapfor_2017/_src/CFGraph/live_variable_analysis.cpp +++ b/sapfor/experts/Sapfor_2017/_src/CFGraph/live_variable_analysis.cpp @@ -1,5 +1,6 @@ #include "live_variable_analysis.h" #include "RD_subst.h" +#include "DataFlow/backward_data_flow.h" #include #include @@ -197,85 +198,67 @@ static void buildUseDef(SAPFOR::BasicBlock* block, set& use, vector& formal_parameters, vector& fcalls, const map& funcByName); -enum -{ - CNT_NOTINIT = 0 +class LiveVarAnalysisNode : public DataFlowAnalysisNode>> { +private: + set live, dead; +public: + map> getIn() + { + return getBlock()->getLiveOut(); + }; + + map> getOut() + { + return getBlock()->getLiveIn(); + }; + + bool addIn(const map>& data) + { + return getBlock()->addLiveOut(data); + }; + + bool addOut(const map>& data) + { + return getBlock()->addLiveIn(data); + }; + + bool forwardData(const map>& data) + { + bool inserted = false; + + for (const auto& byArg : data) + if (live.find(byArg.first) == live.end() && dead.find(byArg.first) == dead.end()) + inserted |= getBlock()->addLiveIn({ byArg }); + + return inserted; + }; + + LiveVarAnalysisNode(SAPFOR::BasicBlock* block, vector& formal_parameters, + vector& fcalls, const map& funcByName) + { + setBlock(block); + + buildUseDef(getBlock(), live, dead, formal_parameters, fcalls, funcByName); + + for (SAPFOR::Argument* arg : live) + getBlock()->addLiveIn({ { arg, { getBlock() } } }); + } }; -struct BasicBlockNode -{ - SAPFOR::BasicBlock* bb; - set live, dead; - int in_cnt, out_cnt; - set next_blocks; +class LiveVarAnalysis : public BackwardDataFlowAnalysis>, LiveVarAnalysisNode> { +protected: + vector& formal_parameters; + vector& fcalls; + const map& funcByName; - set::reverse_iterator> rollback; - set::reverse_iterator> ignore_rollback; - - BasicBlockNode(SAPFOR::BasicBlock* block, vector& formal_parameters, - vector& fcalls, const map& funcByName) + LiveVarAnalysisNode* createNode(SAPFOR::BasicBlock* block) override { - bb = block; - out_cnt = in_cnt = CNT_NOTINIT; - - buildUseDef(bb, live, dead, formal_parameters, fcalls, funcByName); - for (SAPFOR::Argument* arg : live) - bb->addLiveIn({ { arg, { bb } } }); - - rollback = {}; - ignore_rollback = {}; - next_blocks = {}; - } - - void updateLive() - { - bool in_changed = false, out_changed = false; - int in_max_cnt = CNT_NOTINIT, out_max_cnt = CNT_NOTINIT; - for (auto next : next_blocks) - { - if (out_cnt < next->in_cnt) - { - for (const auto& byArg : next->bb->getLiveIn()) - { - bool inserted = bb->addLiveOut({ byArg }); - out_changed |= inserted; - - if (inserted) - { - if (next->in_cnt > out_max_cnt) - out_max_cnt = next->in_cnt; - - if (live.find(byArg.first) == live.end() && dead.find(byArg.first) == dead.end()) - { - inserted = bb->addLiveIn({ byArg }); - if (inserted && next->in_cnt > in_max_cnt) - { - in_max_cnt = next->in_cnt; - in_changed = true; - } - } - } - } - } - } - - bool was_notinit = (in_cnt == CNT_NOTINIT); - - if (in_max_cnt != CNT_NOTINIT) - in_cnt = in_max_cnt; - - if (out_max_cnt != CNT_NOTINIT) - out_cnt = out_max_cnt; - - // TODO: fix counter overflow - if (was_notinit) - { - out_cnt++; - in_cnt++; - } - } - - bool newerThan(const BasicBlockNode* block) const { return in_cnt > block->out_cnt; } + return new LiveVarAnalysisNode(block, formal_parameters, fcalls, funcByName); + }; +public: + LiveVarAnalysis(vector& formal_parameters, vector& fcalls, + const map& funcByName) : formal_parameters(formal_parameters), fcalls(fcalls), funcByName(funcByName) + { }; }; //Build use and def sets of block. Result are stored in use and def @@ -381,137 +364,6 @@ static void buildUseDef(SAPFOR::BasicBlock* block, set& use, def = tmp_def; } -// minimizes the number of blocks beween the ends of back edges -static vector reorderSequence(const vector& blocks, - const set back_edge_sources) -{ - vector res = { }; - - auto blocks_end = blocks.rend(); - for (auto it = blocks.rbegin(); it < blocks_end; it++) - { - SAPFOR::BasicBlock* curr = *it; - auto res_end = res.end(); - auto inserter = res.begin(); - if (back_edge_sources.count(curr) == 0) - { - auto curr_next_begin = curr->getNext().begin(); - auto curr_next_end = curr->getNext().end(); - while (inserter < res_end && std::find(curr_next_begin, curr_next_end, *inserter) == curr_next_end) - inserter++; - } - - res.insert(inserter, curr); - } - - return res; -} - -// finds back edges, reorders and converts blocks into vector of BasicBlockNode* -// fills vector of formal parameters for given function -// fills info about arguments which becomes live after calls of functions -static vector toBlocksWithCnt(const vector& blocks, - vector& formal_parameters, - vector& fcalls, const map& funcByName) -{ - set> back_edges = {}; - - bool returned = false; - map> back_edges_by_src; - - auto blocks_sorted = sortCfgNodes(blocks, &back_edges); - - set back_edge_sources; - - for (auto& edge : back_edges) - { - back_edges_by_src[edge.first].insert(edge.second); - back_edge_sources.insert(edge.first); - } - - back_edges.clear(); - - blocks_sorted = reorderSequence(blocks_sorted, back_edge_sources); - back_edge_sources.clear(); - - vector blocks_with_counters; - map node_by_block; - for (auto block : blocks_sorted) - { - BasicBlockNode* node = new BasicBlockNode(block, formal_parameters, fcalls, funcByName); - blocks_with_counters.push_back(node); - node_by_block[block] = node; - } - - for (auto r_it = blocks_with_counters.rbegin(); r_it != blocks_with_counters.rend(); r_it++) - { - auto back_edges_by_src_it = back_edges_by_src.find((*r_it)->bb); - if (back_edges_by_src_it != back_edges_by_src.end()) - { - // This node is a source for back edge - for (auto dest : back_edges_by_src_it->second) - { - auto node_by_block_it = node_by_block.find(dest); - if (node_by_block_it != node_by_block.end()) - node_by_block_it->second->rollback.insert(r_it); - } - } - - for (auto next : (*r_it)->bb->getNext()) - { - auto node_by_block_it = node_by_block.find(next); - if (node_by_block_it != node_by_block.end()) - (*r_it)->next_blocks.insert(node_by_block_it->second); - } - } - - return blocks_with_counters; -} - -// iterate over separated subset of blocks -static void analyzeSequence(const vector& blocks_with_counters) -{ - auto curr = blocks_with_counters.rbegin(); - auto stop = blocks_with_counters.rend(); - - while (curr != stop) - { - auto curr_bb = *curr; - curr_bb->updateLive(); - - const auto& jumps = curr_bb->rollback; - if (jumps.size() != 0) - { - auto& ignored_jumps = curr_bb->ignore_rollback; - - bool jump = false; - for (const auto& jump_to : jumps) - { - if (ignored_jumps.insert(jump_to).second && curr_bb->newerThan(*jump_to)) - { - jump = true; - curr = jump_to; - break; - } - } - - if (!jump) - curr_bb->ignore_rollback.clear(); - else - continue; - } - - curr++; - } -} - -// delete all nodes from vector -static void freeBlocksWithCnt(const vector& blocks_with_counters) -{ - for (auto to_free : blocks_with_counters) - delete to_free; -} - // prints info about live variables void doDumpLive(const map>& CFGraph_for_project) { @@ -682,11 +534,11 @@ void runLiveVariableAnalysis(const map>& callDeps[byFunc.first].insert(byFunc.first->callsFromV.begin(), byFunc.first->callsFromV.end()); funcByName[byFunc.first->funcName] = byFunc.first; } - + vector> scc; vector> callLvls = groupByCallDependencies(callDeps, scc); - map> func_to_blocks_with_cnt; + map func_to_analysis_object; map> func_to_parameters; list> live_for_fcalls; @@ -707,8 +559,9 @@ void runLiveVariableAnalysis(const map>& auto& params = func_to_parameters[byFunc->funcName] = vector(byFunc->funcParams.countOfPars, NULL); - auto& blocks_with_cnt = (func_to_blocks_with_cnt[byFunc->funcName] = toBlocksWithCnt(itCFG->second, params, curr_fcalls, funcByName)); - analyzeSequence(blocks_with_cnt); + LiveVarAnalysis* analysis_object = (func_to_analysis_object[byFunc->funcName] = new LiveVarAnalysis(params, curr_fcalls, funcByName)); + analysis_object->fit(itCFG->second); + analysis_object->analyze(); fillLiveDeadArgs(byFunc, itCFG->second); } @@ -728,7 +581,7 @@ void runLiveVariableAnalysis(const map>& auto it = assembled_fcalls.find(call.func); if (it == assembled_fcalls.end()) it = assembled_fcalls.insert({ call.func, fcall(call.func, call.block, {}) }).first; - + for (const auto& p : call.live_after) it->second.live_after[p.first].insert(p.second.begin(), p.second.end()); @@ -738,12 +591,12 @@ void runLiveVariableAnalysis(const map>& for (const auto& func : assembled_fcalls) { - auto func_it = func_to_blocks_with_cnt.find(func.first->funcName); - if (func_it == func_to_blocks_with_cnt.end()) + auto func_it = func_to_analysis_object.find(func.first->funcName); + if (func_it == func_to_analysis_object.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); auto param_it = func_to_parameters.find(func.first->funcName); - if(param_it == func_to_parameters.end()) + if (param_it == func_to_parameters.end()) printInternalError(convertFileName(__FILE__).c_str(), __LINE__); const vector& params = param_it->second; @@ -761,14 +614,14 @@ void runLiveVariableAnalysis(const map>& } } - set exits; + set exits; int max_cnt = CNT_NOTINIT; - for (auto block : func_it->second) + for (auto block : func_it->second->getNodes()) { - if (block->bb->getNext().size() == 0) + if (block->getBlock()->getNext().size() == 0) exits.insert(block); - if (block->out_cnt > max_cnt) - max_cnt = block->out_cnt; + if (block->getInCnt() > max_cnt) + max_cnt = block->getInCnt(); } max_cnt++; @@ -777,23 +630,22 @@ void runLiveVariableAnalysis(const map>& { for (const auto& byArg : live_after) { - if (exit->bb->addLiveOut({ byArg })) + if (exit->addIn({ byArg })) { - exit->out_cnt = max_cnt; - if (exit->live.find(byArg.first) == exit->live.end() && exit->dead.find(byArg.first) == exit->dead.end()) - if (exit->bb->addLiveIn({ byArg })) - exit->in_cnt = max_cnt; + exit->setInCnt(max_cnt); + if (exit->forwardData({ byArg })) + exit->setOutCnt(max_cnt); } } } - + // now we can update live sets in all blocks - analyzeSequence(func_it->second); + func_it->second->analyze(); } } - for (const auto& nodeByFunc : func_to_blocks_with_cnt) - freeBlocksWithCnt(nodeByFunc.second); + for (const auto& byFunc : func_to_analysis_object) + delete byFunc.second; for (auto& byFunc : CFGraph_for_project) for (auto& byBlock : byFunc.second)