This commit is contained in:
2025-03-12 12:37:19 +03:00
parent 1c851baa7e
commit 6a4040be3e
426 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,244 @@
#include <assert.h>
#include "AMView.h"
#include <iostream>
using namespace std;
// Shared log stream used for error and trace output; defined in another translation unit.
extern ofstream prot;
//grig
// Running per-index maximum of the axis sizes seen by AMView::DisAM
// (used for automatic configuration search); defined elsewhere.
extern long_vect MinSizesOfAM;
// While true, AMView::DisAM folds the distributed axis sizes into MinSizesOfAM.
bool FirstTrace=true;
//\grig
// Creates an abstract-machine view whose Space has the axis sizes given in
// ASizeArray. The new view is not distributed yet: it has no VM, no aligned
// arrays, an empty distribution rule and an empty fill array.
AMView::AMView(const vector<long>& ASizeArray)
    : Space(ASizeArray),
      VM_Dis(0),
      AlignArrays(),
      DistRule(),
      FillArr(),
      Repl(0)
{
}
// The destructor releases nothing explicitly: VM_Dis and the DArray pointers
// stored in AlignArrays are left untouched.
AMView::~AMView() {}
// Disabled implementation (compiled only when `nodef` is defined).
// For array dimension arrDim (1-based) it looks up the alignment rule and, when
// the dimension is align_NORMAL and its target axis is map_BLOCK distributed,
// returns the corresponding PAxis; otherwise it returns 0. `dir` receives +1 or
// -1 according to the sign of the alignment coefficient A.
// NOTE(review): the body uses AlignRule and AM_Dis, which are not members of
// AMView as declared in AMView.h - this looks like code that belongs to the
// array class; confirm before enabling.
#ifdef nodef
long AMView::GetMapDim(long arrDim, int & dir)
{
long vmDim = 0;
AlignAxis align;
DistAxis dist;
long amDim;
align = AlignRule[arrDim-1];
if (align.Attr == align_NORMAL) {
amDim = align.TAxis;
dir = (align.A > 0) ? 1 : -1;
dist = AM_Dis->DistRule[amDim-1];
if (dist.Attr == map_BLOCK)
vmDim = dist.PAxis;
}
return vmDim;
}
#endif
// Distributes this abstract-machine view over the virtual machine AVM_Dis.
//
// Parameters:
//   AVM_Dis          - target virtual machine; must not be null.
//   AAxisArray       - AAxisArray[i] is the 1-based AM axis mapped onto VM axis
//                      i+1, or 0 when that VM axis is replicated. It may be
//                      shorter than the VM rank; missing entries count as 0.
//   ADistrParamArray - not used by this implementation.
//
// Effects:
//   * DistRule is rebuilt from scratch: Rank() entries describing the AM axes
//     (map_COLLAPSE, or map_BLOCK when the axis is distributed) followed by one
//     entry per VM axis (map_REPLICATE or map_NORMVMAXIS).
//   * FillArr[i] receives, for a distributed VM axis, the VM axis size minus
//     the number of blocks actually needed for the AM axis.
//   * While FirstTrace is true, MinSizesOfAM is raised to the sizes of the
//     axes named in AAxisArray.
//   * VM_Dis is set to AVM_Dis.
// A negative entry in AAxisArray is reported to `prot` and terminates the
// program with exit(1).
void AMView::DisAM(VM *AVM_Dis, const vector<long>& AAxisArray, const vector<long>& ADistrParamArray)
{
    int i;
    long BlockSize;

    assert(AVM_Dis != NULL);
    const unsigned int VMR = AVM_Dis->Rank();
    assert(AAxisArray.size() <= VMR);

    const int vmRank = static_cast<int>(VMR);
    const int amRank = static_cast<int>(Rank());
    const int DistRuleSize = vmRank + amRank;

    FillArr.assign(VMR, 0L);

    // Pad the axis list with zeros up to the VM rank. The elements must really
    // exist (resize), because they are read below; reserve() alone only changes
    // the capacity and indexing past size() is undefined behaviour.
    vector<long> AxisArr(AAxisArray);
    if (AxisArr.size() < VMR)
        AxisArr.resize(VMR, 0L);

    // Start from an empty rule so that a repeated call (redistribution) does not
    // append to, or keep stale map_BLOCK entries from, the previous rule.
    DistRule.clear();
    DistRule.reserve(DistRuleSize);

    // Default rule: every AM axis collapsed ...
    for (i = 0; i < amRank; i++)
        DistRule.push_back(DistAxis(map_COLLAPSE, i + 1, 0));
    // ... and every VM axis not yet bound to an AM axis.
    for (i = 0; i < vmRank; i++)
        DistRule.push_back(DistAxis(map_NORMVMAXIS, 0, i + 1));

    //====
    // Correction of MinSizesOfAM - for automatic finding of a configuration.
    // NOTE(review): GetSize() is called here before the negative-axis check
    // below, and also for entries equal to 0 - confirm that is intended.
    if (FirstTrace == true)
    {
        const int given = static_cast<int>(AAxisArray.size());
        for (i = 0; i < given; i++)
        {
            MinSizesOfAM[i] = MinSizesOfAM[i] > GetSize(AxisArr[i]) ?
                              MinSizesOfAM[i] :
                              GetSize(AxisArr[i]);
        }
    }
    //=***

    // Fill in the rule according to the requested axis mapping.
    for (i = 0; i < vmRank; i++)
    {
        if (AxisArr[i] < 0)
        {
            prot << "Wrong call DissAM" << endl;
            exit(1);
        }

        if (AxisArr[i] == 0)
        {
            DistRule[amRank + i] = DistAxis(map_REPLICATE, 0, i + 1);
        }
        else
        {
            DistRule[amRank + i] = DistAxis(map_NORMVMAXIS, AxisArr[i], i + 1);
            DistRule[AxisArr[i] - 1] = DistAxis(map_BLOCK, AxisArr[i], i + 1);

            // Block size is the AM axis size divided by the VM axis size, rounded up.
            BlockSize = (GetSize(AxisArr[i]) - 1) / AVM_Dis->GetSize(i + 1) + 1;
            // VM axis size minus the number of blocks of that size that are needed.
            FillArr[i] = AVM_Dis->GetSize(i + 1) - (GetSize(AxisArr[i]) - 1) / BlockSize - 1;
        }
    }

    VM_Dis = AVM_Dis;

#ifdef _TIME_TRACE_
    // Dump the resulting rule to the log.
    vector<DistAxis>::iterator first = DistRule.begin(), last = DistRule.end();
    while (first != last)
    {
        prot << endl << first->Attr << " " << first->Axis << " " << first->PAxis << endl;
        ++first;
    }
#endif
}
double AMView::RDisAM(const vector<long>& AAxisArray, const vector<long>& ADistrParamArray, long ANewSign)
{
if (!IsDistribute()) {
prot << "Wrong call RDisAM" << endl;
exit(1);
}
if ( ANewSign != 0) {
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> => <20><> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
DisAM(VM_Dis, AAxisArray, ADistrParamArray);
return 0;
}
double time;
DArray oldDA;
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> AMView <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
AMView *oldAM = new AMView(*this);
// <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> VM_Dis*VM_Dis, <20> VM_Dis*AVM_Dis (<28><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> VM)
CommCost *rdisCost = new CommCost(VM_Dis);
// <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> VM_Dis <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>(<28>.<2E>. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> VM <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>)
DisAM(VM_Dis, AAxisArray, ADistrParamArray);
list<DArray *>::iterator newDAi = AlignArrays.begin();
list<DArray *>::iterator last = AlignArrays.end();
while (newDAi != last)
{
oldDA = DArray(**newDAi);
oldDA.AM_Dis = oldAM;
rdisCost->Update(&oldDA, *newDAi);
newDAi++;
}
#ifdef _TIME_TRACE_
// <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
int i, j;
prot << endl;
for (i = 0; i < VM_Dis->GetLSize(); i++)
{
for (j = 0; j < VM_Dis->GetLSize(); j++)
{
prot << "[" << i << "]" << "[" << j << "] = " << rdisCost->transfer[i][j] << "; ";
}
prot << endl;
}
// <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
#endif
time = rdisCost->GetCost();
delete rdisCost;
delete oldAM;
return time;
}
// Records Aln_da as aligned with this view by appending the pointer to the
// end of AlignArrays.
void AMView::AddDA(DArray * Aln_da)
{
    AlignArrays.insert(AlignArrays.end(), Aln_da);
}
// Removes the first occurrence of RAln_da from AlignArrays.
// Returns 0 when the pointer was found and erased, -1 when it is not in the list.
int AMView::DelDA(DArray * RAln_da)
{
    list<DArray*>::iterator position =
        find(AlignArrays.begin(), AlignArrays.end(), RAln_da);

    if (position != AlignArrays.end())
    {
        AlignArrays.erase(position);
        return 0;
    }
    return -1;
}
// Copy constructor. Copies the Space part, the VM pointer, the distribution
// rule, the fill array and the Repl flag. The list of aligned arrays is
// deliberately NOT copied: the copy starts with an empty AlignArrays.
//
// Repl used to be left uninitialized here, so reading it from a copy was
// undefined behaviour; it is now copied from the source object.
//
// NOTE(review): BSize and weightEl are still default-constructed (empty) in the
// copy, exactly as before. RDisAM uses such a copy as the "old" distribution -
// confirm whether the weights and block sizes should be carried over as well.
AMView::AMView(const AMView &x)
    : Space(x),
      VM_Dis(x.VM_Dis),
      AlignArrays(),
      DistRule(x.DistRule),
      FillArr(x.FillArr),
      Repl(x.Repl)
{
}
// Returns true once the view has a VM assigned (VM_Dis is non-null).
bool AMView::IsDistribute()
{
    return VM_Dis != 0;
}

View File

@@ -0,0 +1,114 @@
#ifndef AMViewH
#define AMViewH
//////////////////////////////////////////////////////////////////////
//
// AMView.h: interface for the AMView class.
//
//////////////////////////////////////////////////////////////////////
#include <stdlib.h>
#include <fstream>
#include <list>
#include <vector>
#include <algorithm>
#include "Vm.h"
#include "DistAxis.h"
#include "DArray.h"
class DArray;
//grig
// Holds an identifier together with a vector of "opt weights".
//
// Changes from the previous version:
//   * ID is initialised with 0 instead of the null-pointer macro NULL.
//   * The weights are copied by vector assignment, so passing the object's own
//     `body` to SetWeights no longer wipes the weights before they are read.
//   * Read-only members are const; the hand-written destructor (which only
//     emptied the vector) is gone because std::vector cleans up after itself.
class WeightClass
{
public :
    long ID;                  // identifier supplied by the caller; 0 until one is set
    std::vector<double> body; // opt weights

    // Creates an object with ID 0 and no weights.
    WeightClass() : ID(0), body()
    {
    }

    // Creates an object with the given ID and a copy of init_weights.
    WeightClass(long AID, std::vector<double>& init_weights) : ID(AID), body(init_weights)
    {
    }

    // Overwrites AAweights with a copy of the stored weights.
    void GetWeights(std::vector<double> & AAweights) const
    {
        AAweights = body;
    }

    // Replaces the ID and the stored weights; init_weights may alias `body`.
    void SetWeights(long AID, std::vector<double>& init_weights)
    {
        ID = AID;
        body = init_weights;
    }

    // Number of stored weights.
    long GetSize() const
    {
        return static_cast<long>(body.size());
    }
};
//\grig
// View of an abstract machine (AM): a Space that can be distributed over a
// virtual machine (VM) and that keeps track of the arrays aligned with it.
class AMView : public Space {
public:
VM *VM_Dis; // VM this view is distributed on; 0 until DisAM has been called
std::list<DArray*> AlignArrays; // arrays registered with AddDA and removed with DelDA
std::vector<DistAxis> DistRule; // rule built by DisAM: Rank() entries for the AM axes, then one entry per VM axis
std::vector<long> FillArr; // per VM axis, set by DisAM: VM axis size minus the number of blocks needed for the mapped AM axis
std::vector<long> BSize; //====// optional per-axis block-size multiples, read by Block(DArray*, long, int)
int Repl; // set to 0 by the sizing constructor; NOTE(review): meaning is not visible in this file - confirm
//grig
WeightClass weightEl; // weights read by Block(DArray*, long, int) when it computes block bounds
//\grig
AMView(const std::vector<long>& ASizeArray);
AMView(const AMView &); // !!! copies the Space part, VM_Dis, DistRule and FillArr; AlignArrays starts empty
~AMView();
long GetMapDim(long arrDim, int & dir);
int DelDA(DArray* RAln_da); // remove a DArray from AlignArrays (0 on success, -1 if it is not there)
void AddDA(DArray* Aln_da); // append a DArray to AlignArrays
// Distribute this view over the given VM according to AAxisArray
void DisAM(VM *AVM_Dis, const std::vector<long>& AAxisArray, const std::vector<long>& ADistrParamArray);
// Redistribute this view on its current VM; returns the computed cost (0 when ANewSign is non-zero)
double RDisAM(const std::vector<long>& AAxisArray, const std::vector<long>& ADistrParamArray,
long ANewSign);
bool IsDistribute();
};
#endif

View File

@@ -0,0 +1,71 @@
// AlignAxis.cpp: implementation of the AlignAxis class.
//
//////////////////////////////////////////////////////////////////////
#include <assert.h>
#include "AlignAxis.h"
using namespace std;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor. Gives every field a defined value (align_NORMAL and
// zeros) so that a default-constructed rule can be compared, printed or copied
// without reading indeterminate memory. Previously all fields were left
// uninitialized.
AlignAxis::AlignAxis() :
    Attr(align_NORMAL), Axis(0), TAxis(0), A(0), B(0), Bound(0)
{
}
// Nothing to release; the destructor body is intentionally empty.
AlignAxis::~AlignAxis() {}
// Builds a rule entry from explicit field values; each argument is stored in
// the field of the same name (without the leading 'A').
AlignAxis::AlignAxis(align_Type AAttr, long AAxis, long ATAxis,
                     long AA, long AB, long ABound)
    : Attr(AAttr),
      Axis(AAxis),
      TAxis(ATAxis),
      A(AA),
      B(AB),
      Bound(ABound)
{
}
// Member-wise assignment of all six fields; returns *this.
AlignAxis& AlignAxis :: operator= (const AlignAxis& AA)
{
    if (this != &AA)
    {
        Attr  = AA.Attr;
        Axis  = AA.Axis;
        TAxis = AA.TAxis;
        A     = AA.A;
        B     = AA.B;
        Bound = AA.Bound;
    }
    return *this;
}
// Two entries are equal when all six fields match.
bool operator == (const AlignAxis& x, const AlignAxis& y)
{
    if (x.Attr != y.Attr)
        return false;
    if (x.Axis != y.Axis)
        return false;
    if (x.TAxis != y.TAxis)
        return false;
    if (x.A != y.A)
        return false;
    if (x.B != y.B)
        return false;
    return x.Bound == y.Bound;
}
// Ordering of rule entries.
// Entries whose kind is align_NORMAL or align_COLLAPSE come before all other
// kinds and are ordered among themselves by Axis; the remaining kinds are
// ordered among themselves by TAxis.
bool operator < (const AlignAxis& x, const AlignAxis& y)
{
    const bool xByAxis = (x.Attr == align_NORMAL || x.Attr == align_COLLAPSE);
    const bool yByAxis = (y.Attr == align_NORMAL || y.Attr == align_COLLAPSE);

    if (xByAxis && yByAxis)
        return x.Axis < y.Axis;
    if (xByAxis)
        return true;
    if (yByAxis)
        return false;
    return x.TAxis < y.TAxis;
}
#ifdef P_DEBUG
// Debug printer: writes all six fields of the entry on one line and returns
// the stream.
ostream& operator << (ostream& os, const AlignAxis& aa)
{
    os << "AlignAxis: Attr = " << aa.Attr;
    os << ", Axis = " << aa.Axis;
    os << ", TAxis = " << aa.TAxis;
    os << ", A = " << aa.A;
    os << ", B = " << aa.B;
    os << ", Bound = " << aa.Bound;
    return os;
}
#endif

View File

@@ -0,0 +1,48 @@
#ifndef AlignAxisH
#define AlignAxisH
//////////////////////////////////////////////////////////////////////
//
// AlignAxis.h: interface for the AlignAxis class.
//
//////////////////////////////////////////////////////////////////////
#include <fstream>
// Kinds of alignment-rule entry. The numeric values are spelled out because
// they appear in trace output.
enum align_Type {
    align_NORMAL    = 0,
    align_REPLICATE = 1,
    align_COLLAPSE  = 2,
    align_CONSTANT  = 3,
    align_BOUNDREPL = 4, // Additional align styles
    align_NORMTAXIS = 5  // Normal template's axis
};
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// One entry of an alignment rule; the kind of entry is given by Attr
// (see align_Type).
class AlignAxis {
public:
align_Type Attr; // kind of entry
long Axis; // ordering key of align_NORMAL / align_COLLAPSE entries in operator<; presumably the array axis number - confirm
long TAxis; // ordering key of all other kinds in operator<; presumably the template axis number - confirm
long A; // NOTE(review): presumably the multiplier of a linear mapping A*i + B - confirm
long B; // NOTE(review): presumably the offset of that mapping - confirm
long Bound; // NOTE(review): extra bound value; its meaning is not visible here - confirm
// A, B and Bound default to 0 when omitted.
AlignAxis(align_Type AAttr, long AAxis, long ATAxis,
long AA = 0, long AB = 0, long ABound = 0);
AlignAxis();
virtual ~AlignAxis();
AlignAxis& operator= (const AlignAxis&);
friend bool operator==(const AlignAxis& x, const AlignAxis& y);
friend bool operator<(const AlignAxis& x, const AlignAxis& y);
#ifdef P_DEBUG
// Debug-only stream output.
friend std::ostream& operator << (std::ostream& os, const AlignAxis& s);
#endif
};
#endif

View File

@@ -0,0 +1,135 @@
// BGroup.cpp: implementation of the BoundGroup class.
//
//////////////////////////////////////////////////////////////////////
#include <iostream>
#include "BGroup.h"
#include "../../Utils/utils.h"
#include "../../Utils/errors.h"
using namespace std;
extern ofstream prot;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// A new group is not bound to any AMView yet (amPtr is null).
BoundGroup::BoundGroup() : amPtr(0)
{
}

// Nothing to release; the destructor body is intentionally empty.
BoundGroup::~BoundGroup() {}
// Adds the shadow edges (bounds) of one distributed array to this group and
// updates the accumulated communication cost.
//
// Parameters:
//   ADArray          - array whose bounds are added.
//   BLeftBSizeArray  - requested low-side widths per array dimension; -1 means
//                      "use ADArray->LowShdWidthArray for that dimension".
//   BRightBSizeArray - requested high-side widths per array dimension; -1 means
//                      "use ADArray->HiShdWidthArray for that dimension".
//   ACornerSign      - 1 requests corner handling, which is only enabled when
//                      more than one dimension was recorded by this call.
//
// All arrays of one group must be aligned with the same AMView; problems are
// written to `prot` and reported through __spf_print / printInternalError.
// NOTE(review): both width arrays are indexed up to the array rank without a
// size check - callers presumably always pass rank-sized vectors; confirm.
void BoundGroup::AddBound(DArray *ADArray, const vector<long>& BLeftBSizeArray,
const vector<long>& BRightBSizeArray, long ACornerSign)
{
long i,
proc,
vmDim,
arrDim;
int dir,
count = 0;
bool IsCorner = false;
Block b;
// vector<DimBound> dimInfo;
long daRank = ADArray->Rank();
// Work on copies so that the -1 placeholders can be replaced.
vector<long> ALeftBSizeArray(BLeftBSizeArray);
vector<long> ARightBSizeArray(BRightBSizeArray);
for (i = 0; i < daRank; i++) {
if (ALeftBSizeArray[i] == -1)
ALeftBSizeArray[i] = ADArray->LowShdWidthArray[i];
if (ARightBSizeArray[i] == -1)
ARightBSizeArray[i] = ADArray->HiShdWidthArray[i];
}
// The first array fixes the AMView of the group; later arrays must match it.
if (!amPtr)
amPtr = ADArray->AM_Dis;
else if (amPtr != ADArray->AM_Dis) {
// arrays is align on different AMView
prot << "Wrong call AddBound: arrays is align on different AMView" << endl;
__spf_print(1, "Wrong call AddBound: arrays is align on different AMView\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
#ifdef P_DEBUG
for (i = 0; i < daRank; i++) {
vmDim = ADArray->GetMapDim(i+1, dir);
prot << "arDim=" << i+1
<< ", vmDim=" << vmDim
<< ", Left=" << ALeftBSizeArray[i]
<< ", Right=" << ARightBSizeArray[i]
<<", dir =" << dir << endl;
}
#endif
// Create the cost accumulator on first use.
// NOTE(review): ADArray->AM_Dis is dereferenced here, before the IsAlign()
// check below - confirm that AM_Dis cannot be null at this point.
if (boundCost.transfer.size() == 0)
boundCost = CommCost(ADArray->AM_Dis->VM_Dis);
// Arrays with the Repl flag set contribute nothing.
if (ADArray->Repl)
return;
if (!ADArray->IsAlign()) {
// Array is'n align on any AMView
prot << "Wrong call AddBound: Array is'n align on any AMView" << endl;
__spf_print(1, "Wrong call AddBound: Array is'n align on any AMView\n");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
// After the -1 substitution no width may be negative.
for (i = 0; i < daRank; i++) {
if (ALeftBSizeArray[i] < 0 || ARightBSizeArray[i] < 0) {
prot << "Wrong call AddBound" << endl;
// NOTE(review): the arguments are long but the format uses %d - confirm what __spf_print expects.
__spf_print(1, "Wrong call AddBound ALeftBSizeArray[i]=%d ARightBSizeArray[i]=%d\n", ALeftBSizeArray[i], ARightBSizeArray[i]);
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
}
// Every non-empty local block must be able to hold the requested widths.
for (proc = 0; proc < amPtr->VM_Dis->GetLSize(); proc++) {
//grig b = Block(ADArray, proc);
b = Block(ADArray, proc,1);
// prot << "proc=" << proc << ", empty=" << b.empty() << ", IsBoundIn=" << b.IsBoundIn(ALeftBSizeArray, ARightBSizeArray) << endl;
if (!b.empty() && !b.IsBoundIn(ALeftBSizeArray, ARightBSizeArray)) {
prot << "Fatal error: Local array size is less then shadow width." << endl;
__spf_print(1, "Fatal error: Local array size is less then shadow width.");
printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
}
}
// Record one DimBound per array dimension that passes the test below.
// NOTE(review): dimInfo is a member and is only appended to, so entries from
// earlier AddBound calls are passed to BoundUpdate again - confirm intended.
for (arrDim = 1; arrDim <= daRank; arrDim++)
{
vmDim = ADArray->GetMapDim(arrDim, dir);
if (vmDim >= 0 && //====// inclusive test; the original (now unreadable) note refers to a strict ">" comparison
(ALeftBSizeArray[arrDim-1] >= 0 || ARightBSizeArray[arrDim-1] >= 0)) { //====// inclusive test as well, so zero widths are recorded too
dimInfo.push_back(
DimBound(arrDim, vmDim, dir, ALeftBSizeArray[arrDim-1], ARightBSizeArray[arrDim-1])
);
count++;
}
}
// Corners only matter when more than one dimension takes part.
if (ACornerSign == 1 && count > 1)
IsCorner = true;
boundCost.BoundUpdate(ADArray, dimInfo, IsCorner);
}
// Returns the cost accumulated so far for this group, as reported by the
// CommCost object.
double BoundGroup::StartB()
{
    const double cost = boundCost.GetCost();
    return cost;
}
//====
// Gives direct access to the group's CommCost object (owned by the group).
CommCost* BoundGroup::GetBoundCost()
{
    CommCost* accumulator = &boundCost;
    return accumulator;
}
//=***

View File

@@ -0,0 +1,42 @@
#ifndef BGroupH
#define BGroupH
//////////////////////////////////////////////////////////////////////
//
// BGroup.h interface for the Bgroup class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include "DArray.h"
#include "Block.h"
#include "DimBound.h"
#include "CommCost.h"
// Group of shadow-edge (bound) exchanges for arrays aligned with one AMView.
// AddBound registers an array; StartB returns the accumulated cost.
class BoundGroup {
CommCost boundCost; // cost accumulator: created lazily and updated by AddBound, read by StartB
// std::vector<DimBound> dimInfo;
// for pipeline
// long vmDimension;
// char dimBound; // L-left, R-right
public:
std::vector<DimBound> dimInfo; //====// per-dimension bounds appended by AddBound; presumably this used to be private (see the commented-out declaration above)
AMView *amPtr; // AMView of the first array passed to AddBound; 0 until then
BoundGroup();
virtual ~BoundGroup();
// Adds the bounds of one array; a width of -1 selects the array's own shadow width.
void AddBound(DArray *ADArray, const std::vector<long>& ALeftBSizeArray,
const std::vector<long>& ARightBSizeArray, long ACornerSign);
//====
// Pointer to the internal cost accumulator.
CommCost* GetBoundCost();
//=***
// Cost accumulated so far.
double StartB();
// char getDimBound() const { return dimBound; }
// long getVmDimension() const { return vmDimension; }
};
#endif

View File

@@ -0,0 +1,471 @@
// Block.cpp: implementation of the Block class.
//
//////////////////////////////////////////////////////////////////////
#include <assert.h>
#include "Block.h"
using namespace std;
extern ofstream prot;
#if defined (_MSC_VER) || (defined (__GNUG__) && (__GNUC__ < 3))
/*template <class T>
T min(T a, T b)
{
return a < b ? a : b;
}*/
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Builds a block from a ready-made list of per-dimension line segments.
Block::Block(vector<LS> &v) : LSDim(v)
{
}

// Builds an empty block (no dimensions).
Block::Block() {}

// Nothing to release; the destructor body is intentionally empty.
Block::~Block() {}
// Intersection of two blocks.
// Returns an empty block when either operand is empty or when the segments of
// at least one dimension do not overlap; otherwise the block made of the
// per-dimension segment intersections. Operands of different rank are reported
// to `prot` and terminate the program with exit(1).
//
// Each per-dimension intersection is now computed once and reused (it used to
// be computed twice), and no temporary vector is needed for the empty result.
Block operator^ (Block &x, Block &y)
{
    Block temp; // starts out empty

    if (x.empty() || y.empty())
        return temp;

    const long rank = x.GetRank();
    if (rank != y.GetRank())
    {
        prot << "Wrong call operator^" << endl;
        exit(1);
    }

    temp.LSDim.reserve(rank);
    for (long i = 0; i < rank; i++)
    {
        LS common = x.LSDim[i] ^ y.LSDim[i];
        if (common.IsEmpty())
        {
            // One empty dimension makes the whole intersection empty.
            temp.LSDim.clear();
            break;
        }
        temp.LSDim.push_back(common);
    }
    return temp;
}
long Block::GetBlockSize()
{
int i;
long size = 1;
if (LSDim.empty())
return 0;
for (i = 0; i < LSDim.size(); i++)
size *= LSDim[i].GetLSSize();
//====
//printf("GETSIZE[%d] %d\n",i,size);
//=***
return size;
}
//grig
// Builds the local block of array `da` on processor ProcLI (linear index in the
// VM), splitting every distributed AM axis in proportion to the weights stored
// in the AMView (am->weightEl) instead of into equal parts.
// The block becomes empty when the processor receives no elements.
// NOTE(review): the third parameter `a` is never read; it only selects this
// overload.
Block::Block(DArray * da, long ProcLI , int a)
{
int i;
long vmRank, vmDimSize, dimProcI;
long amRank, amDimSize, amAxis;
long daRank, daAxis;
long amLower, amUpper, BlockSize; // Param, Module;
bool IsBlockEmpty = false;
vector<long> ProcSI;
VM *vm;
AMView *am;
DistAxis dist;
AlignAxis align, alignParam;
LS ls;
am = da->AM_Dis;
amRank = am->Rank();
vm = am->VM_Dis;
vmRank = vm->Rank();
// ProcSI receives the per-axis coordinates of processor ProcLI.
vm->GetSI(ProcLI, ProcSI);
daRank = da->Rank();
LSDim.reserve(daRank);
//grig
std::vector<double> avWeights;
int j;
long local_sum=0; // offset of the current VM axis inside the weight vector
long jmax; // size of the current VM axis
double vBlockSize,temp_w=0; // elements per unit of weight; sum of the weights of the current VM axis
double sum1=0; // weighted number of elements that precede this processor
//grig
/* LU deb
for(i=0;i<da->AlignRule.size();i++)
printf("Block %d %d %d %d ",da->AlignRule[i].A,da->AlignRule[i].B,da->AlignRule[i].Axis,da->AlignRule[i].TAxis);
printf("\n");
*/
// Start with the whole array in every dimension.
for (i = 0; i < daRank; i++)
LSDim.push_back(LS(0, da->GetSize(i+1)-1));
// Narrow the block along every VM axis that carries a distributed AM axis.
for (i = 0; i < vmRank; i++)
{
dist = am->DistRule[amRank + i];
switch (dist.Attr)
{
case map_NORMVMAXIS :
amAxis = dist.Axis;
vmDimSize = vm->GetSize(i+1);
amDimSize = am->GetSize(amAxis);
dimProcI = ProcSI[i];
// Equal-size split; BlockSize is not read again, and amLower / amUpper
// are overwritten by the weighted values computed further down.
BlockSize = (amDimSize - 1) / vmDimSize + 1;
amLower = dimProcI * BlockSize;
amUpper = min(amDimSize, amLower+BlockSize) - 1;
// printf("amAxis=%d amDimSize=%d\n",amAxis,amDimSize);
// printf("Blocksize=%d %d %d\n",BlockSize,amLower,amUpper);
// NOTE(review): avWeights is filled here but never read afterwards.
am->weightEl.GetWeights(avWeights);
local_sum=0; // reset the offset for this VM axis
jmax=vm->GetSize(i+1); // size of this VM axis
// NOTE(review): comma expression - only temp_w is reset; vBlockSize is assigned below.
vBlockSize,temp_w=0; //
sum1=0;
long lBlockSize;
// The weights of VM axis i start after the weights of all preceding VM axes.
for(j=0;j<i;j++)
{
local_sum+=vm->GetSize(j+1);
}
// Total weight along this VM axis (stops early if the weight vector is too short).
for(j=0;j<jmax;j++)
{ if(j+local_sum>=am->weightEl.GetSize()) break;
temp_w+=am->weightEl.body[j+local_sum]; // accumulate the weight of processor j
}
if(temp_w==0) temp_w=1; //====// avoid dividing by zero when there are no weights
vBlockSize = amDimSize/temp_w; // elements per unit of weight
// lBlockSize=ceil((double)amDimSize/temp_w) > 0.5 ? amDimSize/temp_w+ 1 : amDimSize/temp_w;
//====
// When block-size multiples are given, the axis size must be divisible by the
// multiple and the per-weight size is rounded up to the next multiple.
// NOTE(review): BSize is indexed by the VM axis index i without a bounds check.
if(am->BSize.size() > 0)
{
// NOTE(review): long values are printed with %d and the error path uses exit(0) - confirm.
if(amDimSize % am->BSize[i] !=0 ) { printf("Error: Dimension %d is not dividible by %d \n",amDimSize, am->BSize[i]); exit(0);}
lBlockSize=(long)ceil(vBlockSize);
if( ( lBlockSize % am->BSize[i]) > 0)
lBlockSize = ( lBlockSize / am->BSize[i] + 1) * am->BSize[i];
vBlockSize=lBlockSize;
}
//=***
/*
if(vBlockSize - ceil(vBlockSize)<0.5) // disabled rounding variant
{
lBlockSize=floor(vBlockSize);
}
else
lBlockSize= ceil(vBlockSize);
*/
// printf("Blocksize v=%f l=%d\n",vBlockSize, lBlockSize);
// Lower bound: weighted sizes of all processors that precede this one on the axis.
for(j=0;j<dimProcI;j++)
{ if(j+local_sum>=am->weightEl.GetSize()) break; //====// weight vector exhausted
sum1+=(vBlockSize*am->weightEl.body[j+local_sum]);
}
// Both bounds are truncated from double to long here.
amLower=sum1;
amUpper=(double)sum1;
// Upper bound: lower bound plus this processor's weighted size, minus one.
if(dimProcI+local_sum<am->weightEl.GetSize()) amUpper += vBlockSize*am->weightEl.body[dimProcI+local_sum]-1; //====// only when a weight exists for this processor
// Clamp to the last element; an upper bound of amDimSize-2 is also pulled up to amDimSize-1.
if(amUpper+1>=amDimSize-1)
amUpper=amDimSize-1;
IsBlockEmpty = IsBlockEmpty || amLower > amUpper;
// This break leaves only the switch; the loop is left by the check after it.
if (IsBlockEmpty)
break;
// printf("bBLOCK[%d] %d %d\n",ProcLI, amLower,amUpper);
// Alignment entry that describes AM axis amAxis (stored after the daRank array entries).
align = da->AlignRule[daRank+amAxis-1];
switch (align.Attr) {
case align_NORMTAXIS :
daAxis = align.Axis;
assert(daAxis != 0);
alignParam = da->AlignRule[daAxis-1];
ls = LS(amLower, amUpper);
// printf("bBLOCK ls=%d %d daAxissize=%d\n",ls.GetLower(),ls.GetUpper(),da->GetSize(daAxis));
// Convert the AM segment to array indices using the alignment coefficients A and B.
ls.transform(alignParam.A, alignParam.B, da->GetSize(daAxis));
// printf("eBLOCK ls=%d %d\n",ls.GetLower(),ls.GetUpper());
if (ls.IsEmpty()) {
IsBlockEmpty = true;
}
else
{
//xp_max
if(daAxis-1<LSDim.size())
LSDim[daAxis-1] = ls; // store the segment for this array dimension
else
{
printf("PREDICTOR!!! seems as error in Block.cpp\n");
//exit(0);
}
}
break;
case align_BOUNDREPL :
ls = LS(amLower, amUpper);
ls.transform(align.A, align.B, align.Bound);
if (ls.IsEmpty())
IsBlockEmpty = true;
break;
case align_REPLICATE :
break;
case align_CONSTANT :
// The array lives at position B of this AM axis only.
if (align.B < amLower || align.B > amUpper)
IsBlockEmpty = true;
break;
} // end switch
break;
case map_REPLICATE :
break;
} // end switch
if (IsBlockEmpty)
break;
} // end for
if (IsBlockEmpty)
{
LSDim = vector<LS>(0);
#ifdef _TIME_TRACE_
prot << LSDim.empty() << endl; // trace output
#endif
}
}
//\grig
// Builds the local block of array `da` on processor ProcLI (linear index in the
// VM) for an equal-size block distribution: along every distributed AM axis the
// elements are cut into chunks of ceil(axis size / VM axis size) and the
// processor takes the chunk that matches its coordinate on that VM axis.
// The block becomes empty when the processor receives no elements.
Block::Block(DArray * da, long ProcLI)
{
    AMView *view = da->AM_Dis;
    const long viewRank = view->Rank();
    VM *machine = view->VM_Dis;
    const long machineRank = machine->Rank();

    // Per-axis coordinates of the processor inside the VM.
    vector<long> procCoords;
    machine->GetSI(ProcLI, procCoords);

    const long arrayRank = da->Rank();
    LSDim.reserve(arrayRank);

    // Start with the whole array in every dimension.
    for (int d = 0; d < arrayRank; d++)
        LSDim.push_back(LS(0, da->GetSize(d + 1) - 1));

    bool nothingLocal = false;
    for (int v = 0; v < machineRank; v++)
    {
        DistAxis rule = view->DistRule[viewRank + v];

        // Only VM axes that carry an AM axis restrict the block.
        if (rule.Attr != map_NORMVMAXIS)
            continue;

        const long axis = rule.Axis;
        const long procCount = machine->GetSize(v + 1);
        const long extent = view->GetSize(axis);
        const long coord = procCoords[v];

        const long chunk = (extent - 1) / procCount + 1;
        const long lower = coord * chunk;
        const long upper = min(extent, lower + chunk) - 1;
        if (lower > upper)
        {
            nothingLocal = true;
            break;
        }

        // Alignment entry that describes this AM axis (stored after the
        // arrayRank array entries).
        AlignAxis align = da->AlignRule[arrayRank + axis - 1];
        switch (align.Attr)
        {
        case align_NORMTAXIS :
        {
            const long daAxis = align.Axis;
            assert(daAxis != 0);
            AlignAxis coeffs = da->AlignRule[daAxis - 1];
            LS segment(lower, upper);
            segment.transform(coeffs.A, coeffs.B, da->GetSize(daAxis));
            if (segment.IsEmpty())
                nothingLocal = true;
            else if (daAxis - 1 < LSDim.size()) //xp_max
                LSDim[daAxis - 1] = segment;
            break;
        }
        case align_BOUNDREPL :
        {
            LS segment(lower, upper);
            segment.transform(align.A, align.B, align.Bound);
            if (segment.IsEmpty())
                nothingLocal = true;
            break;
        }
        case align_CONSTANT :
            // The array lives at position B of this AM axis only.
            if (align.B < lower || align.B > upper)
                nothingLocal = true;
            break;
        case align_REPLICATE :
        default :
            break;
        }

        if (nothingLocal)
            break;
    }

    if (nothingLocal)
    {
        LSDim = vector<LS>(0);
#ifdef _TIME_TRACE_
        prot << LSDim.empty() << endl; // trace output
#endif
    }
}
// True when the block has no dimensions, i.e. holds no elements.
bool Block::empty()
{
    return LSDim.size() == 0;
}

// Number of dimensions stored in the block (0 for an empty block).
long Block::GetRank()
{
    const long rank = LSDim.size();
    return rank;
}
// Copies the per-dimension segments of x into this block; returns *this.
Block & Block::operator =(const Block & x)
{
    if (this != &x)
        LSDim = x.LSDim;
    return *this;
}
bool Block::IsBoundIn(const vector<long>& ALeftBSizeArray,
const vector<long>& ARightBSizeArray)
{
long i;
for (i = 0; i < LSDim.size(); i++)
{
if (!LSDim[i].IsBoundIn(ALeftBSizeArray[i], ARightBSizeArray[i]))
return false;
}
return true;
}
// Forwards to LS::IsLeft for dimension arrDim (1-based); always false for an
// empty block.
bool Block::IsLeft(long arrDim, long elem)
{
    return !empty() && LSDim[arrDim - 1].IsLeft(elem);
}

// Forwards to LS::IsRight for dimension arrDim (1-based); always false for an
// empty block.
bool Block::IsRight(long arrDim, long elem)
{
    return !empty() && LSDim[arrDim - 1].IsRight(elem);
}
// Product of the segment sizes of all dimensions except `dim` (1-based);
// 0 for an empty block.
long Block::GetBlockSizeMult(long dim)
{
    if (LSDim.empty())
        return 0;

    const long skipped = dim - 1;
    const long rank = LSDim.size();
    long product = 1;
    for (long d = 0; d < rank; d++)
    {
        if (d != skipped)
            product *= LSDim[d].GetLSSize();
    }
    return product;
}
// Product of the segment sizes of all dimensions except `dim1` and `dim2`
// (both 1-based); 0 for an empty block.
long Block::GetBlockSizeMult2(long dim1, long dim2)
{
    if (LSDim.empty())
        return 0;

    const long rank = LSDim.size();
    long product = 1;
    for (long d = 0; d < rank; d++)
    {
        const bool skipped = (d == dim1 - 1) || (d == dim2 - 1);
        if (!skipped)
            product *= LSDim[d].GetLSSize();
    }
    return product;
}
//====
long Block::GetUpper(long i)
{ return LSDim[i].GetUpper();
}
long Block::GetLower(long i)
{ return LSDim[i].GetLower();
}
//=***

View File

@@ -0,0 +1,66 @@
#ifndef BlockH
#define BlockH
//////////////////////////////////////////////////////////////////////
//
// Block.h: interface for the Block class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include "Ls.h"
#include "DArray.h"
class DArray;
// Rectangular section of elements
// Multi-dimensional index block described by one LS (section) per dimension.
// A Block whose LSDim is empty is the "empty block": empty() returns true and the
// size/side queries below return 0 / false for it.
class Block {
// One LS entry per dimension of the block
std::vector<LS> LSDim;
public:
// NOTE(review): presumably builds the block from the given per-dimension sections;
// the constructor body is not part of this header — confirm.
Block(std::vector<LS> &v);
Block();
// NOTE(review): presumably the part of array `da` associated with the processor whose
// linear index is ProcLI — confirm against the constructor implementation.
Block(DArray *da, long ProcLI);
//grig
Block(DArray *da, long ProcLI,int a); // NOTE(review): the original note on `a` was lost to an encoding error — meaning of `a` must be confirmed
//\grig
virtual ~Block();
// Product of the section sizes over all dimensions except the listed ones
// (dimension numbers are 1-based); both return 0 for an empty block.
long GetBlockSizeMult2(long dim1, long dim2);
long GetBlockSizeMult(long dim);
// Forward the IsLeft / IsRight query for `elem` to the section of dimension arrDim
// (1-based). Both return false for an empty block.
bool IsLeft(long arrDim, long elem);
bool IsRight(long arrDim, long elem);
// True only if every dimension's section reports IsBoundIn for the matching entries
// of the two arrays (arrays are indexed by zero-based dimension number).
bool IsBoundIn(const std::vector<long>& ALeftBSizeArray,
const std::vector<long>& ARightBSizeArray);
Block & operator =(const Block & x);
// Number of dimensions (entries in LSDim)
long GetRank();
// True when LSDim has no entries
bool empty();
// NOTE(review): presumably the total number of elements in the block — the
// implementation is not visible here, confirm.
long GetBlockSize();
friend Block operator^ (Block &x, Block &y); // intersection
//====
// Upper / lower bound of the section of dimension i; here i is a ZERO-based index
// (unlike arrDim above, which is 1-based).
long GetUpper(long i);
long GetLower(long i);
//=***
};
#endif

View File

@@ -0,0 +1,362 @@
#ifndef _CallInfo_H
#define _CallInfo_H
#include <vector>
#include "Event.h"
// One traced call: the id of the called function, a source position, and the raw
// text lines that carry the call's input and output parameters.
// NOTE(review): ownership of source_file / call_info / ret_info is not visible in this
// header — confirm who allocates and releases them.
struct TraceCall {
Event func_id; // id of the traced function (Event comes from Event.h)
int source_line; // NOTE(review): presumably the line of the call inside source_file — confirm
char* source_file;
int call_info_count; // NOTE(review): presumably the number of entries in call_info — confirm
char** call_info; // pointer to lines with input function params
int ret_info_count; // NOTE(review): presumably the number of entries in ret_info — confirm
char** ret_info; // pointer to lines with output function params
TraceCall(Event func_id, int source_line, char* source_file, int call_info_count,
char** call_info, int ret_info_count, char** ret_info);
};
// Common CallInfo structures
// Call-info record that carries a single identifier and nothing else.
// It is reused through typedefs in this header for every call whose record
// needs only an ID.
struct IDOnlyInfo {
long ID;
};
// Interval structures
// Call-info record for binter_ (interval structures group).
// NOTE(review): the destructor releases `file` with scalar `delete`. If the buffer is
// obtained with new[] or malloc/strdup this is undefined behaviour — confirm the
// allocation site and match the deallocation to it.
// NOTE(review): no copy constructor / copy assignment is declared, so copying an
// instance makes two objects delete the same pointer — confirm instances are never copied.
struct binter_Info {
long line;
char * file; // released by the destructor below
long index;
~binter_Info() { delete file; }
};
typedef struct IDOnlyInfo einter_Info;
// Message sending structures
typedef struct IDOnlyInfo rtl_BarrierInfo;
// Call-info record for rtl_Bcast (message sending group).
// NOTE(review): Count / Size are presumably the element count and the element size
// of the broadcast — inferred from the names only, confirm.
struct rtl_BcastInfo {
long Count;
long Size;
};
// MPS/AM/AMView structures
typedef struct IDOnlyInfo CreateVMSInfo;
typedef struct IDOnlyInfo getam_Info;
// Call-info record for crtps_ (MPS/AM/AMView group).
// NOTE(review): field meanings other than the existing "ID" remark are inferred from
// the names — confirm against the trace parser.
struct crtps_Info {
long PSRef; // ID
long PSRefParent;
std::vector<long> InitIndexArray;
std::vector<long> LastIndexArray;
long StaticSign;
};
// Call-info record for getps_ (MPS/AM/AMView group): a PSRef / AMRef pair.
// NOTE(review): which of the two is input and which is result is not visible here.
struct getps_Info {
long PSRef;
long AMRef;
};
// Call-info record for psview_ (MPS/AM/AMView group).
// NOTE(review): SizeArray presumably holds Rank entries — inferred from the names, confirm.
struct psview_Info {
long PSRef; // ID
long PSRefParent;
long Rank;
std::vector<long> SizeArray;
long StaticSign;
};
// Call-info record for setelw_ (MPS/AM/AMView group).
// NOTE(review): the trailing numbers 16 / 10 on the fields presumably name the numeric
// base (hex / decimal) used when the value is read from the trace — confirm.
struct setelw_Info {
long PSRef; // 16
long AMViewRef; // 16
long AddrNumber; // 10
std::vector<long> WeightNumber;
// length = sum over i in [0, AddrNumber-1] of WeightNumber[i]
std::vector<double> LoadWeight;
};
typedef struct IDOnlyInfo delps_Info;
// Call-info record for getamr_ (MPS/AM/AMView group).
// NOTE(review): IndexArray presumably selects an element of the AMView referenced by
// AMViewRef — inferred from the names, confirm.
struct getamr_Info {
long AMRef; // ID
long AMViewRef;
std::vector<long> IndexArray;
};
// Call-info record for getamv_ (MPS/AM/AMView group): an array header paired with
// an AMView reference.
struct getamv_Info {
long ArrayHeader; // ArrayHeader
long AMViewRef;
};
// Call-info record for mapam_ (MPS/AM/AMView group): an AMRef / PSRef pair.
// NOTE(review): presumably "map this abstract machine onto this processor system" —
// inferred from the name, confirm.
struct mapam_Info {
long AMRef;
long PSRef;
};
typedef struct IDOnlyInfo runam_Info;
// Call-info record for crtamv_ (MPS/AM/AMView group).
struct crtamv_Info {
long ID; // AMViewRef
long AM_ID; // AMRef
long StaticSign; // StaticSign
std::vector<long> SizeArray; // Rank + SizeArray (NOTE(review): presumably rank is implied by the vector length — confirm)
};
typedef struct IDOnlyInfo delamv_Info;
//====
// Call-info record for blkdiv_ (MPS/AM/AMView group).
struct blkdiv_Info {
long ID; // AMViewRef
std::vector<long> AMVAxisDiv; // AMVAxisDiv[]
};
//=***
// Call-info record for distr_ (MPS/AM/AMView group).
// NOTE(review): AxisArray / DistrParamArray presumably feed AMView::DisAM, which takes
// arguments of the same names — confirm at the call site.
struct distr_Info {
long ID; // AMViewRef
long PSRef; // PSRef
std::vector<long> AxisArray; // AxisArray[]
std::vector<long> DistrParamArray;// DistrParamArray[]
};
// Call-info record for redis_ (MPS/AM/AMView group). Same fields as distr_Info plus
// an array header and NewSign.
// NOTE(review): meaning of NewSign is not visible in this header — confirm.
struct redis_Info {
long ID; // AMViewRef
long AID; // ArrayHeader
long PSRef; // PSRef
std::vector<long> AxisArray; // AxisArray[]
std::vector<long> DistrParamArray;// DistrParamArray[]
long NewSign;
};
// DArray structures
// Call-info record for crtda_ (DArray group).
// NOTE(review): SizeArray / LowShdWidthArray / HiShdWidthArray / TypeSize carry the
// same names as the DArray constructor arguments — presumably passed through, confirm.
struct crtda_Info {
long ArrayHandlePtr; // ArrayHandlePtr
long ArrayHeader; // ArrayHeader
long TypeSize;
long StaticSign; //
long ReDistrSign; //
std::vector<long> SizeArray;
std::vector<long> LowShdWidthArray;
std::vector<long> HiShdWidthArray;
};
// Call-info record for align_ (DArray group).
// NOTE(review): AxisArray / CoeffArray / ConstArray carry the same names as the
// DArray::AlnDA arguments — presumably passed through, confirm.
struct align_Info
{
long ArrayHeader; // ArrayHeader
long ArrayHandlePtr; // ArrayHandlePtr
long PatternRefPtr; // PatternRefPtr
long PatternRef; // PatternRef
int PatternType; // AMView = 1, DisArray = 2
std::vector<long> AxisArray;
std::vector<long> CoeffArray;
std::vector<long> ConstArray;
};
typedef struct IDOnlyInfo delda_Info;
// Call-info record for realn_ (DArray group). Same fields as align_Info plus NewSign.
// NOTE(review): NewSign presumably becomes the ANewSign argument of DArray::RAlnDA — confirm.
struct realn_Info {
long ArrayHandlePtr; // ArrayHandlePtr
long ArrayHeader; // ArrayHeader
long PatternRefPtr; // PatternRefPtr
long PatternRef; // PatternRef
int PatternType; // AMView = 1, DisArray = 2
std::vector<long> AxisArray;
std::vector<long> CoeffArray;
std::vector<long> ConstArray;
long NewSign;
};
// Call-info record for arrcpy_ (DArray group): a "From" side and a "To" side, each
// with init/last/step index arrays, plus the copy mode and a flag pointer.
// NOTE(review): the index arrays presumably feed ArrayCopy, whose parameters follow the
// same From/To init/last/step layout — confirm at the call site.
struct arrcpy_Info {
long FromBufferPtr;
long FromArrayHeader;
long FromArrayHandlePtr;
std::vector<long> FromInitIndexArray;
std::vector<long> FromLastIndexArray;
std::vector<long> FromStepArray;
long ToBufferPtr;
long ToArrayHeader;
long ToArrayHandlePtr;
std::vector<long> ToInitIndexArray;
std::vector<long> ToLastIndexArray;
std::vector<long> ToStepArray;
long CopyRegim;
long CopyFlagPtr;
};
// Call-info record for waitcp_ (DArray group).
// NOTE(review): CopyFlagPtr presumably matches arrcpy_Info::CopyFlagPtr of the copy
// being waited for — inferred from the names, confirm.
struct waitcp_Info {
long CopyFlagPtr;
};
// ParLoop structures
// Call-info record for crtpl_ (ParLoop group): loop id and rank.
struct crtpl_Info {
long ID;
long Rank;
};
// Call-info record for mappl_ (ParLoop group): the pattern reference with its
// axis/coefficient/constant arrays, plus the loop's init/last/step index arrays.
// NOTE(review): per-field meanings are inferred from the names — confirm against the trace parser.
struct mappl_Info {
long LoopRef; //ID;
long PatternRefPtr;
long PatternRef;
int PatternType; // AMView = 1, DisArray = 2
std::vector<long> AxisArray;
std::vector<long> CoeffArray;
std::vector<long> ConstArray;
std::vector<long> InInitIndexArray;
std::vector<long> InLastIndexArray;
std::vector<long> InStepArray;
};
typedef struct IDOnlyInfo dopl_Info;
typedef struct IDOnlyInfo endpl_Info;
//grig
// Extended call-info record for dopl_ (ParLoop group); the plain dopl_Info typedef
// carries only an ID.
// NOTE(review): Dim / Step / Lower / Upper presumably hold one entry per loop
// dimension — inferred from the names, confirm.
struct dopl_full_Info
{
long ID;
std::vector<long> Dim;
std::vector<long> Step;
std::vector<long> Lower;
std::vector<long> Upper;
long ReturnVar; //====//
};
//\grig
// Reduction structures
typedef struct IDOnlyInfo crtrg_Info;
// Call-info record for crtred_ (Reduction group).
// NOTE(review): field meanings are inferred from the names — confirm against the trace parser.
struct crtred_Info {
long ID;
long RedArrayType;
long RedArrayLength;
long LocElmLength;
};
// Call-info record for insred_ (Reduction group).
// NOTE(review): RG_ID / RV_ID are presumably the reduction-group id and the
// reduction-variable id — inferred from the names, confirm.
struct insred_Info {
long RG_ID;
long RV_ID;
};
typedef struct IDOnlyInfo delrg_Info;
typedef struct IDOnlyInfo delred_Info;
typedef struct IDOnlyInfo strtrd_Info;
typedef struct IDOnlyInfo waitrd_Info;
// Shadow structures
// Call-info record for crtshg_ (Shadow group).
struct crtshg_Info {
long StaticSign;
long ShadowGroupRef;
};
// Shared call-info record for the inssh_ / incsh_ / insshd_ / incshd_ family
// (Shadow group); `func` tells which of them produced the record, and some fields
// are meaningful only for part of the family (see the per-field remarks).
// NOTE(review): the Init*/Last* index arrays have no remarks in the original —
// their exact meaning must be confirmed against the trace parser.
struct inssh_Info {
Event func; // function ID
long ShadowGroupRef; // SHG_ID;
long ArrayHeader; //
long ArrayHandlePtr; // DA_ID;
long FullShdSign; // only for inssh_, incsh_
long MaxShdCount; // only for insshd_, incshd
std::vector<long> ShdSignArray; // for insshd_, incshd_
std::vector<long> LowShdWidthArray;
std::vector<long> HiShdWidthArray;
std::vector<long> InitDimIndex;
std::vector<long> LastDimIndex;
std::vector<long> InitLowShdIndex;
std::vector<long> LastLowShdIndex;
std::vector<long> InitHiShdIndex;
std::vector<long> LastHiShdIndex;
};
// Call-info record for exfrst_ (Shadow group): a loop reference paired with a
// shadow-group reference.
struct exfrst_Info{
long ID; // LoopRef
long ShadowGroupRef;
};
// Call-info record for imlast_ (Shadow group): a loop reference paired with a
// shadow-group reference (same layout as exfrst_Info).
struct imlast_Info{
long ID; // LoopRef
long ShadowGroupRef;
};
// Call-info record for across_ (Shadow group). A sample of the traced values
// follows this struct in the header as a block comment.
// NOTE(review): PipeLinePLAxis does not appear in that sample — confirm how it is filled.
struct across_Info {
long AcrossType;
long OldShadowGroupRef;
long NewShadowGroupRef;
double PipeLinePar;
long CondPipeLine;
long ErrPipeLine;
long PipeLinePLAxis;
};
/*
AcrossType=1;
OldShadowGroupRef=9b7ac0;
NewShadowGroupRef=9b77c0;
PipeLinePar=0.000000;
CondPipeLine=0
ErrPipeLine=60
*/
typedef struct IDOnlyInfo delshg_Info;
typedef struct IDOnlyInfo strtsh_Info;
typedef struct IDOnlyInfo waitsh_Info;
typedef struct IDOnlyInfo sendsh_Info;
typedef struct IDOnlyInfo recvsh_Info;
// Regular access to remote data
// Call-info record for crtrbl_ (regular access to remote data).
// NOTE(review): field meanings are inferred from the names — confirm against the trace parser.
struct crtrbl_Info {
long RemArrayHeader;
long BufferHeader;
long StaticSign;
long LoopRef;
std::vector<long> AxisArray;
std::vector<long> CoeffArray;
std::vector<long> ConstArray;
};
// Call-info record for crtrbp_ (regular access to remote data).
// NOTE(review): field meanings other than ID are inferred from the names — confirm.
struct crtrbp_Info {
long ID; // BufferHeader;
long RemArrayHeader;
long StaticSign;
long PSRef;
long IsLocal;
std::vector<long> CoordArray;
};
// Call-info record for loadrb_ (regular access to remote data).
struct loadrb_Info {
long ID; // BufferHeader;
long RenewSign;
};
typedef struct IDOnlyInfo waitrb_Info;
// Call-info record for srmem_.
// NOTE(review): LengthArray presumably holds MemoryCount entries — inferred from the
// names, confirm.
struct srmem_Info {
long MemoryCount;
std::vector<long> LengthArray;
};
// Root info
// Root info record.
// NOTE(review): VPSSize presumably holds VPSRank entries and VProcCount their
// product — inferred from the names, confirm.
struct root_Info {
long VProcCount;
long VPSRank;
std::vector<long> VPSSize;
};
extern bool GetCallParams(TraceCall &trc_call, void*& call_params);
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,72 @@
#ifndef CommCostH
#define CommCostH
//////////////////////////////////////////////////////////////////////
//
// CommCost.h: interface for the CommCost class.
//
//////////////////////////////////////////////////////////////////////
#include <cmath>
#include <vector>
#include <algorithm>
#include "Vm.h"
#include "DArray.h"
#include "DimBound.h"
//====
#include "LoopBlock.h"
//=***
typedef std::vector<long> lvector;
typedef std::vector<lvector> Dim2Array;
class VM;
class Block;
//====
class LoopBlock;
//=***
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// Accumulates communication volumes for one virtual machine (vm) in the `transfer`
// table and converts them into a time estimate via GetCost().
// NOTE(review): the original comments in this class were lost to an encoding error;
// the remarks below are reconstructed from the declarations and visible call sites.
class CommCost {
public:
// 2-D table of long values, read as transfer[i][j]; the visible trace output walks
// both i and j up to the VM's GetLSize().
// NOTE(review): presumably the amount sent from processor i to processor j — confirm.
Dim2Array transfer;
VM *vm;
CommCost(VM *Avm);
CommCost();
virtual ~CommCost();
// NOTE(review): presumably adds to `transfer` the traffic needed to deliver readBlock
// of FromArray to all processors — confirm against the implementation.
void CopyUpdate(DArray *FromArray, Block & readBlock);
//====
// NOTE(review): like CopyUpdate, but presumably only for the receiving processor with
// linear index p1 — confirm against the implementation.
void CopyUpdateDistr(DArray * FromArray, Block &readBlock, long p1);
long GetLSize();
// void calculate();
void Across(double call_time, long LoopSZ, LoopBlock** ProcBlock,int type_size);
//=***
void BoundUpdate(DArray *daPtr, std::vector<DimBound> & dimInfo, bool IsCorner);
// Returns the cost derived from the accumulated table as a double; callers in
// DArray.cpp use the result as a time value.
double GetCost();
CommCost & operator =(const CommCost &);
// Called by DArray::RAlnDA with a copy of the array taken before realignment (oldDA)
// and the realigned array (newDA).
// NOTE(review): presumably adds the traffic needed to move data from the old
// placement to the new one — confirm.
void Update(DArray *oldDA, DArray *newDA);
};
#endif

View File

@@ -0,0 +1,821 @@
#include <assert.h>
#include "DArray.h"
//#include "ModelStructs.h"
#include <iostream>
using namespace std;
extern ofstream prot;
extern bool SynchCopy; //====//
// Creates an array descriptor that is not yet aligned: sizes and shadow widths are
// copied from the arguments, the AMView pointer and the replication flag start at 0,
// and the alignment rule table starts empty.
// TypeSize stores abs(ATypeSize): traces may carry a negative type size, and the
// abs() can go once that is fixed on the trace side.
DArray::DArray(const vector<long>& ASizeArray, const vector<long>& ALowShdWidthArray,
               const vector<long>& AHiShdWidthArray, int ATypeSize)
    : Space(ASizeArray)
    , LowShdWidthArray(ALowShdWidthArray)
    , HiShdWidthArray(AHiShdWidthArray)
    , TypeSize(abs(ATypeSize))
    , AM_Dis(0)
    , Repl(0)
    , AlignRule()
{
}
// Nothing to release explicitly: the destructor body is empty.
DArray::~DArray() {}
void DArray::PrepareAlign(long& TempRank, const vector<long>& AAxisArray,
const vector<long>& ACoeffArray, const vector<long>& AConstArray,
vector<AlignAxis>& IniRule)
{
long ArrRank, ARSize;
int i,j;
ArrRank = Rank();
ARSize = ArrRank + TempRank;
IniRule.reserve(ARSize);
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
for (i = 0; i < ArrRank; i++)
IniRule.push_back(AlignAxis(align_COLLAPSE, i+1, 0));
/* Lu deb
for(i=0;i<IniRule.size();i++)
printf("ini %d %d %d %d ",IniRule[i].A, IniRule[i].B, IniRule[i].Axis, IniRule[i].TAxis);
printf("\n");
*/
for (i = ArrRank; i < ARSize; i++)
IniRule.push_back(AlignAxis(align_NORMTAXIS, 0, i-ArrRank+1));
/* Lu deb
for(i=0;i<IniRule.size();i++)
printf("ini %d %d %d %d ",IniRule[i].A, IniRule[i].B, IniRule[i].Axis, IniRule[i].TAxis);
printf("\n");
*/
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> DistRule <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
for (i = 0; i < TempRank; i++) {
if (i>=AAxisArray.size() || AAxisArray[i] < 0) // klinov add i>=AAxisArray.size()
IniRule[i+ArrRank] = AlignAxis(align_REPLICATE, 0, i+1);
else if (ACoeffArray[i] == 0 || AAxisArray[i] == 0)
// in new version last condition not require
IniRule[i+ArrRank] = AlignAxis(align_CONSTANT, 0, i+1, 0, AConstArray[i]);
else {
IniRule[i+ArrRank] = AlignAxis(align_NORMTAXIS, AAxisArray[i], i+1);
IniRule[AAxisArray[i]-1] = AlignAxis(align_NORMAL, AAxisArray[i], i+1, ACoeffArray[i], AConstArray[i]);
}
}
/* Lu deb
for(i=0;i<IniRule.size();i++)
printf("ini %d %d %d %d ",IniRule[i].A, IniRule[i].B, IniRule[i].Axis, IniRule[i].TAxis);
printf("\n");
*/
}
// Aligns this array directly with the AMView APattern.
//
// Registers the array with the AMView, builds AlignRule through PrepareAlign, then
// derives Repl: it starts at 1 and is cleared as soon as a template axis is found
// that is either
//   - align_CONSTANT on an AMView axis whose distribution rule is map_BLOCK, or
//   - align_NORMTAXIS for an array axis for which GetMapDim returns a positive value.
void DArray::AlnDA(AMView *APattern, const vector<long>& AAxisArray,
                   const vector<long>& ACoeffArray, const vector<long>& AConstArray)
{
    long templRank = APattern->Rank();
    vector<AlignAxis> rules;
    int mapDir;     // out-parameter of GetMapDim; its value is not used here

    AM_Dis = APattern;
    APattern->AddDA(this);

    PrepareAlign(templRank, AAxisArray, ACoeffArray, AConstArray, rules);
    AlignRule = rules;

    // Replicated until a template axis proves otherwise.
    Repl = 1;
    for (long t = 0; Repl && t < templRank; ++t) {
        AlignAxis &rule = AlignRule[t + Rank()];
        if (rule.Attr == align_CONSTANT) {
            if (AM_Dis->DistRule[rule.TAxis - 1].Attr == map_BLOCK)
                Repl = 0;
        } else if (rule.Attr == align_NORMTAXIS) {
            if (GetMapDim(rule.Axis, mapDir) > 0)
                Repl = 0;
        }
    }

#ifdef _TIME_TRACE_
    // Dump the resulting rule table to the protocol stream.
    for (vector<AlignAxis>::iterator it = AlignRule.begin(); it != AlignRule.end(); ++it) {
        prot << endl << it->Attr << " " << it->Axis << " " << it->TAxis << " " << it->A << " " << it->B << " " << it->Bound << endl;
    }
#endif
}
// Aligns this array with another, already aligned, array APattern.
//
// The rules are first built relative to APattern's axes (PrepareAlign) and then
// composed with APattern's own rules (TAlign) so that they refer to the axes of the
// AMView that APattern is aligned with:
//   - an array axis that is align_NORMAL on a pattern axis which is itself align_NORMAL
//     gets the composed linear rule (A*tA, B*tA + tB) on the AMView axis;
//   - an array axis that lands on a collapsed pattern axis becomes align_COLLAPSE;
//   - align_CONSTANT / align_REPLICATE template entries whose pattern axis is
//     align_NORMAL are moved to the slot of the corresponding AMView axis.
// Repl is inherited from APattern. Terminates the program if APattern is not aligned.
//
// Fix: the rule table is now guaranteed to have at least Rank() + AMView rank entries.
// The table used to take the size of IniRule (Rank() + pattern rank), while the second
// loop writes to index ArrRank + tAl.TAxis - 1 with tAl.TAxis ranging over AMView axes;
// for a pattern array of lower rank than its AMView that write was out of bounds.
// NOTE(review): the added slots are default-constructed AlignAxis values — confirm
// that this is the intended content for AMView axes the pattern does not cover.
void DArray::AlnDA(DArray *APattern, const vector<long>& AAxisArray,
                   const vector<long>& ACoeffArray, const vector<long>& AConstArray)
{
    int i;

    if (!APattern->IsAlign()) {
        prot << "Wrong call AlnDA" << endl;
        exit(1);
    }

    long TempRank,
         ArrRank,
         ARSize;
    AMView * TempAMV;
    vector<AlignAxis> TAlign,
                      IniRule;
    AlignAxis aAl,
              tAl;

    TAlign = APattern->AlignRule;
    TempRank = APattern->Rank();
    TempAMV = APattern->AM_Dis;
    AM_Dis = TempAMV;
    TempAMV->AddDA(this);

    // Replication status is taken over from the pattern array.
    Repl = APattern->Repl;

    // Rules relative to the pattern array's axes.
    PrepareAlign(TempRank, AAxisArray, ACoeffArray, AConstArray, IniRule);

    // Final table size: rank of this array + rank of the AMView reached through the pattern.
    ArrRank = Rank();
    ARSize = ArrRank + TempAMV->Rank();

    AlignRule = IniRule;
    // Keep every AMView-axis slot addressable (see the fix note in the header comment).
    if (AlignRule.size() < static_cast<vector<AlignAxis>::size_type>(ARSize))
        AlignRule.resize(ARSize);

    // Part 1: compose the array-axis rules with the pattern's rules.
    for (i = 0; i < ArrRank; i++)
    {
        aAl = IniRule[i];
        if (aAl.Attr == align_NORMAL)
        {
            tAl = TAlign[aAl.TAxis - 1];
            switch (tAl.Attr)
            {
                case align_NORMAL :
                    aAl.TAxis = tAl.TAxis;
                    aAl.A *= tAl.A;
                    aAl.B = aAl.B * tAl.A + tAl.B;
                    break;
                case align_COLLAPSE :
                    aAl.TAxis = 0;
                    aAl.Attr = align_COLLAPSE;
                    break;
            }
        }
        AlignRule[i] = aAl;
    }

#ifdef nodef
    for (i = 0; i < TempAMV->Rank(); i++) {
        AlignRule[i+ArrRank].Attr=TAlign[i+TempRank].Attr;
        AlignRule[i+ArrRank].TAxis=TAlign[i+TempRank].TAxis;
        AlignRule[i+ArrRank].A=TAlign[i+TempRank].A;
        AlignRule[i+ArrRank].B=TAlign[i+TempRank].B;
        AlignRule[i+ArrRank].Bound=TAlign[i+TempRank].Bound;
        AlignRule[i+ArrRank].Axis=IniRule[i+TempRank].Axis;
    }
#endif

    // Part 2: move constant / replicated template entries onto the AMView axis that
    // the corresponding pattern axis is normally aligned with.
    for (i = 0; i < TempRank; i++)
    {
        aAl = IniRule[i+ArrRank];
        switch (aAl.Attr)
        {
            case align_CONSTANT :
                tAl = TAlign[aAl.TAxis-1];
                if (tAl.Attr == align_NORMAL)
                {
                    aAl.TAxis = tAl.TAxis;
                    aAl.B = tAl.A * aAl.B + tAl.B;
                    AlignRule[ArrRank+tAl.TAxis-1] = aAl;
                }
                break;
            case align_REPLICATE:
                tAl = TAlign[aAl.TAxis-1];
                if (tAl.Attr == align_NORMAL)
                {
                    aAl.Attr = align_BOUNDREPL;
                    aAl.TAxis = tAl.TAxis;
                    aAl.A = tAl.A;
                    aAl.B = tAl.B;
                    aAl.Bound = APattern->GetSize(tAl.TAxis);
                    AlignRule[ArrRank+tAl.TAxis-1] = aAl;
                }
                break;
        }
    }

#ifdef _TIME_TRACE_
    // Dump the resulting rule table to the protocol stream.
    prot << "AlignRule:" << endl;
    vector<AlignAxis>::iterator first = AlignRule.begin(), last = AlignRule.end();
    while(first != last)
    {
        prot << endl << first->Attr << " " << first->Axis << " " << first->TAxis
             << " " << first->A << " " << first->B << " " << first->Bound << endl;
        ++first;
    }
#ifdef nodef
    first = AlignRule.begin(),
    last = AlignRule.end();
    while(first != last)
    {
        assert(first->Axis!=0);
        assert(first->TAxis!=0);
        ++first;
    }
#endif
#endif
}
// Realigns this array with the AMView APattern and returns the estimated
// communication cost of the realignment.
//
//  - Terminates the program if APattern is not distributed.
//  - If the array is not aligned yet, this is a plain alignment: cost 0.
//  - If ANewSign != 0 the array is detached from its current AMView and aligned
//    anew without any cost estimate: cost 0.
//  - Otherwise a copy of the array is taken before realignment, the array is
//    realigned, and CommCost::Update(old, new) followed by GetCost() gives the result.
//
// The snapshot and the cost accumulator are automatic objects (they used to be
// allocated with new and released with delete), so they are released on every exit
// path, including an exception thrown while the cost is being computed.
double DArray::RAlnDA(AMView *APattern, const vector<long>& AAxisArray,
                      const vector<long>& ACoeffArray, const vector<long>& AConstArray,
                      long ANewSign)
{
    if (!APattern->IsDistribute()) {
        prot << "Wrong call RAlnDA" << endl;
        exit(1);
    }

    if (!IsAlign()) {
        // Not aligned yet: ordinary align, nothing to move.
        AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);
        return 0.0;
    }

    if (ANewSign != 0) {
        // Leave the current AMView's array list first, then align from scratch.
        AM_Dis->DelDA(this);
        AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);
        return 0.0;
    }

    // Snapshot of the current placement and a cost table for the current VM; both
    // must be taken before AM_Dis is changed by AlnDA.
    DArray oldDA(*this);
    CommCost ralCost(AM_Dis->VM_Dis);

    // Leave the current AMView's array list, then realign.
    AM_Dis->DelDA(this);
    AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);

    ralCost.Update(&oldDA, this);

#ifdef _TIME_TRACE_
    // Dump the transfer table to the protocol stream.
    int i, j;
    prot << endl;
    for (i = 0; i < AM_Dis->VM_Dis->GetLSize(); i++) {
        for (j = 0; j < AM_Dis->VM_Dis->GetLSize(); j++) {
            prot << "[" << i << "]" << "[" << j << "] = " << ralCost.transfer[i][j] << "; ";
        }
        prot << endl;
    }
#endif

    return ralCost.GetCost();
}
// Realigns this array with the already aligned array APattern and returns the
// estimated communication cost of the realignment.
//
//  - Terminates the program if APattern is not aligned.
//  - If this array is not aligned yet, this is a plain alignment: cost 0.
//  - If ANewSign != 0 the array is detached from its current AMView and aligned
//    anew without any cost estimate: cost 0.
//  - Otherwise a copy of the array is taken before realignment, the array is
//    realigned, and CommCost::Update(old, new) followed by GetCost() gives the result.
//
// Fix 1: the ANewSign != 0 path used to return 0.0 without realigning, so the array
// kept its stale alignment; it now behaves like the AMView overload of RAlnDA
// (detach, align, cost 0).
// NOTE(review): confirm that skipping the realignment was not intentional.
// Fix 2: the snapshot and the cost accumulator are automatic objects instead of
// new/delete pairs, so they are released on every exit path.
double DArray::RAlnDA(DArray *APattern, const vector<long>& AAxisArray,
                      const vector<long>& ACoeffArray, const vector<long>& AConstArray,
                      long ANewSign)
{
    if (!APattern->IsAlign()) {
        prot << "Wrong call RAlnDA" << endl;
        exit(1);
    }

    if (!IsAlign()) {
        // Not aligned yet: ordinary align, nothing to move.
        AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);
        return 0.0;
    }

    if (ANewSign != 0) {
        // Leave the current AMView's array list first, then align from scratch.
        AM_Dis->DelDA(this);
        AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);
        return 0.0;
    }

    // Snapshot of the current placement and a cost table for the current VM; both
    // must be taken before AM_Dis is changed by AlnDA.
    DArray oldDA(*this);
    CommCost ralCost(AM_Dis->VM_Dis);

    // Leave the current AMView's array list, then realign.
    AM_Dis->DelDA(this);
    AlnDA(APattern, AAxisArray, ACoeffArray, AConstArray);

    ralCost.Update(&oldDA, this);

#ifdef _TIME_TRACE_
    // Dump the transfer table to the protocol stream.
    int i, j;
    prot << endl;
    for (i = 0; i < AM_Dis->VM_Dis->GetLSize(); i++) {
        for (j = 0; j < AM_Dis->VM_Dis->GetLSize(); j++) {
            prot << "[" << i << "]" << "[" << j << "] = " << ralCost.transfer[i][j] << "; ";
        }
        prot << endl;
    }
#endif

    return ralCost.GetCost();
}
// -------------------- Distributed --> Distributed ---------------------------------
double ArrayCopy(DArray* AFromArray, const vector<long>& BFromInitIndexArray,
const vector<long>& BFromLastIndexArray, const vector<long>& BFromStepArray,
DArray* AToArray, const vector<long>& BToInitIndexArray,
const vector<long>& BToLastIndexArray, const vector<long>& BToStepArray)
{
double time;
long i,DimSize;
//====
long p1;
//was vector<LS> blockIni;
//=***
unsigned sz = BFromInitIndexArray.size();
unsigned szt= BToInitIndexArray.size();
unsigned j;
vector<long> AFromInitIndexArray(sz);
vector<long> AFromLastIndexArray(sz);
vector<long> AFromStepArray(sz);
vector<long> AToInitIndexArray(szt);
vector<long> AToLastIndexArray(szt);
vector<long> AToStepArray(szt);
//====
// printf("Distr-Distr; Replicated %d %d \n",AToArray->Repl,AFromArray->Repl);
//=***
for ( j = 0; j < sz; j++) {
DimSize = AFromArray->SizeArray[j];
if (BFromInitIndexArray[j] == -1) {
AFromInitIndexArray[j] = 0;
AFromLastIndexArray[j] = AFromArray->GetSize(j + 1) - 1;
AFromStepArray[j] = 1;
} else {
AFromInitIndexArray[j] = BFromInitIndexArray[j];
AFromLastIndexArray[j] = BFromLastIndexArray[j];
AFromStepArray[j] = BFromStepArray[j];
if (AFromInitIndexArray[j] > AFromLastIndexArray[j])
AFromLastIndexArray[j] = AFromLastIndexArray[j];
if (AFromLastIndexArray[j] >= DimSize)
AFromLastIndexArray[j] = DimSize - 1;
}
}
for ( j = 0; j < szt; j++) {
if (BToInitIndexArray[j] == -1) {
AToInitIndexArray[j] = 0;
AToLastIndexArray[j] = AToArray->GetSize(j + 1) - 1;
AToStepArray[j] = 1;
} else {
AToInitIndexArray[j] = BToInitIndexArray[j];
AToLastIndexArray[j] = BToLastIndexArray[j];
AToStepArray[j] = BToStepArray[j];
}
}
CommCost *copyCost = new CommCost(AFromArray->AM_Dis->VM_Dis);
if (!AFromArray->CheckIndex(AFromInitIndexArray, AFromLastIndexArray, AFromStepArray))
{
prot << "Wrong call ArrayCopy" << endl;
exit(1);
}
if (!AToArray->CheckIndex(AToInitIndexArray, AToLastIndexArray, AToStepArray))
{
prot << "Wrong call ArrayCopy" << endl;
exit(1);
}
//====
/* was
for (i = 0; i < AFromArray->Rank(); i++) {
// cout << AFromArray->Rank() << endl;
// cout << "AFromInitIndexArray[" << i << "] = " << AFromInitIndexArray[i] << endl;
// cout << "AFromLastIndexArray[" << i << "] = " << AFromLastIndexArray[i] << endl;
blockIni.push_back(LS(AFromInitIndexArray[i], AFromLastIndexArray[i]));
}
i = 5;
Block readBlock(blockIni);
if (AFromArray->Repl)
time = 0;
else {
copyCost->CopyUpdate(AFromArray, readBlock);
time = copyCost->GetCost();
}
*/
//<2F><><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> <20> cout & i=5; & Block...
//=***
if (AFromArray->Repl)
time = 0;
else {
//====
if(AToArray->Repl)
{
//=***
vector<LS> blockIni;
for (i = 0; i < AFromArray->Rank(); i++) {
// cout << AFromArray->Rank() << endl;
// cout << "AFromInitIndexArray[" << i << "] = " << AFromInitIndexArray[i] << endl;
// cout << "AFromLastIndexArray[" << i << "] = " << AFromLastIndexArray[i] << endl;
// cout << "AFromStepArray[" << i << "] = " << AFromStepArray[i] << endl;
if(AFromStepArray[i]>0)
blockIni.push_back(LS(AFromInitIndexArray[i], AFromLastIndexArray[i], AFromStepArray[i]));
else
blockIni.push_back(LS(AFromLastIndexArray[i], AFromInitIndexArray[i], -AFromStepArray[i]));
}
Block readBlock(blockIni);
copyCost->CopyUpdate(AFromArray, readBlock);
time = copyCost->GetCost();
// printf("Synch=%d time=%f\n",SynchCopy,time);
//====
}
else
{ long x1,x2,x3;
for (p1 = 0; p1 < copyCost->GetLSize(); p1++) {
vector<LS> blockIni;
vector<LS> blockIni1;
Block locBlock(AToArray, p1, 1);
for (i = 0; i < AToArray->Rank(); i++)
{ if(AToStepArray[i]>0)
blockIni1.push_back(LS(AToInitIndexArray[i], AToLastIndexArray[i], AToStepArray[i]));
else
blockIni1.push_back(LS(AToLastIndexArray[i], AToInitIndexArray[i], -AToStepArray[i]));
}
Block writeBlock(blockIni1);
//printf("lockAll[%d].0=%d-%d\n",p1,locBlock.GetLower(0),locBlock.GetUpper(0));
//printf("lockAll[%d].0=%d-%d\n",p1,locBlock.GetLower(1),locBlock.GetUpper(1));
//printf("lockAll[%d].empty=%d\n",p1,locBlock.empty());
//printf("writeBl[%d].1=%d-%d\n",p1,writeBlock.GetLower(0),writeBlock.GetUpper(0));
//printf("writeBl[%d].1=%d-%d\n",p1,writeBlock.GetLower(1),writeBlock.GetUpper(1));
Block writeLocBlock = locBlock ^ writeBlock;
if(writeLocBlock.empty()) continue; //<2F><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
//printf("ATo[%d].0=%d-%d\n",p1,writeLocBlock.GetLower(0),writeLocBlock.GetUpper(0));
//printf("ATo[%d].1=%d-%d\n",p1,writeLocBlock.GetLower(1),writeLocBlock.GetUpper(1));
for (i = 0; i < AFromArray->Rank(); i++)
{
// printf("FROM ***%d-%d***\n",AFromInitIndexArray[i],AFromLastIndexArray[i]);
if(AToStepArray[i]==0 && AToInitIndexArray[i]==AToLastIndexArray[i]) AToStepArray[i]=1; // <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><> <20><><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if(AFromStepArray[i]>0 && AToStepArray[i]>0)
x1 = AFromInitIndexArray[i] + (writeLocBlock.GetLower(i) - AToInitIndexArray[i])*(AFromStepArray[i]/AToStepArray[i]);
if(AFromStepArray[i]>0 && AToStepArray[i]<0)
x1 = AFromInitIndexArray[i] + (writeLocBlock.GetLower(i) - AToLastIndexArray[i])*(-AFromStepArray[i]/AToStepArray[i]);
if(AFromStepArray[i]<=0 && AToStepArray[i]>0)
x1 = AFromLastIndexArray[i] + (writeLocBlock.GetLower(i) - AToInitIndexArray[i])*(-AFromStepArray[i]/AToStepArray[i]);
if(AFromStepArray[i]<=0 && AToStepArray[i]<0)
x1 = AFromLastIndexArray[i] + (writeLocBlock.GetLower(i) - AToLastIndexArray[i])*(AFromStepArray[i]/AToStepArray[i]);
x2 = x1 + (writeLocBlock.GetUpper(i) - writeLocBlock.GetLower(i))*(abs(AFromStepArray[i]/AToStepArray[i]));
x3 = abs(AFromStepArray[i]);
blockIni.push_back(LS(x1,x2,x3));
}
Block readBlock(blockIni);
// printf("AFrom[%d].0=%d-%d\n",p1,readBlock.GetLower(0),readBlock.GetUpper(0));
// printf("AFrom[%d].1=%d-%d\n",p1,readBlock.GetLower(1),readBlock.GetUpper(1));
copyCost->CopyUpdateDistr(AFromArray, readBlock, p1);
}
time = copyCost->GetCost();
// printf("Synch=%d time=%f\n",SynchCopy,time);
}
//=***
}
#ifdef _TIME_TRACE_
// <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
//long j;
prot << endl;
for (i = 0; i < AFromArray->AM_Dis->VM_Dis->GetLSize(); i++)
{
for (j = 0; j < AFromArray->AM_Dis->VM_Dis->GetLSize(); j++)
{
prot << "[" << i << "]" << "[" << j << "] = " << copyCost->transfer[i][j] << "; ";
}
prot << endl;
}
// <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
#endif
// ??? <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><> <20><><EFBFBD>, <20><> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD><EFBFBD> - <20> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>(<28><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD> time = 0
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>(<28><><EFBFBD> <20><><EFBFBD><EFBFBD>)
// <20><><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
delete copyCost;
return time;
}
// -------------------- Distributed --> Replicated ---------------------------------
// Estimates the cost of copying a section of AFromArray when no destination
// array is given (the "Distributed --> Replicated" case named above).
//
// AFromArray          - source array; the cost model is built for
//                       AFromArray->AM_Dis->VM_Dis.
// BFromInitIndexArray - first index per dimension; -1 selects the whole dimension.
// BFromLastIndexArray - last index per dimension (clamped to the dimension size).
// BFromStepArray      - step per dimension.
// ACopyRegim          - not read anywhere in this function.
//
// Returns 0 when AFromArray->Repl is non-zero; otherwise the value of
// CommCost::GetCost() after one CopyUpdateDistr() call for every p1 in
// [0, copyCost->GetLSize()). Calls exit(1) if CheckIndex() rejects the section.
double ArrayCopy(DArray* AFromArray, const vector<long>& BFromInitIndexArray,
const vector<long>& BFromLastIndexArray, const vector<long>& BFromStepArray,
long ACopyRegim)
{
double time;
long i;
long DimSize;
vector<LS> blockIni;
Block readBlock;
// Number of dimensions described by the caller's index arrays.
unsigned sz = BFromInitIndexArray.size();
unsigned j;
// printf("Distr-Repl; Replicated %d \n",AFromArray->Repl);
// Working copies of the section bounds; normalised in the loop below.
vector<long> AFromInitIndexArray(sz);
vector<long> AFromLastIndexArray(sz);
vector<long> AFromStepArray(sz);
for ( j = 0; j < sz; j++) {
DimSize = AFromArray->SizeArray[j];
if (BFromInitIndexArray[j] == -1) {
// An initial index of -1 means "the whole dimension" with step 1.
// GetSize() is called with j + 1, i.e. it appears to take a 1-based dimension number.
AFromInitIndexArray[j] = 0;
AFromLastIndexArray[j] = AFromArray->GetSize(j + 1) - 1;
AFromStepArray[j] = 1;
} else {
AFromInitIndexArray[j] = BFromInitIndexArray[j];
AFromLastIndexArray[j] = BFromLastIndexArray[j];
AFromStepArray[j] = BFromStepArray[j];
// NOTE(review): the assignment below copies the element onto itself and so
// has no effect; the intended handling of Init > Last is unclear - confirm.
if (AFromInitIndexArray[j] > AFromLastIndexArray[j])
AFromLastIndexArray[j] = AFromLastIndexArray[j];
// Clamp the last index to the final valid element of the dimension.
if (AFromLastIndexArray[j] >= DimSize)
AFromLastIndexArray[j] = DimSize - 1;
}
}
#ifdef P_DEBUG
// Debug dump of the array and of the normalised section bounds.
prot << "ArrayCopy: " << *AFromArray;
prot << "ArrayCopy: AFromInitIndexArray: ";
for ( j = 0; j < sz; j++)
prot << AFromInitIndexArray[j] << ',';
prot << endl;
prot << "ArrayCopy: AFromLastIndexArray: ";
for ( j = 0; j < sz; j++)
prot << AFromLastIndexArray[j] << ',';
prot << endl;
prot << "ArrayCopy: AFromStepArray: ";
for ( j = 0; j < sz; j++)
prot << AFromStepArray[j] << ',';
prot << endl;
#endif
// Cost accumulator; released by the delete just before the final return.
CommCost *copyCost = new CommCost(AFromArray->AM_Dis->VM_Dis);
// CheckIndex() returns 0 when any first/last index is out of bounds.
if (!AFromArray->CheckIndex(AFromInitIndexArray, AFromLastIndexArray, AFromStepArray)) {
prot << "Wrong call ArrayCopy" << endl;
// copyCost is not deleted on this path; the process terminates here.
exit(1);
}
// NOTE(review): this function-scope blockIni/readBlock pair is built here but
// is not read again below - the loop further down declares its own locals
// with the same names, which hide these.
for (i = 0; i < AFromArray->Rank(); i++) {
blockIni.push_back(LS(AFromInitIndexArray[i], AFromLastIndexArray[i]));
}
readBlock = Block(blockIni);
// A source array flagged Repl costs nothing to copy.
if (AFromArray->Repl)
time = 0;
else{
//====
int p1;
// One CopyUpdateDistr() call per index p1 in [0, GetLSize()).
for (p1 = 0; p1 < copyCost->GetLSize(); p1++) {
vector<LS> blockIni;
for (i = 0; i < AFromArray->Rank(); i++) {
//printf("***%d %d - %d %d***\n",AFromInitIndexArray[i],AFromLastIndexArray[i],AFromInitIndexArray[i]+locBlock.GetLower(i),AFromInitIndexArray[i]+locBlock.GetUpper(i));
// NOTE(review): both LS bounds are the same value (Init,Init or Last,Last),
// so each dimension is given a single-point range - confirm this is intended.
if(AFromStepArray[i]>0)
blockIni.push_back(LS(AFromInitIndexArray[i], AFromInitIndexArray[i], AFromStepArray[i]));
else
blockIni.push_back(LS(AFromLastIndexArray[i], AFromLastIndexArray[i], -AFromStepArray[i]));
}
Block readBlock(blockIni);
copyCost->CopyUpdateDistr(AFromArray, readBlock, p1);
}
time = copyCost->GetCost();
// printf("Synch=%d time=%f\n",SynchCopy,time);
}
//=***
#ifdef _TIME_TRACE_
// Trace output: dump the copyCost->transfer matrix, one row per line.
// The loops use the function-level i (long) and j (unsigned).
//long j;
prot << endl;
for (i = 0; i < AFromArray->AM_Dis->VM_Dis->GetLSize(); i++)
{
for (j = 0; j < AFromArray->AM_Dis->VM_Dis->GetLSize(); j++)
{
prot << "[" << i << "]" << "[" << j << "] = " << copyCost->transfer[i][j] << "; ";
}
prot << endl;
}
// End of trace output.
#endif
// NOTE(review): the original four-line comment here (it began with "???") was
// not in English and has been lost to an encoding error; its content could
// not be recovered.
delete copyCost;
return time;
}
// Copy constructor: copies the Space base together with TypeSize, AM_Dis
// (the pointer itself, not the pointed-to AMView), AlignRule and Repl.
// NOTE(review): LowShdWidthArray and HiShdWidthArray are not copied and stay
// empty in the new object - confirm this is intended.
DArray::DArray(const DArray &x) :
    Space(x),
    TypeSize(x.TypeSize),
    AM_Dis(x.AM_Dis),
    AlignRule(x.AlignRule),
    Repl(x.Repl)
{
}
// Copy assignment: a self-assignment is a no-op; otherwise the Space base,
// TypeSize, AM_Dis (pointer only), AlignRule and Repl are taken from x.
// NOTE(review): LowShdWidthArray and HiShdWidthArray are left untouched -
// confirm this is intended.
DArray & DArray::operator =(const DArray & x)
{
    if (this == &x)
        return *this;

    Space::operator =(x);
    TypeSize  = x.TypeSize;
    AM_Dis    = x.AM_Dis;
    AlignRule = x.AlignRule;
    Repl      = x.Repl;
    return *this;
}
// Default constructor: an array with zero TypeSize, no AMView (AM_Dis == 0),
// an empty alignment rule and Repl == 0.
DArray::DArray() :
    Space(),
    TypeSize(0),
    AM_Dis(0),
    AlignRule(),
    Repl(0)
{
}
// Returns true when the array has an AMView attached (AM_Dis is non-null).
bool DArray::IsAlign()
{
    return AM_Dis != 0;
}
long DArray::GetMapDim(long arrDim, int & dir)
{
long vmDim = 0;
AlignAxis align;
DistAxis dist;
long amDim;
if(arrDim<=0)
{
printf("arrDim<=0\n");
dir=1; // not sure in it
return 0; // not much sure in it
}
align = AlignRule[arrDim-1];
if (align.Attr == align_NORMAL)
{
amDim = align.TAxis;
dir = (align.A > 0) ? 1 : -1;
dist = AM_Dis->DistRule[amDim-1];
if(dist.Attr == map_BLOCK)
vmDim = dist.PAxis;
}
return vmDim;
}
// Validates an index section and returns the number of elements it selects.
//
// Returns 0 (after logging the offending dimension to prot) if any first or
// last index lies outside [0, SizeArray[d]). Otherwise returns the product
// over all dimensions of (Last - Init) / Step + 1, where a dimension with a
// zero step counts as 1.
long DArray::CheckIndex(const vector<long>& InitIndexArray,
    vector<long>& LastIndexArray, const vector<long>& StepArray)
{
    int d;

    // Pass 1: bounds check of every first/last index.
    for (d = 0; d < Rank(); d++) {
        const long extent = SizeArray[d];
        const bool initInside = InitIndexArray[d] >= 0 && InitIndexArray[d] < extent;
        const bool lastInside = LastIndexArray[d] >= 0 && LastIndexArray[d] < extent;
        if (!(initInside && lastInside)) {
            prot << "i = " << d << " InitIndexArray[i] = " << InitIndexArray[d]
                 << " LastIndexArray[i] = " << LastIndexArray[d]
                 << " StepArray[i] = " << StepArray[d]
                 << " DimSize = " << extent << endl;
            return 0;
        }
    }

    // Pass 2: multiply the per-dimension element counts.
    long total = 1;
    for (d = 0; d < Rank(); d++) {
        long count = 1;
        if (StepArray[d] != 0)
            count = (LastIndexArray[d] - InitIndexArray[d]) / StepArray[d] + 1;
        total *= count;
    }
    return total;
}
// Forwards the request unchanged to the array's AMView and returns the value
// produced by AMView::RDisAM(). AM_Dis is dereferenced without a null check.
double DArray::RDisDA(const vector<long>& AAxisArray, const vector<long>& ADistrParamArray, long ANewSign)
{
    const double result = AM_Dis->RDisAM(AAxisArray, ADistrParamArray, ANewSign);
    return result;
}
#ifdef P_DEBUG
// Debug printer: writes TypeSize and the AM_Dis pointer, the Space base,
// the low:high shadow width pairs and the alignment rules, one rule per line.
ostream& operator << (ostream& os, const DArray& da)
{
    os << "DArray: TypeSize = " << da.TypeSize
       << ", AM_Dis = " << (void*) da.AM_Dis << endl;
    os << " " << (Space&) da;

    os << " LowShdWidthArray:HiShdWidthArray = ";
    // HiShdWidthArray is indexed over the length of LowShdWidthArray.
    for (vector<long>::size_type k = 0; k < da.LowShdWidthArray.size(); ++k)
        os << da.LowShdWidthArray[k] << ':' << da.HiShdWidthArray[k] << ' ';
    os << endl;

    os << " AlignRule:" << endl;
    for (vector<AlignAxis>::size_type r = 0; r < da.AlignRule.size(); ++r)
        os << " " << r << ' ' << da.AlignRule[r] << endl;

    return os;
}
#endif

View File

@@ -0,0 +1,99 @@
#ifndef DArrayH
#define DArrayH
//////////////////////////////////////////////////////////////////////
//
// DArray.h: interface for the DArray class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include <fstream>
#include "Space.h"
#include "AMView.h"
#include "AlignAxis.h"
#include "Block.h"
#include "CommCost.h"
//#include "ModelStructs.h"
class AMView;
// Distributed array description: a Space (index space) plus shadow widths,
// an alignment rule and a pointer to the AMView the array is attached to.
class DArray : public Space {
// Private helper taking the alignment arrays and producing IniRule/TempRank;
// its definition is not part of this header.
void PrepareAlign(long& TempRank, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
std::vector<AlignAxis>& IniRule);
// Validates the first/last indices against the array sizes.
// Returns the number of elements selected by the section, or 0 if any
// first/last index is out of bounds.
long CheckIndex(const std::vector<long>& InitIndexArray,
std::vector<long>& LastIndexArray,
const std::vector<long>& StepArray);
public:
// Low and high shadow widths; presumably one entry per dimension - TODO confirm.
std::vector<long> LowShdWidthArray;
std::vector<long> HiShdWidthArray;
long TypeSize; // presumably the size of one array element in bytes - TODO confirm
AMView *AM_Dis; // AMView the array is attached to; 0 when there is none (see IsAlign)
// Alignment rule entries; GetMapDim indexes this vector by array dimension - 1.
std::vector<AlignAxis> AlignRule;
int Repl; // non-zero makes ArrayCopy report zero cost; presumably a "replicated" flag - TODO confirm
DArray();
DArray(const std::vector<long>& ASizeArray, const std::vector<long>& ALowShdWidthArray,
const std::vector<long>& AHiShdWidthArray, int ATypeSize);
DArray(const DArray &);
~DArray();
DArray & operator= (const DArray &x);
// Align this array with an AMView or with another array.
void AlnDA(AMView *APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray);
void AlnDA(DArray* APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray);
// Realignment variants; they return a double, presumably the estimated cost - TODO confirm.
double RAlnDA(AMView *APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
long ANewSign);
double RAlnDA(DArray* APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
long ANewSign);
// Copy of a section of one array into a section of another; returns a double cost value.
friend double ArrayCopy(
DArray* AFromArray,
const std::vector<long>& AFromInitIndexArray,
const std::vector<long>& AFromLastIndexArray,
const std::vector<long>& AFromStepArray,
DArray* AToArray,
const std::vector<long>& AToInitIndexArray,
const std::vector<long>& AToLastIndexArray,
const std::vector<long>& AToStepArray);
// Copy of a section of an array with no destination array; returns a double cost value.
friend double ArrayCopy(DArray* AFromArray,
const std::vector<long>& AFromInitIndexArray,
const std::vector<long>& AFromLastIndexArray,
const std::vector<long>& AFromStepArray,
long ACopyRegim);
// Returns the VM axis (DistAxis::PAxis) for array dimension arrDim (1-based)
// when that dimension is aligned normally onto a block-distributed AMView
// axis; returns 0 otherwise.
// dir receives 1 or -1 according to the sign of the alignment coefficient.
long GetMapDim(long arrDim, int &dir);
// True when AM_Dis is non-null.
bool IsAlign();
// Forwards to AM_Dis->RDisAM() with the same arguments.
double RDisDA(const std::vector<long>& AAxisArray, const std::vector<long>& ADistrParamArray,
long ANewSign);
#ifdef P_DEBUG
friend std::ostream& operator << (std::ostream& os, const DArray& s);
#endif
};
#endif

View File

@@ -0,0 +1,44 @@
// DimBound.cpp: implementation of the DimBound class.
//
//////////////////////////////////////////////////////////////////////
#include "DimBound.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor: zero-initialises every member so that a
// default-constructed DimBound can be compared and copied safely.
// Previously all five members were left indeterminate, which made
// operator== / operator< on such an object undefined behaviour.
DimBound::DimBound() :
    arrDim(0),
    vmDim(0),
    dir(0),
    LeftBSize(0),
    RightBSize(0)
{
}
// Destructor: nothing to release (declared virtual in the class definition).
DimBound::~DimBound()
{
}
// Member-wise constructor: stores each argument in the member of the same name.
DimBound::DimBound(long AarrDim, long AvmDim, int Adir, long ALeftBSize, long ARightBSize)
    : arrDim(AarrDim), vmDim(AvmDim), dir(Adir),
      LeftBSize(ALeftBSize), RightBSize(ARightBSize)
{
}
// Lexicographic ordering over (arrDim, vmDim, dir, LeftBSize, RightBSize).
//
// The previous implementation returned true unconditionally, which is not a
// strict weak ordering (x < x held, and both x < y and y < x held), so using
// it with sorting or ordered containers was undefined behaviour.
// Two objects are equivalent under this ordering exactly when operator==
// reports them equal.
bool operator < (const DimBound& x, const DimBound& y)
{
    if (x.arrDim != y.arrDim)
        return x.arrDim < y.arrDim;
    if (x.vmDim != y.vmDim)
        return x.vmDim < y.vmDim;
    if (x.dir != y.dir)
        return x.dir < y.dir;
    if (x.LeftBSize != y.LeftBSize)
        return x.LeftBSize < y.LeftBSize;
    return x.RightBSize < y.RightBSize;
}
// Two DimBound objects are equal when all five members match.
bool operator == (const DimBound& x, const DimBound& y)
{
    if (x.arrDim != y.arrDim)
        return false;
    if (x.dir != y.dir)
        return false;
    if (x.LeftBSize != y.LeftBSize)
        return false;
    if (x.RightBSize != y.RightBSize)
        return false;
    return x.vmDim == y.vmDim;
}

View File

@@ -0,0 +1,30 @@
#ifndef DimBoundH
#define DimBoundH
//////////////////////////////////////////////////////////////////////
//
// DimBound.h: interface for the DimBound class.
//
//////////////////////////////////////////////////////////////////////
// Plain record tying an array dimension to a virtual machine dimension,
// together with a direction and a left/right size pair.
class DimBound {
public:
long arrDim; // Array dimension
long vmDim; // Virtual machine dimension
int dir; // 1 or -1; presumably the direction of the array dimension
// relative to the VM dimension - TODO confirm
long LeftBSize; // presumably the size of the left boundary - TODO confirm
long RightBSize; // presumably the size of the right boundary - TODO confirm
// Stores each argument in the member of the same name.
DimBound(long AarrDim, long AvmDim, int Adir, long ALeftBSize, long ARightBSize);
DimBound();
virtual ~DimBound();
};
bool operator==(const DimBound& x, const DimBound& y);
bool operator<(const DimBound& x, const DimBound& y);
#endif

View File

@@ -0,0 +1,52 @@
// DistAxis.cpp: implementation of the DistAxis class.
//
//////////////////////////////////////////////////////////////////////
#include "DistAxis.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor: value-initialises every member.
// Previously Attr, Axis and PAxis were left indeterminate, so comparing or
// copying a default-constructed DistAxis read uninitialised memory.
// Attr becomes the zero value of map_Type, which matches none of the named
// enumerators (they start at 1) and therefore acts as "not set".
DistAxis::DistAxis() :
    Attr(),
    Axis(0),
    PAxis(0)
{
}
// Destructor: nothing to release (declared virtual in the class definition).
DistAxis::~DistAxis()
{
}
// Member-wise constructor: stores the distribution kind and the two axis numbers.
DistAxis::DistAxis(map_Type AAttr, long AAxis, long APAxis)
    : Attr(AAttr),
      Axis(AAxis),
      PAxis(APAxis)
{
}
// Copy assignment: copies Attr, Axis and PAxis from DA and returns *this.
DistAxis& DistAxis :: operator= (const DistAxis& DA)
{
    Attr  = DA.Attr;
    Axis  = DA.Axis;
    PAxis = DA.PAxis;
    return *this;
}
// Two DistAxis objects are equal when Attr, Axis and PAxis all match.
bool operator==(const DistAxis& x, const DistAxis& y)
{
    if (x.Attr != y.Attr)
        return false;
    if (x.Axis != y.Axis)
        return false;
    return x.PAxis == y.PAxis;
}
// Ordering of distribution rules.
// Entries whose Attr is map_BLOCK or map_COLLAPSE sort before all other
// entries and are ordered among themselves by Axis; the remaining entries
// are ordered among themselves by PAxis.
bool operator<(const DistAxis& x, const DistAxis& y)
{
    const bool xByAxis = (x.Attr == map_BLOCK || x.Attr == map_COLLAPSE);
    const bool yByAxis = (y.Attr == map_BLOCK || y.Attr == map_COLLAPSE);

    // Different groups: the BLOCK/COLLAPSE group comes first.
    if (xByAxis != yByAxis)
        return xByAxis;

    // Same group: compare by the key that group uses.
    return xByAxis ? (x.Axis < y.Axis) : (x.PAxis < y.PAxis);
}

View File

@@ -0,0 +1,34 @@
#ifndef DistAxisH
#define DistAxisH
//////////////////////////////////////////////////////////////////////
//
// DistAxis.h: interface for the DistAxis class.
//
//////////////////////////////////////////////////////////////////////
// Kind of a distribution rule entry (DistAxis::Attr).
// operator<(DistAxis, DistAxis) orders map_BLOCK / map_COLLAPSE entries by
// Axis and places them before all other entries, which are ordered by PAxis.
enum map_Type {
map_BLOCK = 1, // 1 - the kind for which DArray::GetMapDim reports the entry's PAxis
map_COLLAPSE, // 2
map_REPLICATE, // 3
map_NORMVMAXIS // 4
};
// One entry of a distribution rule: a distribution kind plus an AMView axis
// number and a VM axis number.
class DistAxis {
public:
map_Type Attr; // kind of this entry (see map_Type)
long Axis; // AMView dimension number; presumably 1-based - TODO confirm
long PAxis; // VM dimension number; presumably the VM axis paired with Axis - TODO confirm
// Stores each argument in the corresponding member.
DistAxis(map_Type AAttr, long AAxis, long APAxis);
DistAxis();
virtual ~DistAxis();
DistAxis& operator = (const DistAxis&);
// Equal when Attr, Axis and PAxis all match.
friend bool operator == (const DistAxis& x, const DistAxis& y);
// map_BLOCK / map_COLLAPSE entries first (ordered by Axis), then the rest (ordered by PAxis).
friend bool operator < (const DistAxis& x, const DistAxis& y);
};
#endif

View File

@@ -0,0 +1,586 @@
#include <string>
#include <fstream>
#include "Event.h"
using namespace std;
extern ofstream prot;
// Maps the textual name of a traced function to its Event identifier.
//
// event_name - function name exactly as it appears in the trace
//              (case-sensitive, including any trailing underscore).
// Returns the matching Event value, or Unknown_Func when the name is not
// in the list below.
//
// Changes from the previous version:
//  * "delps_" now maps to delps_; it used to return crtps_ (copy-paste slip).
//  * the second, unreachable tests for "runam_" and "dvm_Init" were removed.
Event EventNameToID(const string& event_name)
{
    if (event_name == "blkdiv_") return blkdiv_;
    if (event_name == "dvm_Init") return dvm_Init;
    if (event_name == "delrg_") return delrg_;
    if (event_name == "insred_") return insred_;
    if (event_name == "arrcpy_") return arrcpy_;
    if (event_name == "aarrcp_") return aarrcp_;
    if (event_name == "waitcp_") return waitcp_;
    if (event_name == "crtda_") return crtda_;
    if (event_name == "getam_") return getam_;
    if (event_name == "mapam_") return mapam_;
    if (event_name == "runam_") return runam_;
    if (event_name == "stopam_") return stopam_;
    if (event_name == "getamv_") return getamv_;
    if (event_name == "getamr_") return getamr_;
    if (event_name == "crtamv_") return crtamv_;
    if (event_name == "align_") return align_;
    if (event_name == "getps_") return getps_;
    if (event_name == "saverv_") return saverv_;
    if (event_name == "tstelm_") return tstelm_;
    if (event_name == "rwelm_") return rwelm_;
    if (event_name == "rlocel_") return rlocel_;
    if (event_name == "delda_") return delda_;
    if (event_name == "delobj_") return delobj_;
    if (event_name == "copelm_") return copelm_;
    if (event_name == "elmcpy_") return elmcpy_;
    if (event_name == "wlocel_") return wlocel_;
    if (event_name == "clocel_") return clocel_;
    if (event_name == "getlen_") return getlen_;
    if (event_name == "dvm_fopen") return dvm_fopen;
    if (event_name == "dvm_fclose") return dvm_fclose;
    if (event_name == "dvm_void_vfprintf") return dvm_void_vfprintf;
    if (event_name == "dvm_vfprintf") return dvm_vfprintf;
    if (event_name == "dvm_fwrite") return dvm_fwrite;
    if (event_name == "dvm_fread") return dvm_fread;
    if (event_name == "tron_") return tron_;
    if (event_name == "delamv_") return delamv_;
    if (event_name == "distr_") return distr_;
    if (event_name == "crtred_") return crtred_;
    if (event_name == "delred_") return delred_;
    if (event_name == "begbl_") return begbl_;
    if (event_name == "endbl_") return endbl_;
    if (event_name == "crtpl_") return crtpl_;
    if (event_name == "mappl_") return mappl_;
    if (event_name == "endpl_") return endpl_;
    if (event_name == "locind_") return locind_;
    if (event_name == "tstda_") return tstda_;
    if (event_name == "srmem_") return srmem_;
    if (event_name == "tstio_") return tstio_;
    if (event_name == "getrnk_") return getrnk_;
    if (event_name == "getsiz_") return getsiz_;
    if (event_name == "dvm_vscanf") return dvm_vscanf;
    if (event_name == "realn_") return realn_;
    if (event_name == "redis_") return redis_;
    if (event_name == "arrmap_") return arrmap_;
    if (event_name == "setpsw_") return setpsw_;
    if (event_name == "setind_") return setind_;
    if (event_name == "locsiz_") return locsiz_;
    if (event_name == "imlast_") return imlast_;
    if (event_name == "malign_") return malign_;
    if (event_name == "crtrg_") return crtrg_;
    if (event_name == "mrealn_") return mrealn_;
    if (event_name == "strtrd_") return strtrd_;
    if (event_name == "waitrd_") return waitrd_;
    if (event_name == "amvmap_") return amvmap_;
    if (event_name == "exfrst_") return exfrst_;
    if (event_name == "across_") return across_;
    if (event_name == "dopl_") return dopl_;
    if (event_name == "mdistr_") return mdistr_;
    if (event_name == "mredis_") return mredis_;
    if (event_name == "delarm_") return delarm_;
    if (event_name == "delmvm_") return delmvm_;
    if (event_name == "dvm_fscanf") return dvm_fscanf;
    if (event_name == "dvm_scanf") return dvm_scanf;
    if (event_name == "dvm_vfscanf") return dvm_vfscanf;
    if (event_name == "dvm_clearerr") return dvm_clearerr;
    if (event_name == "dvm_feof") return dvm_feof;
    if (event_name == "dvm_ferror") return dvm_ferror;
    if (event_name == "dvm_fflush") return dvm_fflush;
    if (event_name == "dvm_fgetc") return dvm_fgetc;
    if (event_name == "dvm_fgetpos") return dvm_fgetpos;
    if (event_name == "dvm_fgets") return dvm_fgets;
    if (event_name == "dvm_fputc") return dvm_fputc;
    if (event_name == "dvm_fputs") return dvm_fputs;
    if (event_name == "dvm_freopen") return dvm_freopen;
    if (event_name == "dvm_fseek") return dvm_fseek;
    if (event_name == "dvm_fsetpos") return dvm_fsetpos;
    if (event_name == "dvm_ftell") return dvm_ftell;
    if (event_name == "dvm_getc") return dvm_getc;
    if (event_name == "dvm_getchar") return dvm_getchar;
    if (event_name == "dvm_gets") return dvm_gets;
    if (event_name == "dvm_putc") return dvm_putc;
    if (event_name == "dvm_putchar") return dvm_putchar;
    if (event_name == "dvm_puts") return dvm_puts;
    if (event_name == "dvm_rewind") return dvm_rewind;
    if (event_name == "dvm_setbuf") return dvm_setbuf;
    if (event_name == "dvm_setvbuf") return dvm_setvbuf;
    if (event_name == "dvm_tmpfile") return dvm_tmpfile;
    if (event_name == "dvm_ungetc") return dvm_ungetc;
    if (event_name == "dvm_void_fprintf") return dvm_void_fprintf;
    if (event_name == "dvm_fprintf") return dvm_fprintf;
    if (event_name == "dvm_void_printf") return dvm_void_printf;
    if (event_name == "dvm_printf") return dvm_printf;
    if (event_name == "dvm_void_vprintf") return dvm_void_vprintf;
    if (event_name == "dvm_vprintf") return dvm_vprintf;
    if (event_name == "dvm_remove") return dvm_remove;
    if (event_name == "dvm_rename") return dvm_rename;
    if (event_name == "dvm_tmpnam") return dvm_tmpnam;
    if (event_name == "dvm_close") return dvm_close;
    if (event_name == "dvm_fstat") return dvm_fstat;
    if (event_name == "dvm_lseek") return dvm_lseek;
    if (event_name == "dvm_open") return dvm_open;
    if (event_name == "dvm_read") return dvm_read;
    if (event_name == "dvm_write") return dvm_write;
    if (event_name == "dvm_access") return dvm_access;
    if (event_name == "dvm_stat") return dvm_stat;
    if (event_name == "mps_Bcast") return mps_Bcast;
    if (event_name == "mps_Barrier") return mps_Barrier;
    if (event_name == "dvm_dfread") return dvm_dfread;
    if (event_name == "dvm_dfwrite") return dvm_dfwrite;
    if (event_name == "crtshg_") return crtshg_;
    if (event_name == "inssh_") return inssh_;
    if (event_name == "insshd_") return insshd_;
    if (event_name == "incsh_") return incsh_;
    if (event_name == "incshd_") return incshd_;
    if (event_name == "strtsh_") return strtsh_;
    if (event_name == "waitsh_") return waitsh_;
    if (event_name == "sendsh_") return sendsh_;
    if (event_name == "recvsh_") return recvsh_;
    if (event_name == "delshg_") return delshg_;
    if (event_name == "getind_") return getind_;
    if (event_name == "addhdr_") return addhdr_;
    if (event_name == "delhdr_") return delhdr_;
    if (event_name == "troff_") return troff_;
    if (event_name == "biof_") return biof_;
    if (event_name == "eiof_") return eiof_;
    if (event_name == "crtps_") return crtps_;
    if (event_name == "psview_") return psview_;
    // Fixed: this entry used to return crtps_.
    if (event_name == "delps_") return delps_;
    if (event_name == "setelw_") return setelw_;
    if (event_name == "dprstv_") return dprstv_;
    if (event_name == "dstv_") return dstv_;
    if (event_name == "dldv_") return dldv_;
    if (event_name == "dbegpl_") return dbegpl_;
    if (event_name == "dbegsl_") return dbegsl_;
    if (event_name == "dendl_") return dendl_;
    if (event_name == "diter_") return diter_;
    if (event_name == "drmbuf_") return drmbuf_;
    if (event_name == "dskpbl_") return dskpbl_;
    if (event_name == "binter_") return binter_;
    if (event_name == "einter_") return einter_;
    if (event_name == "bsloop_") return bsloop_;
    if (event_name == "bploop_") return bploop_;
    if (event_name == "eloop_") return eloop_;
    // The identifier for "dvm_exit" carries an Event_ prefix, unlike the others.
    if (event_name == "dvm_exit") return Event_dvm_exit;
    if (event_name == "crtrbl_") return crtrbl_;
    if (event_name == "crtrbp_") return crtrbp_;
    if (event_name == "loadrb_") return loadrb_;
    if (event_name == "waitrb_") return waitrb_;
    if (event_name == "crtbg_") return crtbg_;
    if (event_name == "insrb_") return insrb_;
    if (event_name == "loadbg_") return loadbg_;
    if (event_name == "waitbg_") return waitbg_;
    return Unknown_Func;
}
//------------------------------------------------------------------------------
#ifdef P_DEBUG
static std::string IDToEventName(const Event& event_id)
{
if(event_id == delrg_) return "delrg_";
if(event_id == insred_) return "insred_";
if(event_id == arrcpy_) return "arrcpy_";
if(event_id == aarrcp_) return "aarrcp_";
if(event_id == waitcp_) return "waitcp_";
if(event_id == crtda_) return "crtda_";
if(event_id == getam_) return "getam_";
if(event_id == mapam_) return "mapam_";
if(event_id == runam_) return "runam_";
if(event_id == stopam_) return "stopam_";
if(event_id == getamv_) return "getamv_";
if(event_id == getamr_) return "getamr_";
if(event_id == crtamv_) return "crtamv_";
if(event_id == runam_) return "runam_";
if(event_id == align_) return "align_";
if(event_id == getps_) return "getps_";
if(event_id == saverv_) return "saverv_";
if(event_id == tstelm_) return "tstelm_";
if(event_id == rwelm_) return "rwelm_";
if(event_id == rlocel_) return "rlocel_";
if(event_id == delda_) return "delda_";
if(event_id == delobj_) return "delobj_";
if(event_id == copelm_) return "copelm_";
if(event_id == elmcpy_) return "elmcpy_";
if(event_id == wlocel_) return "wlocel_";
if(event_id == clocel_) return "clocel_";
if(event_id == getlen_) return "getlen_";
if(event_id == dvm_fopen) return "dvm_fopen";
if(event_id == dvm_fclose) return "dvm_fclose";
if(event_id == dvm_void_vfprintf) return "dvm_void_vfprintf";
if(event_id == dvm_vfprintf) return "dvm_vfprintf";
if(event_id == dvm_fwrite) return "dvm_fwrite";
if(event_id == dvm_fread) return "dvm_fread";
if(event_id == tron_) return "tron_";
if(event_id == delamv_) return "delamv_";
if(event_id == distr_) return "distr_";
if(event_id == crtred_) return "crtred_";
if(event_id == delred_) return "delred_";
if(event_id == begbl_) return "begbl_";
if(event_id == endbl_) return "endbl_";
if(event_id == crtpl_) return "crtpl_";
if(event_id == mappl_) return "mappl_";
if(event_id == endpl_) return "endpl_";
if(event_id == locind_) return "locind_";
if(event_id == tstda_) return "tstda_";
if(event_id == srmem_) return "srmem_";
if(event_id == tstio_) return "tstio_";
if(event_id == getrnk_) return "getrnk_";
if(event_id == getsiz_) return "getsiz_";
if(event_id == dvm_vscanf) return "dvm_vscanf";
if(event_id == realn_) return "realn_";
if(event_id == redis_) return "redis_";
if(event_id == arrmap_) return "arrmap_";
if(event_id == setpsw_) return "setpsw_";
if(event_id == setind_) return "setind_";
if(event_id == locsiz_) return "locsiz_";
if(event_id == imlast_) return "imlast_";
if(event_id == malign_) return "malign_";
if(event_id == crtrg_) return "crtrg_";
if(event_id == mrealn_) return "mrealn_";
if(event_id == strtrd_) return "strtrd_";
if(event_id == waitrd_) return "waitrd_";
if(event_id == amvmap_) return "amvmap_";
if(event_id == exfrst_) return "exfrst_";
if(event_id == across_) return "across_";
if(event_id == dopl_) return "dopl_";
if(event_id == mdistr_) return "mdistr_";
if(event_id == mredis_) return "mredis_";
if(event_id == delarm_) return "delarm_";
if(event_id == delmvm_) return "delmvm_";
if(event_id == dvm_Init) return "dvm_Init";
if(event_id == dvm_fscanf) return "dvm_fscanf";
if(event_id == dvm_scanf) return "dvm_scanf";
if(event_id == dvm_vfscanf) return "dvm_vfscanf";
if(event_id == dvm_clearerr) return "dvm_clearerr";
if(event_id == dvm_feof) return "dvm_feof";
if(event_id == dvm_ferror) return "dvm_ferror";
if(event_id == dvm_fflush) return "dvm_fflush";
if(event_id == dvm_fgetc) return "dvm_fgetc";
if(event_id == dvm_fgetpos) return "dvm_fgetpos";
if(event_id == dvm_fgets) return "dvm_fgets";
if(event_id == dvm_fputc) return "dvm_fputc";
if(event_id == dvm_fputs) return "dvm_fputs";
if(event_id == dvm_freopen) return "dvm_freopen";
if(event_id == dvm_fseek) return "dvm_fseek";
if(event_id == dvm_fsetpos) return "dvm_fsetpos";
if(event_id == dvm_ftell) return "dvm_ftell";
if(event_id == dvm_getc) return "dvm_getc";
if(event_id == dvm_getchar) return "dvm_getchar";
if(event_id == dvm_gets) return "dvm_gets";
if(event_id == dvm_putc) return "dvm_putc";
if(event_id == dvm_putchar) return "dvm_putchar";
if(event_id == dvm_puts) return "dvm_puts";
if(event_id == dvm_rewind) return "dvm_rewind";
if(event_id == dvm_setbuf) return "dvm_setbuf";
if(event_id == dvm_setvbuf) return "dvm_setvbuf";
if(event_id == dvm_tmpfile) return "dvm_tmpfile";
if(event_id == dvm_ungetc) return "dvm_ungetc";
if(event_id == dvm_void_fprintf) return "dvm_void_fprintf";
if(event_id == dvm_fprintf) return "dvm_fprintf";
if(event_id == dvm_void_printf) return "dvm_void_printf";
if(event_id == dvm_printf) return "dvm_printf";
if(event_id == dvm_void_vprintf) return "dvm_void_vprintf";
if(event_id == dvm_vprintf) return "dvm_vprintf";
if(event_id == dvm_remove) return "dvm_remove";
if(event_id == dvm_rename) return "dvm_rename";
if(event_id == dvm_tmpnam) return "dvm_tmpnam";
if(event_id == dvm_close) return "dvm_close";
if(event_id == dvm_fstat) return "dvm_fstat";
if(event_id == dvm_lseek) return "dvm_lseek";
if(event_id == dvm_open) return "dvm_open";
if(event_id == dvm_read) return "dvm_read";
if(event_id == dvm_write) return "dvm_write";
if(event_id == dvm_access) return "dvm_access";
if(event_id == dvm_stat) return "dvm_stat";
if(event_id == mps_Bcast) return "mps_Bcast";
if(event_id == mps_Barrier) return "mps_Barrier";
if(event_id == dvm_dfread) return "dvm_dfread";
if(event_id == dvm_dfwrite) return "dvm_dfwrite";
if(event_id == crtshg_) return "crtshg_";
if(event_id == inssh_) return "inssh_";
if(event_id == insshd_) return "insshd_";
if(event_id == incsh_) return "incsh_";
if(event_id == incshd_) return "incshd_";
if(event_id == strtsh_) return "strtsh_";
if(event_id == waitsh_) return "waitsh_";
if(event_id == recvsh_) return "recvsh_";
if(event_id == sendsh_) return "sendsh_";
if(event_id == delshg_) return "delshg_";
if(event_id == getind_) return "getind_";
if(event_id == addhdr_) return "addhdr_";
if(event_id == delhdr_) return "delhdr_";
if(event_id == troff_) return "troff_";
if(event_id == biof_) return "biof_";
if(event_id == eiof_) return "eiof_";
if(event_id == crtps_) return "crtps_";
if(event_id == psview_) return "psview_";
if(event_id == delps_) return "delps_";
if(event_id == setelw_) return "setelw_";
if(event_id == dprstv_) return "dprstv_";
if(event_id == dstv_) return "dstv_";
if(event_id == dldv_) return "dldv_";
if(event_id == dbegpl_) return "dbegpl_";
if(event_id == dbegsl_) return "dbegsl_";
if(event_id == dendl_) return "dendl_";
if(event_id == diter_) return "diter_";
if(event_id == drmbuf_) return "drmbuf_";
if(event_id == dskpbl_) return "dskpbl_";
if(event_id == binter_) return "binter_";
if(event_id == einter_) return "einter_";
if(event_id == bsloop_) return "bsloop_";
if(event_id == bploop_) return "bploop_";
if(event_id == eloop_) return "eloop_";
if(event_id == Event_dvm_exit) return "dvm_exit";
if(event_id == crtrbl_) return "crtrbl_";
if(event_id == crtrbp_) return "crtrbp_";
if(event_id == loadrb_) return "loadrb_";
if(event_id == waitrb_) return "waitrb_";
if(event_id == crtbg_) return "crtbg_";
if(event_id == insrb_) return "insrb_";
if(event_id == loadbg_) return "loadbg_";
if(event_id == waitbg_) return "waitbg_";
return "Unknown_Func";
}
// Debug output of an Event: its name (see IDToEventName) padded with one
// space on each side. Returns the stream to allow chaining.
ostream& operator << (ostream& os, const Event& e)
{
    return os << ' ' << IDToEventName(e) << ' ';
}
#endif
// Classifies an event identifier into one of the FuncType categories.
// Any identifier that is not listed below is reported as __UnknownFunc.
FuncType GetFuncType(Event func_id)
{
    switch (func_id) {

    // ---- root of the call tree
    case Root_func:
        return __RootFunc;

    // ---- distributed arrays
    case crtda_:  case align_:  case delda_:  case realn_:
    case arrcpy_: case aarrcp_: case waitcp_:
        return __DArrayFunc;

    // ---- intervals
    case binter_: case bsloop_: case bploop_:
    case einter_: case eloop_:
        return __IntervalFunc;

    // ---- shadow edges
    case crtshg_: case inssh_:  case insshd_: case incsh_:  case incshd_:
    case delshg_: case strtsh_: case waitsh_: case sendsh_: case recvsh_:
    case imlast_: case exfrst_: case across_:
        return __ShadowFunc;

    // ---- reductions
    case crtrg_:  case crtred_: case insred_: case delred_:
    case delrg_:  case strtrd_: case waitrd_:
        return __ReductFunc;

    // ---- parallel loops
    case crtpl_:  case mappl_:  case dopl_:   case endpl_:
        return __ParLoopFunc;

    // ---- processor systems / abstract machines
    case crtps_:  case delps_:  case setelw_: case psview_: case getps_:
    case getam_:  case mapam_:  case runam_:  case stopam_: case getamr_:
    case crtamv_: case delamv_: case blkdiv_: case distr_:  case redis_:
        return __MPS_AMFunc;

    // ---- input / output
    case dvm_rewind:   case dvm_tmpfile:  case dvm_ungetc:
    case dvm_setbuf:   case dvm_setvbuf:
    case dvm_remove:   case dvm_rename:   case dvm_tmpnam:
    case dvm_close:    case dvm_fstat:    case dvm_lseek:
    case dvm_access:   case dvm_stat:
    case dvm_clearerr: case dvm_ferror:   case dvm_fgetpos:
    case dvm_ftell:    case dvm_getc:
    case dvm_open:     case dvm_read:     case dvm_write:
    case dvm_fflush:   case dvm_fgetc:    case dvm_feof:
    case dvm_getchar:  case dvm_gets:
    case dvm_putc:     case dvm_putchar:  case dvm_puts:
    case dvm_vscanf:
    case dvm_fopen:    case dvm_fclose:
    case dvm_void_vfprintf: case dvm_vfprintf:
    case dvm_fwrite:   case dvm_fread:
    case dvm_fgets:    case dvm_fputc:    case dvm_fputs:
    case dvm_freopen:  case dvm_fseek:    case dvm_fsetpos:
    case dvm_fscanf:   case dvm_scanf:
    case dvm_void_fprintf:  case dvm_fprintf:
    case dvm_void_printf:   case dvm_printf:
    case dvm_void_vprintf:  case dvm_vprintf:
    case dvm_vfscanf:
    case biof_:        case eiof_:        case srmem_:
        return __IOFunc;

    // ---- everything handled as a "regular" call
    case tstio_:  case tstda_:  case locsiz_: case getlen_: case delobj_:
    case tron_:   case troff_:
    case clocel_: case locind_: case rlocel_: case wlocel_: case tstelm_:
    case getrnk_: case getsiz_: case delmvm_:
    case mdistr_: case mredis_: case amvmap_: case setpsw_:
    case malign_: case mrealn_: case delarm_: case arrmap_:
    case rwelm_:  case copelm_: case elmcpy_: case setind_:
    case dvm_dfread: case dvm_dfwrite:
    case getind_: case addhdr_: case delhdr_:
    case dprstv_: case dstv_:   case dldv_:
    case dbegpl_: case dbegsl_: case dendl_:  case diter_:
    case drmbuf_: case dskpbl_:
    case begbl_:  case endbl_:
    case getamv_:
        return __RegularFunc;

    // ---- remote access buffers
    case crtrbl_: case crtrbp_: case loadrb_: case waitrb_:
    case crtbg_:  case insrb_:  case loadbg_: case waitbg_:
        return __RemAccessFunc;

    default:
        return __UnknownFunc;
    }
}
#ifdef P_DEBUG
// Debug helper: textual name of a FuncType category. Values outside the
// enumeration are reported as "__UnknownFunc".
static std::string GetFuncTypeName(const FuncType ft)
{
    const char* label = "__UnknownFunc";   // also covers __UnknownFunc itself
    switch (ft) {
    case __RootFunc:      label = "__RootFunc";      break;
    case __DArrayFunc:    label = "__DArrayFunc";    break;
    case __IntervalFunc:  label = "__IntervalFunc";  break;
    case __IOFunc:        label = "__IOFunc";        break;
    case __MPS_AMFunc:    label = "__MPS_AMFunc";    break;
    case __ParLoopFunc:   label = "__ParLoopFunc";   break;
    case __ReductFunc:    label = "__ReductFunc";    break;
    case __RegularFunc:   label = "__RegularFunc";   break;
    case __ShadowFunc:    label = "__ShadowFunc";    break;
    case __RemAccessFunc: label = "__RemAccessFunc"; break;
    default:              break;
    }
    return std::string(label);
}
// Debug output of a FuncType: its name (see GetFuncTypeName) padded with one
// space on each side. Returns the stream to allow chaining.
std::ostream& operator << (std::ostream& os, const FuncType& ft)
{
    return os << ' ' << GetFuncTypeName(ft) << ' ';
}
#endif

View File

@@ -0,0 +1,220 @@
#ifndef _DVM_EventH
#define _DVM_EventH
#include <iostream>
#include <string>
// Identifiers of the traced runtime calls/events.
//
// Only the first two enumerators have explicit values; all others are
// numbered implicitly in declaration order, so inserting or reordering
// entries changes their numeric values.
// Names are translated to these identifiers by EventNameToID().
enum Event {
// Returned by EventNameToID() when the name is not recognized.
Unknown_Func = -1,
// Classified as __RootFunc by GetFuncType().
Root_func = 0,
dvm_Init,
// Corresponds to the trace name "dvm_exit" (the enumerator carries a prefix).
Event_dvm_exit,
//====
blkdiv_,
//=***
delrg_,
insred_,
arrcpy_,
aarrcp_,
waitcp_,
crtda_,
getam_,
crtamv_,
align_ ,
getps_,
saverv_,
tstelm_,
rwelm_,
rlocel_,
delda_,
delobj_,
copelm_,
elmcpy_,
wlocel_,
clocel_,
getlen_,
tron_,
delamv_,
distr_,
crtred_,
delred_,
begbl_,
endbl_,
crtpl_,
mappl_,
endpl_,
locind_,
tstda_,
srmem_,
tstio_,
getrnk_,
getsiz_,
realn_,
redis_,
arrmap_,
setpsw_,
setind_,
locsiz_,
imlast_,
malign_,
crtrg_,
mrealn_,
strtrd_,
waitrd_,
amvmap_,
exfrst_,
dopl_,
mdistr_,
mredis_,
delarm_,
delmvm_,
crtshg_,
inssh_,
insshd_,
incsh_,
incshd_,
strtsh_,
waitsh_,
delshg_,
recvsh_,
sendsh_,
across_,
getind_,
addhdr_,
delhdr_,
troff_,
biof_,
eiof_,
crtps_,
psview_,
delps_,
setelw_,
getamv_,
mapam_,
runam_,
stopam_,
getamr_,
dprstv_,
dstv_,
dldv_,
dbegpl_,
dbegsl_,
dendl_,
diter_,
drmbuf_,
dskpbl_,
binter_,
einter_,
bsloop_,
bploop_,
eloop_,
crtrbl_,
crtrbp_,
loadrb_,
waitrb_,
crtbg_,
insrb_,
loadbg_,
waitbg_,
// From here on: wrappers named after C library / POSIX I/O routines.
// GetFuncType() classifies most of them as __IOFunc.
dvm_fopen,
dvm_fclose,
dvm_void_vfprintf,
dvm_vfprintf,
dvm_fwrite,
dvm_fread,
dvm_vscanf,
dvm_fscanf,
dvm_scanf,
dvm_vfscanf,
dvm_clearerr,
dvm_feof,
dvm_ferror,
dvm_fflush,
dvm_fgetc,
dvm_fgetpos,
dvm_fgets,
dvm_fputc,
dvm_fputs,
dvm_freopen,
dvm_fseek,
dvm_fsetpos,
dvm_ftell,
dvm_getc,
dvm_getchar,
dvm_gets,
dvm_putc,
dvm_putchar,
dvm_puts,
dvm_rewind,
dvm_setbuf,
dvm_setvbuf,
dvm_tmpfile,
dvm_ungetc,
dvm_void_fprintf,
dvm_fprintf,
dvm_void_printf,
dvm_printf,
dvm_void_vprintf,
dvm_vprintf,
dvm_remove,
dvm_rename,
dvm_tmpnam,
dvm_close,
dvm_fstat,
dvm_lseek,
dvm_open,
dvm_read,
dvm_write,
dvm_access,
dvm_stat,
// Not classified by GetFuncType() (they fall into __UnknownFunc).
mps_Bcast,
mps_Barrier,
// Classified as __RegularFunc by GetFuncType().
dvm_dfread,
dvm_dfwrite
};
// Translates an event name into its Event identifier.
// Returns Unknown_Func when the name is not recognized.
Event EventNameToID(const std::string& event_name);
#ifdef P_DEBUG
// Debug builds only: prints the event's name surrounded by single spaces.
std::ostream& operator << (std::ostream& os, const Event& e);
#endif
// Categories into which GetFuncType() sorts Event identifiers.
// Values are implicit and sequential starting at __RootFunc = 0.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; renaming would touch every user of this enum.
enum FuncType {
__RootFunc = 0,
__DArrayFunc,
__IntervalFunc,
__IOFunc,
// __MessageFunc,   (disabled category, kept for reference)
__MPS_AMFunc,
__ParLoopFunc,
__ReductFunc,
__RegularFunc,
__ShadowFunc,
__RemAccessFunc,
// Result of GetFuncType() for events it does not list.
__UnknownFunc
};
// Returns the category of the given event; __UnknownFunc if it is not listed.
FuncType GetFuncType(Event func_id);
#ifdef P_DEBUG
// Debug builds only: prints the category name surrounded by single spaces.
std::ostream& operator << (std::ostream& os, const FuncType& ft);
#endif
// Kind of a trace line.
// NOTE(review): the meaning of each value is inferred from its name; the
// code that assigns them is not in this header - confirm against the parser.
enum LineType {
Unknown_ =-1,
Call_ = 0,
Ret_ = 1,
Event_ = 2,
Root_ = 3
};
#endif

View File

@@ -0,0 +1,276 @@
#include <string.h>
#include <assert.h>
#include <fstream>
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Vm.h"
using namespace std;
extern ofstream prot;
// Global time accumulator. In this part of the file it is only referenced by
// the commented-out trace-line constructor below, which adds each call's
// func_time to it. NOTE(review): other users may exist elsewhere - confirm.
double grig_time_call=0.0;
// =================================FuncCall =======================================
// temporary: only needed by the disabled debug call in the constructor
#include "ModelStructs.h"
extern _DArrayInfo * GetDArrayByIndex(long ID);
//------------------------------ CONSTRUCTOR --------------------------------------
// Creates an empty call record.
//
// All scalar members are given a defined value. In particular func_id is set
// to Unknown_Func: the destructor switches on func_id, so leaving it
// uninitialized (as before) made destroying a never-filled FuncCall read an
// indeterminate value. source_line was uninitialized as well.
// vcall_time / vret_time are default-constructed empty vectors, so no
// explicit resize is required.
FuncCall::FuncCall():
    func_id(Unknown_Func),
    call_time(0.0),
    ret_time(0.0),
    call_params(NULL),
    source_line(0),
    source_file(NULL)
{
}
/*
FuncCall::FuncCall(VectorTraceLine *traceLines) :
ret_time(0.0),
call_params(NULL)
{
int call_info_count = 0;
char** call_info = NULL; // pointer to string vector with input function params
int ret_info_count = 0;
char** ret_info = NULL; // pointer to string vector with output function params
TraceLine * tl = traceLines->current();
// 'call_xxxxxx'
assert(traceLines->current()->line_type == Call_);
func_id = tl->func_id;
call_time = tl->func_time;//commented grig /rootVM->getProcPower();
grig_time_call+=tl->func_time;
//grig add-on
vcall_time.resize(currentVM->getProcCount());
int k;
for(k=0;k<currentVM->getProcCount();k++)
{
// printf("VRET[%d of %d]= %f / %f\n",k,currentVM->getProcCount(),tl->func_time,currentVM->getProcPower(k));
vcall_time[k]=tl->func_time/currentVM->getProcPower(k);
}
//\grig add-on
source_line = tl->source_line;
source_file = strdup(tl->source_file);
// get effective parameters
traceLines->next();
traceLines->GetUnknownLines(call_info_count, call_info);
//'ret_xxxxxx
assert(traceLines->current()->line_type == Ret_);
//grig
double rettimetemp;
rettimetemp=traceLines->current()->func_time;
//\grig
ret_time = rettimetemp; //commented by grig / rootVM->getProcPower();
//grig add-on
//int k;
vret_time.resize(currentVM->getProcCount());
for(k=0;k<currentVM->getProcCount();k++)
{
// fff=rettimetemp;
vret_time[k]=rettimetemp/ currentVM->getProcPower(k);
}
//\grig add-on
traceLines->next();
traceLines->GetUnknownLines(ret_info_count, ret_info);
// Only for parameters passing
TraceCall trc_call(func_id, source_line, source_file,
call_info_count, call_info, ret_info_count, ret_info);
// create FuncCall::params
GetCallParams(trc_call, call_params);
// free memory
int i;
for (i = 0; i < call_info_count; i++)
delete call_info[i];
delete call_info;
for (i = 0; i < ret_info_count; i++)
delete ret_info[i];
delete ret_info;
}
*/
// ------------------------------ DESTRUCTOR -------------------------------------
// Releases the resources owned by the call record.
//
// call_params is stored as void*, so it has to be cast back to the concrete
// *_Info structure (selected by func_id) before it can be deleted.
// Events sharing the same parameter structure share one case group.
FuncCall::~FuncCall()
{
    // NOTE(review): the disabled trace-line constructor filled source_file
    // with strdup(); if the current producers still do so, this should be
    // free() rather than delete. Left unchanged because the allocation site
    // is not visible here.
    delete source_file;

    // The time vectors release their storage themselves.

    switch (func_id) {
    case binter_:
    case bsloop_:
    case bploop_:
        delete static_cast<binter_Info*>(call_params);
        break;
    case crtamv_:
        delete static_cast<crtamv_Info*>(call_params);
        break;
    case blkdiv_:
        delete static_cast<blkdiv_Info*>(call_params);
        break;
    case distr_:
        delete static_cast<distr_Info*>(call_params);
        break;
    case redis_:
        delete static_cast<redis_Info*>(call_params);
        break;
    case crtda_:
        delete static_cast<crtda_Info*>(call_params);
        break;
    case align_:
        delete static_cast<align_Info*>(call_params);
        break;
    case realn_:
        delete static_cast<realn_Info*>(call_params);
        break;
    case arrcpy_:
    case aarrcp_:
        // both copy variants use the arrcpy_ parameter structure
        delete static_cast<arrcpy_Info*>(call_params);
        break;
    case mappl_:
        delete static_cast<mappl_Info*>(call_params);
        break;
    case inssh_:
    case insshd_:
    case incsh_:
    case incshd_:
        // all four shadow-insert variants use the inssh_ parameter structure
        delete static_cast<inssh_Info*>(call_params);
        break;
    case exfrst_:
        delete static_cast<exfrst_Info*>(call_params);
        break;
    case imlast_:
        delete static_cast<imlast_Info*>(call_params);
        break;
    case einter_:
        delete static_cast<einter_Info*>(call_params);
        break;
    case getam_:
        delete static_cast<getam_Info*>(call_params);
        break;
    case crtps_:
        delete static_cast<crtps_Info*>(call_params);
        break;
    case getps_:
        delete static_cast<getps_Info*>(call_params);
        break;
    case psview_:
        delete static_cast<psview_Info*>(call_params);
        break;
    case delps_:
        delete static_cast<delps_Info*>(call_params);
        break;
    case setelw_:
        delete static_cast<setelw_Info*>(call_params);
        break;
    case getamr_:
        delete static_cast<getamr_Info*>(call_params);
        break;
    case getamv_:
        delete static_cast<getamv_Info*>(call_params);
        break;
    case mapam_:
        delete static_cast<mapam_Info*>(call_params);
        break;
    case runam_:
        delete static_cast<runam_Info*>(call_params);
        break;
    case delamv_:
        delete static_cast<delamv_Info*>(call_params);
        break;
    case delda_:
        delete static_cast<delda_Info*>(call_params);
        break;
    case crtpl_:
        delete static_cast<crtpl_Info*>(call_params);
        break;
    case dopl_:
        delete static_cast<dopl_Info*>(call_params);
        break;
    case endpl_:
        delete static_cast<endpl_Info*>(call_params);
        break;
    case crtrg_:
        delete static_cast<crtrg_Info*>(call_params);
        break;
    case crtred_:
        delete static_cast<crtred_Info*>(call_params);
        break;
    case insred_:
        delete static_cast<insred_Info*>(call_params);
        break;
    case delrg_:
        delete static_cast<delrg_Info*>(call_params);
        break;
    case delred_:
        delete static_cast<delred_Info*>(call_params);
        break;
    case strtrd_:
        delete static_cast<strtrd_Info*>(call_params);
        break;
    case waitrd_:
        delete static_cast<waitrd_Info*>(call_params);
        break;
    case crtshg_:
        delete static_cast<crtshg_Info*>(call_params);
        break;
    case delshg_:
        delete static_cast<delshg_Info*>(call_params);
        break;
    case strtsh_:
        delete static_cast<strtsh_Info*>(call_params);
        break;
    case waitsh_:
        delete static_cast<waitsh_Info*>(call_params);
        break;
    case sendsh_:
        delete static_cast<sendsh_Info*>(call_params);
        break;
    case recvsh_:
        delete static_cast<recvsh_Info*>(call_params);
        break;
    case crtrbl_:
        delete static_cast<crtrbl_Info*>(call_params);
        break;
    case crtrbp_:
        delete static_cast<crtrbp_Info*>(call_params);
        break;
    default:
        // The concrete type is unknown here. A delete-expression on void* is
        // not valid C++ (compilers merely tolerate it), so release the raw
        // storage explicitly; like before, no destructor is run.
        // Safe for a null pointer.
        ::operator delete(call_params);
        break;
    }
}

View File

@@ -0,0 +1,100 @@
#ifndef _FuncCall_H
#define _FuncCall_H
#include "TraceLine.h"
#include <vector>
using namespace std;
// Structure for the final stage of file parsing -- call graph with parameters
// One traced runtime call together with its timing and parameters.
//
// The object owns call_params and source_file: the destructor deletes both,
// picking the concrete parameter structure from func_id.
// NOTE(review): no copy constructor / assignment is declared, so copying a
// FuncCall would make two objects delete the same pointers.
class FuncCall {
public:
Event func_id; // function identifier
double call_time; // call time
double ret_time; // return time
void * call_params; // pointer to structure with function params
int source_line; // source line of the call
char * source_file; // source file name of the call (owned, see class note)
//grig
vector<double> vcall_time; // call time as a vector (presumably one entry per processor - confirm)
vector<double> vret_time; // return time as a vector (presumably one entry per processor - confirm)
//\grig
public:
// Creates an empty record (times 0, no parameters, no source file).
FuncCall();
// FuncCall(VectorTraceLine *traceLines);
// Frees source_file and the parameter structure.
~FuncCall();
// The members below are defined outside this header. Their grouping follows
// the FuncType categories; the *Time() members look like per-category entry
// points and the lower-case ones like per-event handlers - confirm against
// the implementation files.
void RegularTime();
void UnknownTime();
void IntervalTime();
// --- distributed arrays
void DArrayTime();
void crtda();
void align();
void delda();
void realn();
void arrcpy();
void aarrcp();
void waitcp();
// --- processor systems / abstract machines
void MPS_AMTime();
void crtps();
void psview();
void getps();
void setelw();
void delps();
void getam();
void getamr();
void crtamv();
void delamv();
void mapam();
void runam();
void stopam();
void blkdiv(); //====//
void distr();
void RedisTime();
// --- parallel loops
void ParLoopTime();
void crtpl();
void endpl();
void mappl();
void dopl();
// --- reductions
void ReductTime();
void crtrg();
void crtred();
void insred();
void delred();
void delrg();
void strtrd();
void waitrd();
void across();
// --- shadow edges
void ShadowTime();
void crtshg();
void inssh();
void insshd();
void incsh();
void incshd();
void delshg();
void strtsh();
void waitsh();
void exfrst();
void imlast();
void sendsh();
void recvsh();
// --- input / output
void IOTime();
void ciotime();
void biof();
void tstio();
void srmem();
void eiof();
// --- remote access
void RemAccessTime();
// void crtbl();
void crtrbl();
void crtrbp();
void loadrb();
void waitrb();
};
#endif

View File

@@ -0,0 +1,943 @@
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <fstream>
#include <float.h>
#include <vector>
#include "Interval.h"
#include "Vm.h"
#include "Ver.h"
#ifdef __GNUC__
#define _strdup strdup
#include "ParseString.h"
#endif
using namespace std;
extern ofstream prot;
extern int rootProcCount; // number of processors in root VM
extern double * procElapsedTime; // processor's elapsed times vector
extern VM * currentVM; // pointer to current VM
extern char * interval[]; // interval's template
// Interval currently being executed. Interval::Enter() moves it to a nested
// interval, Interval::Leave() moves it back to the parent.
Interval * CurrInterval = NULL; // pointer to current interval
// Nesting depth: incremented by Enter(), decremented by Leave(); captured as
// 'level' by the constructor used in Enter().
int Interval::Intervallevel = 0; // current interval's level
// Pre-incremented by every constructor to produce the interval's ID.
int Interval::IntervalID = 0; // last interval's ID
// Creates a nested interval and registers it with its parent.
//
//   iline, ifile - source position of the interval; ifile is duplicated with
//                  _strdup(), so the object owns its own copy
//   itype        - interval type
//   iindex       - interval index value
//   par_int      - enclosing interval, or NULL for none
//
// The nesting level is taken from the static Intervallevel counter and the
// ID from the static IntervalID counter.
// If the parent's child table cannot be grown, the error is written to the
// protocol stream and the program terminates (previously the unchecked
// realloc() result was dereferenced, and the old table was lost).
Interval::Interval(int iline, char * ifile, IntervalType itype, long iindex, Interval * par_int) :
    type(itype),
    index(iindex),
    level(Intervallevel),
    EXE_count(1),
    source_line(iline),
    source_file(_strdup(ifile)),
    io_trafic(false),
    ID(++IntervalID),
    num_op_io(0),
    num_op_reduct(0),
    num_op_shadow(0),
    num_op_remote(0),
    num_op_redist(0),
    Total_time(0.0),
    Efficiency(0.0),
    Productive_time(0.0),
    Productive_CPU_time(0.0),
    Productive_SYS_time(0.0),
    parent_interval(par_int),
    count(0),
    html_title(NULL),
    nested_intervals(NULL)
{
    // one statistics record per processor of the root VM
    Procs = new Processor*[rootProcCount];
    for (int p = 0; p < rootProcCount; p++)
        Procs[p] = new Processor();

    // link with parent: append 'this' to the parent's child table
    if (parent_interval != NULL) {
        // The table is managed with realloc(), hence it must be released
        // with free(), never with delete.
        Interval** grown =
            (Interval**) realloc(parent_interval->nested_intervals,
                                 (parent_interval->count + 1) * sizeof(Interval*));
        if (grown == NULL) {
            prot << "Interval::Interval - out of memory while adding a nested interval" << endl;
            exit(EXIT_FAILURE);
        }
        grown[parent_interval->count] = this;
        parent_interval->nested_intervals = grown;
        parent_interval->count++;   // only after the slot really exists
    }
}
// Creates a parentless interval of type __IT_MAIN.
//
//   arg - nesting level to assign to the interval
//
// The interval has no real source position: line 0 and the placeholder file
// name "no_file.fdv" are used.
Interval::Interval(int arg) :
    type(__IT_MAIN),
    index(NO_EXPR),
    level(arg),
    EXE_count(1),
    source_line(0),
    io_trafic(false),
    ID(++IntervalID),
    num_op_io(0),
    num_op_reduct(0),
    num_op_shadow(0),
    num_op_remote(0),
    num_op_redist(0),
    Total_time(0.0),
    Efficiency(0.0),
    Productive_time(0.0),
    Productive_CPU_time(0.0),
    Productive_SYS_time(0.0),
    parent_interval(NULL),
    count(0),
    html_title(NULL),
    nested_intervals(NULL)
{
    // owned copy of the placeholder name
    source_file = _strdup("no_file.fdv");

    // one statistics record per processor of the root VM
    Procs = new Processor*[rootProcCount];
    int slot = 0;
    while (slot < rootProcCount) {
        Procs[slot] = new Processor();
        ++slot;
    }
}
// Destroys the interval together with all of its nested intervals.
//
// Each resource is released with the deallocator matching its allocator in
// the constructors (the former code used scalar 'delete' for all of them,
// which is undefined behaviour for malloc'ed and array-new'ed memory):
//   source_file       - _strdup()            -> free()
//   Procs             - new Processor*[...]  -> delete[]
//   nested_intervals  - realloc()            -> free()
Interval::~Interval()
{
    free(source_file);

    for (int p = 0; p < rootProcCount; p++)
        delete Procs[p];
    delete[] Procs;

    // children are owned by their parent
    for (int k = 0; k < count; k++)
        delete nested_intervals[k];
    free(nested_intervals);   // NULL when there are no children - fine for free()
}
// Blends per-processor characteristics of 'from' into this interval:
// every listed field becomes  own * p1 + other * p2.
// Processes the first MPSProcCount() entries of Procs. Always returns 0.
// The interval-level Productive_* values are intentionally not blended.
int Interval::copy_poss(Interval* from, double p1, double p2)
{
    for (long k = 0; k < MPSProcCount(); k++) {
        Processor* dst = Procs[k];
        Processor* src = from->Procs[k];

        dst->Lost_time              = dst->Lost_time * p1              + src->Lost_time * p2;
        dst->Execution_time         = dst->Execution_time * p1         + src->Execution_time * p2;
        dst->Insuff_parallelism_usr = dst->Insuff_parallelism_usr * p1 + src->Insuff_parallelism_usr * p2;
        dst->Insuff_parallelism_sys = dst->Insuff_parallelism_sys * p1 + src->Insuff_parallelism_sys * p2;
        dst->Communication          = dst->Communication * p1          + src->Communication * p2;
        dst->Idle                   = dst->Idle * p1                   + src->Idle * p2;
        dst->CPU_time               = dst->CPU_time * p1               + src->CPU_time * p2;
        dst->CPU_time_usr           = dst->CPU_time_usr * p1           + src->CPU_time_usr * p2;
        dst->CPU_time_sys           = dst->CPU_time_sys * p1           + src->CPU_time_sys * p2;
    }
    return 0;
}
// Overwrites the blended per-processor characteristics with those of 'from'
// (a blend with weights 0 for the own values and 1 for the other ones).
// Always returns 0.
int Interval::copy(Interval* from)
{
    const double keep_own = 0;
    const double take_other = 1;
    copy_poss(from, keep_own, take_other);
    return 0;
}
// Returns the requested per-processor characteristic of processor proc_no.
// Characteristics that are not listed yield 0.0 (the processor record is not
// touched in that case).
double Interval::GetProcPred(int proc_no, PredType pred)
{
    double value = 0.0;
    switch (pred)
    {
    case _Lost_time:              value = Procs[proc_no]->Lost_time;              break;
    case _Insuff_parallelism:     value = Procs[proc_no]->Insuff_parallelism;     break;
    case _Insuff_parallelism_sys: value = Procs[proc_no]->Insuff_parallelism_sys; break;
    case _Idle:                   value = Procs[proc_no]->Idle;                   break;
    case _Communication:          value = Procs[proc_no]->Communication;          break;
    case _Synchronization:        value = Procs[proc_no]->Synchronization;        break;
    case _Real_synchronization:   value = Procs[proc_no]->Real_synchronization;   break;
    case _Variation:              value = Procs[proc_no]->Variation;              break;
    case _Overlap:                value = Procs[proc_no]->Overlap;                break;
    case _Load_imbalance:         value = Procs[proc_no]->Load_imbalance;         break;
    case _Execution_time:         value = Procs[proc_no]->Execution_time;         break;
    case _CPU_time:               value = Procs[proc_no]->CPU_time;               break;
    case _CPU_time_usr:           value = Procs[proc_no]->CPU_time_usr;           break;
    case _CPU_time_sys:           value = Procs[proc_no]->CPU_time_sys;           break;
    case _IO_time:                value = Procs[proc_no]->IO_time;                break;
    case _IO_comm:                value = Procs[proc_no]->IO_comm;                break;
    case _IO_real_synch:          value = Procs[proc_no]->IO_real_synch;          break;
    case _IO_synch:               value = Procs[proc_no]->IO_synch;               break;
    case _IO_vary:                value = Procs[proc_no]->IO_vary;                break;
    case _IO_overlap:             value = Procs[proc_no]->IO_overlap;             break;
    case _Wait_reduction:         value = Procs[proc_no]->Wait_reduction;         break;
    case _Reduction_real_synch:   value = Procs[proc_no]->Reduction_real_synch;   break;
    default:                      break;
    }
    return value;
}
// Returns the requested interval-level characteristic; characteristics that
// are not listed yield 0.0.
// A negative value is treated as fatal: a message is printed and the
// program exits with status 1.
double Interval::GetPred(PredType pred)
{
    double value = 0.0;

    if (pred == _Total_time)
        value = Total_time;
    else if (pred == _Efficiency)
        value = Efficiency;
    else if (pred == _Productive_time)
        value = Productive_time;
    else if (pred == _Productive_CPU_time)
        value = Productive_CPU_time;
    else if (pred == _Productive_SYS_time)
        value = Productive_SYS_time;
    else if (pred == _CPU_time)
        value = CPU_time;
    else if (pred == _CPU_time_usr)
        value = CPU_time_usr;
    else if (pred == _CPU_time_sys)
        value = CPU_time_sys;

    if (value < 0) {
        printf("PREDICTOR: fatal error - zero time\n");
        exit(1);
    }
    return value;
}
// Charges TimeDelta of the given kind to processor proc_no of this interval.
//
// Depending on the kind, the time is added to the specific counter and to
// the aggregates it belongs to (Communication, Lost_time, CPU_time,
// Execution_time, ...). Kinds that advance the processor's clock also update
// the global procElapsedTime[proc_no].
//
// While io_trafic is set, everything except __IO_comm is accounted as
// __IO_time on processor 0 and ignored on all other processors.
//
// An unknown kind is reported to the protocol stream and terminates the
// program.
//
// (The unused local 'i' of the previous version has been removed.)
void Interval::AddTime(TimeType InfoType, int proc_no, double TimeDelta)
{
    if (io_trafic && InfoType != __IO_comm) {
        if (proc_no != 0)
            return;              // I/O is attributed to processor 0 only
        InfoType = __IO_time;
    }

    switch (InfoType) {

    // ---- kinds that advance the processor clock ----
    case __IO_time :
        Procs[proc_no]->IO_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __CPU_time_sys :
        Procs[proc_no]->CPU_time_sys+=TimeDelta;
        Procs[proc_no]->CPU_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __CPU_time_usr :
        Procs[proc_no]->CPU_time_usr+=TimeDelta;
        Procs[proc_no]->CPU_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;

    // ---- communication kinds: also counted as lost time ----
    case __Wait_reduct :
        Procs[proc_no]->Wait_reduction+=TimeDelta;
        Procs[proc_no]->Communication+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __Wait_shadow :
        Procs[proc_no]->Wait_shadow+=TimeDelta;
        Procs[proc_no]->Communication+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __Remote_access :
        Procs[proc_no]->Remote_access+=TimeDelta;
        Procs[proc_no]->Communication+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __Redistribute :
        Procs[proc_no]->Redistribution+=TimeDelta;
        Procs[proc_no]->Communication+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;
    case __IO_comm :
        Procs[proc_no]->IO_comm+=TimeDelta;
        Procs[proc_no]->Communication+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        Procs[proc_no]->Execution_time+=TimeDelta;
        procElapsedTime[proc_no] += TimeDelta;
        break;

    // ---- insufficient parallelism: lost time, clock not advanced ----
    case __Insuff_parall_sys :
        Procs[proc_no]->Insuff_parallelism_sys+=TimeDelta;
        Procs[proc_no]->Insuff_parallelism+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        break;
    case __Insuff_parall_usr :
        Procs[proc_no]->Insuff_parallelism_usr+=TimeDelta;
        Procs[proc_no]->Insuff_parallelism+=TimeDelta;
        Procs[proc_no]->Lost_time+=TimeDelta;
        break;

    // ---- synchronization: here TimeDelta is used as an absolute time;
    //      the clock is moved forward to it, never backwards ----
    case __Synchronize :
        procElapsedTime[proc_no]=(procElapsedTime[proc_no]>TimeDelta)?procElapsedTime[proc_no]:TimeDelta;
        Procs[proc_no]->Execution_time=procElapsedTime[proc_no];
        break;

    // ---- pure statistics, clock not advanced ----
    case __Variation :
        Procs[proc_no]->Variation+=TimeDelta;
        break;
    case __Remote_overlap :
        Procs[proc_no]->Remote_overlap+=TimeDelta;
        Procs[proc_no]->Overlap+=TimeDelta;
        break;
    case __Reduct_overlap :
        Procs[proc_no]->Reduction_overlap+=TimeDelta;
        Procs[proc_no]->Overlap+=TimeDelta;
        break;
    case __Shadow_overlap :
        Procs[proc_no]->Shadow_overlap+=TimeDelta;
        Procs[proc_no]->Overlap+=TimeDelta;
        break;

    default:
        prot << "Interval::AddTime - unknown time type " << InfoType << endl;
        exit(EXIT_FAILURE);
    }
}
// Adds TimeDelta to the processor's total Variation and, for the
// communication kinds listed below, to the matching *_vary counter.
// Other kinds only affect the total.
void Interval::AddTimeVariation(TimeType InfoType, int proc_no, double TimeDelta)
{
    Processor* proc = Procs[proc_no];
    proc->Variation += TimeDelta;

    if (InfoType == __Remote_access)
        proc->Remote_vary += TimeDelta;
    else if (InfoType == __Redistribute)
        proc->Redistribution_vary += TimeDelta;
    else if (InfoType == __Wait_reduct)
        proc->Reduction_vary += TimeDelta;
    else if (InfoType == __Wait_shadow)
        proc->Shadow_vary += TimeDelta;
    else if (InfoType == __IO_comm)
        proc->IO_vary += TimeDelta;
}
// Adds TimeDelta to the processor's total Synchronization and, for the
// communication kinds listed below, to the matching *_synch counter.
// Other kinds only affect the total.
// Note: accumulation into Real_synchronization (for kinds other than
// __Synchronize and __Wait_reduct) is disabled in this version.
void Interval::AddTimeSynchronize(TimeType InfoType, int proc_no, double TimeDelta)
{
    Processor* proc = Procs[proc_no];
    proc->Synchronization += TimeDelta;

    if (InfoType == __Remote_access)
        proc->Remote_synch += TimeDelta;
    else if (InfoType == __Redistribute)
        proc->Redistribution_synch += TimeDelta;
    else if (InfoType == __Wait_reduct)
        proc->Reduction_synch += TimeDelta;
    else if (InfoType == __Wait_shadow)
        proc->Shadow_synch += TimeDelta;
    else if (InfoType == __IO_comm)
        proc->IO_synch += TimeDelta;
}
// Charges the same TimeDelta to every processor of the current processor
// system; local indices are translated through currentVM->map().
void Interval::AddMPSTime(TimeType InfoType, double TimeDelta)
{
    for (int local = 0; local < MPSProcCount(); ++local) {
        AddTime(InfoType, currentVM->map(local), TimeDelta);
    }
}
//grig
// Charges an individual time to every processor of the current processor
// system: vTimeDelta[i] goes to the processor with local index i, which is
// translated through currentVM->map().
// vTimeDelta must hold at least MPSProcCount() elements.
//
// The locals 'temp' and 'int_proc' of the previous version were assigned but
// never read; they are removed together with the extra map() call that fed
// one of them (assumes map() is a plain lookup without side effects).
void Interval::AddMPSTime(TimeType InfoType, std::vector<double> vTimeDelta)
{
    for (int i = 0; i < MPSProcCount(); i++)
        AddTime(InfoType, currentVM->map(i), vTimeDelta[i]);
}
//\grig
void Interval::CalcIdleAndImbalance()
{
int i;
double max_ExTime=0,
max_CPUTimeSys=0,
max_CPUTimeUsr=0,
max_CPUTime=0;
// for (i = 0; i < MPSProcCount(); i++) printf("proc[%d].Comm.shad=%f\n",i,Procs[currentVM->map(i)]->Wait_shadow);
// for (i = 0; i < MPSProcCount(); i++) printf("proc[%d].Comm.Remote=%f\n",i,Procs[currentVM->map(i)]->Remote_access);
for (i = 0; i < MPSProcCount(); i++) {
/* if(rootProcCount>1)
{
printf("currentVM->map(i)==%d Procs.addr %d %d\n",currentVM->map(i), Procs[0], Procs[1]);
if((int)Procs[1]<=0)
{
exit(0);
}
}
*/
if (Procs[currentVM->map(i)]->Execution_time > max_ExTime)
max_ExTime = Procs[currentVM->map(i)]->Execution_time;
if (Procs[currentVM->map(i)]->CPU_time_sys > max_CPUTimeSys)
max_CPUTimeSys = Procs[currentVM->map(i)]->CPU_time_sys;
if (Procs[currentVM->map(i)]->CPU_time_usr > max_CPUTimeUsr)
max_CPUTimeUsr = Procs[currentVM->map(i)]->CPU_time_usr;
//grig
if (Procs[currentVM->map(i)]->CPU_time > max_CPUTime)
max_CPUTime = Procs[currentVM->map(i)]->CPU_time;
//\grig
}
for(i = 0; i < MPSProcCount(); i++) {
Procs[currentVM->map(i)]->Idle =
max_ExTime - Procs[currentVM->map(i)]->Execution_time;
//Procs[currentVM->map(i)]->Load_imbalance =
// max_CPUTimeSys - Procs[currentVM->map(i)]->CPU_time_sys;
//grig
if(max_CPUTime!=max_CPUTimeSys+max_CPUTimeUsr)
{
}
// printf("max_sys=%f max_usr=%f sys=%f usr=%f\n",max_CPUTimeSys,max_CPUTimeUsr,Procs[currentVM->map(i)]->CPU_time_sys,Procs[currentVM->map(i)]->CPU_time_usr);
Procs[currentVM->map(i)]->Load_imbalance =
max_CPUTimeSys + max_CPUTimeUsr - Procs[currentVM->map(i)]->CPU_time_sys- Procs[currentVM->map(i)]->CPU_time_usr;
//\grig
}
}
// Enter a nested interval of the current interval and make it current.
// A child of CurrInterval is reused when its type, source line, source file
// (compared with strcmp) and index all match; its EXE_count is then incremented.
// Otherwise a new Interval is constructed with CurrInterval as its parent.
// Intervallevel is incremented before the lookup in either case.
void Interval::Enter(IntervalType int_type, int line, char* file, long index)
{
    Intervallevel++;

    // Look for an already known child describing the same source construct.
    Interval *match = NULL;
    for (int pos = 0; pos < CurrInterval->count && match == NULL; pos++) {
        Interval *candidate = CurrInterval->nested_intervals[pos];

        if (candidate->type != int_type)
            continue;
        if (candidate->source_line != line)
            continue;
        if (strcmp(candidate->source_file, file) != 0)
            continue;
        if (candidate->index != index)
            continue;

        match = candidate;
    }

    if (match == NULL) {
        // Not seen before: create the interval and descend into it.
        CurrInterval = new Interval(line, file, int_type, index, CurrInterval);
    } else {
        CurrInterval = match;
        CurrInterval->EXE_count++;
    }
}
void Interval::Leave()
{
CurrInterval->CalcIdleAndImbalance();
CurrInterval = CurrInterval->parent_interval;
Intervallevel--;
}
//grig
double Interval::GetEffectiveParameter()
{
return this->Execution_time;
}
//\grig
double Interval::GetExecTime()
{
return this->Execution_time;
}
void Interval::Integrate()
{
int i;
double max_ExTime=0;
double CPU_time_usr_start=CPU_time_usr;
double Insuff_parallelism_usr_start=Insuff_parallelism_usr;
// integrate this interval
for(i = 0; i < rootProcCount; i++) {
IO_time += Procs[i]->IO_time;
CPU_time += Procs[i]->CPU_time;
CPU_time_sys += Procs[i]->CPU_time_sys;
CPU_time_usr += Procs[i]->CPU_time_usr;
Lost_time += Procs[i]->Lost_time;
Communication += Procs[i]->Communication;
IO_comm += Procs[i]->IO_comm;
IO_real_synch += Procs[i]->IO_real_synch;
IO_synch += Procs[i]->IO_synch;
IO_vary += Procs[i]->IO_vary;
IO_overlap += Procs[i]->IO_overlap;
Wait_reduction += Procs[i]->Wait_reduction;
// prot << "Wait_reduction=" << Wait_reduction << endl;
Reduction_real_synch += Procs[i]->Reduction_real_synch;
Reduction_synch += Procs[i]->Reduction_synch;
Reduction_vary += Procs[i]->Reduction_vary;
Reduction_overlap += Procs[i]->Reduction_overlap;
// prot << "Reduction_overlap=" << Reduction_overlap << endl;
Wait_shadow += Procs[i]->Wait_shadow;
// prot << "Wait_shadow=" << Wait_shadow << endl;
Shadow_real_synch += Procs[i]->Shadow_real_synch;
Shadow_synch += Procs[i]->Shadow_synch;
Shadow_vary += Procs[i]->Shadow_vary;
Shadow_overlap += Procs[i]->Shadow_overlap;
// prot << "Shadow_overlap=" << Shadow_overlap << endl;
Remote_access += Procs[i]->Remote_access;
Remote_real_synch += Procs[i]->Remote_real_synch;
Remote_synch += Procs[i]->Remote_synch;
Remote_vary += Procs[i]->Remote_vary;
Remote_overlap += Procs[i]->Remote_overlap;
Redistribution += Procs[i]->Redistribution;
Redistribution_real_synch+= Procs[i]->Redistribution_real_synch;
Redistribution_synch+= Procs[i]->Redistribution_synch;
Redistribution_vary+= Procs[i]->Redistribution_vary;
Redistribution_overlap+=Procs[i]->Redistribution_overlap;
Insuff_parallelism += Procs[i]->Insuff_parallelism;
Insuff_parallelism_sys+=Procs[i]->Insuff_parallelism_sys;
Insuff_parallelism_usr+=Procs[i]->Insuff_parallelism_usr;
Synchronization += Procs[i]->Synchronization;
Variation += Procs[i]->Variation;
Real_synchronization+= Procs[i]->Real_synchronization;
// Communication_SYNCH += Procs[i]->Communication_SYNCH;
Idle += Procs[i]->Idle;
Load_imbalance += Procs[i]->Load_imbalance;
Overlap += Procs[i]->Overlap;
if (max_ExTime < Procs[i]->Execution_time)
max_ExTime = Procs[i]->Execution_time;
}
Lost_time += Idle;
Execution_time = max_ExTime;
// Integrate nested intervals
for (i = 0; i < count; i++) {
nested_intervals[i]->Integrate();
num_op_io += nested_intervals[i]->num_op_io;
num_op_reduct += nested_intervals[i]->num_op_reduct;
num_op_shadow += nested_intervals[i]->num_op_shadow;
num_op_remote += nested_intervals[i]->num_op_remote;
num_op_redist += nested_intervals[i]->num_op_redist;
Execution_time += nested_intervals[i]->Execution_time;
IO_time += nested_intervals[i]->IO_time;
CPU_time += nested_intervals[i]->CPU_time;
CPU_time_sys += nested_intervals[i]->CPU_time_sys;
CPU_time_usr += nested_intervals[i]->CPU_time_usr;
Lost_time += nested_intervals[i]->Lost_time;
Communication += nested_intervals[i]->Communication;
IO_comm += nested_intervals[i]->IO_comm;
IO_real_synch += nested_intervals[i]->IO_real_synch;
IO_synch += nested_intervals[i]->IO_synch;
IO_vary += nested_intervals[i]->IO_vary;
IO_overlap += nested_intervals[i]->IO_overlap;
Wait_reduction += nested_intervals[i]->Wait_reduction;
Reduction_synch += nested_intervals[i]->Reduction_synch;
Reduction_overlap += nested_intervals[i]->Reduction_overlap;
Wait_shadow += nested_intervals[i]->Wait_shadow;
Shadow_synch += nested_intervals[i]->Shadow_synch;
Shadow_overlap += nested_intervals[i]->Shadow_overlap;
Remote_access += nested_intervals[i]->Remote_access;
Remote_synch += nested_intervals[i]->Remote_synch;
Remote_overlap += nested_intervals[i]->Remote_overlap;
Redistribution += nested_intervals[i]->Redistribution;
Redistribution_synch += nested_intervals[i]->Redistribution_synch;
Redistribution_overlap += nested_intervals[i]->Redistribution_overlap;
Insuff_parallelism += nested_intervals[i]->Insuff_parallelism;
Insuff_parallelism_sys += nested_intervals[i]->Insuff_parallelism_sys;
Insuff_parallelism_usr += nested_intervals[i]->Insuff_parallelism_usr;
Synchronization += nested_intervals[i]->Synchronization;
Real_synchronization += nested_intervals[i]->Real_synchronization;
// Communication_SYNCH += nested_intervals[i]->Communication_SYNCH;
Idle += nested_intervals[i]->Idle;
Load_imbalance += nested_intervals[i]->Load_imbalance;
Overlap += nested_intervals[i]->Overlap;
}
Total_time = Execution_time * rootProcCount;
Productive_CPU_time = CPU_time_usr - Insuff_parallelism_usr; //Insuff_parallelism_usr - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if(Productive_CPU_time<0)
{
printf("PREDICTOR: fatal error in Integrate - zero time = %.8f(CPU_usr) - %.8f(Ins_par_usr)\n", CPU_time_usr, Insuff_parallelism_usr);
printf(" CPU_time_usr_start=%.8f\n",CPU_time_usr_start);
printf(" Procs[ ]->CPU_time_usr= ");
for(i = 0; i < rootProcCount; i++)
printf("%.8f ", Procs[i]->CPU_time_usr);
printf("\n");
printf(" Insuff_parallelism_usr_start=%.8f\n",Insuff_parallelism_usr_start);
printf(" Procs[ ]->Insuff_parallelism_usr= ");
for(i = 0; i < rootProcCount; i++)
printf("%.8f ", Procs[i]->Insuff_parallelism_usr);
printf("\n");
exit(1);
}
Productive_SYS_time = CPU_time_sys - Insuff_parallelism_sys;
if(Productive_SYS_time<0)
{
printf("PREDICTOR: fatal error in Integrate - zero time = %.8f(CPU_sys) - %.8f(Ins_par_sys)\n", CPU_time_sys, Insuff_parallelism_sys);
exit(1);
}
Productive_time = Productive_CPU_time + Productive_SYS_time + IO_time;
// cout << "Idle = " << Idle << endl;
// cout << "Lost_time = " << Lost_time << endl;
if ((Productive_time == 0.0) && (Total_time == 0.0))
Efficiency = 1.0;
else if (Total_time == 0.0)
Efficiency = DBL_MAX;
else
Efficiency = Productive_time / Total_time;
}
// Save interval as part of HTML file
// Write this interval as one section of the HTML report.
//
// The global template `interval` (a NULL-terminated array of lines) is walked
// line by line. A line starting with '@' is a placeholder and is replaced by
// the matching piece of data; any other line is copied verbatim.
//
// Parameters:
//   hfile - output stream of the HTML file;
//   up    - ID used as the anchor of the "UP" link;
//   next  - ID used as the anchor of the "NEXT" link;
//   pred  - ID used as the anchor of the "PRED" link.
//
// Nested-interval table:
//   @nestbeg@ ... @nestend@ brackets the table; when this interval has no
//   nested intervals (count == 0) output is switched off for that whole span.
//   @nesteds@ ... @nestedf@ brackets one table row; at @nestedf@ the template
//   position jumps back to @nesteds@ until every nested interval got its row.
void Interval::SaveInFile(ofstream& hfile, int up, int next, int pred)
{
    bool outOn = true;      // false while the nested-intervals table is suppressed
    int i = 0,              // current template line
        j = 0,              // template line of @nesteds@ (start of the row to repeat)
        k = 0;              // index of the nested interval the current row describes
    char idv[64];           // scratch buffer for _itoa

    for (i = 0; interval[i] != NULL; i++) {
        const char *tag = interval[i];

        if (tag[0] == '@') {
            if (strcmp(tag, "@title@") == 0) {
                if (html_title != NULL) hfile << html_title << " :: ";
                hfile << VER_PRED << endl;
            } else if (strcmp(tag, "@label@") == 0) {
                hfile << '"' << _itoa(ID, idv, 10) << '"' << endl;
            } else if (strcmp(tag, "@typec@") == 0) {
                switch (type) {
                    case __IT_MAIN :
                        hfile << "Main" << endl;
                        break;
                    case __IT_PAR :
                        hfile << "Par" << endl;
                        break;
                    case __IT_SEQ :
                        hfile << "Seq" << endl;
                        break;
                    case __IT_USER :
                        hfile << "User" << endl;
                        break;
                }
            } else if (strcmp(tag, "@levc@") == 0) {
                hfile << level << endl;
            } else if (strcmp(tag, "@counc@") == 0) {
                hfile << EXE_count << endl;
            } else if (strcmp(tag, "@linec@") == 0) {
                hfile << source_line << endl;
            } else if (strcmp(tag, "@exprc@") == 0) {
                if (index != NO_EXPR)
                    hfile << index << endl;
            } else if (strcmp(tag, "@filec@") == 0) {
                if (source_file != NULL)
                    hfile << source_file << endl;
            } else if (strcmp(tag, "@proc@") == 0) {
                switch (currentVM->getMType()) {
                    case mach_ETHERNET :
                        hfile << "ethernet ";
                        break;
                    case mach_TRANSPUTER:
                        hfile << "transp ";
                        break;
                    case mach_MYRINET:
                        hfile << "myrinet ";
                        break;
                }
                // Fetch the size array once and iterate over that single object, so
                // begin()/end() are guaranteed to belong to the same container even
                // if getSizeArray() returns by value.
                const vector<long>& sizes = rootVM->getSizeArray();
                for (vector<long>::const_iterator dim = sizes.begin(); dim < sizes.end(); dim++) {
                    if (dim != sizes.begin())
                        hfile << 'x';
                    hfile << *dim;
                }
            } else if (strcmp(tag, "@effic@") == 0) {
                hfile << Efficiency << endl;
            } else if (strcmp(tag, "@exec@") == 0) {
                hfile << Execution_time << endl;
            } else if (strcmp(tag, "@total@") == 0) {
                hfile << Total_time << endl;
            } else if (strcmp(tag, "@ptime@") == 0) {
                hfile << Productive_time << endl;
            } else if (strcmp(tag, "@ptimec@") == 0) {
                hfile << Productive_CPU_time << endl;
            } else if (strcmp(tag, "@ptimes@") == 0) {
                hfile << Productive_SYS_time << endl;
            } else if (strcmp(tag, "@ptimei@") == 0) {
                hfile << IO_time << endl;
            } else if (strcmp(tag, "@lost@") == 0) {
                hfile << Lost_time << endl;
            } else if (strcmp(tag, "@insuf@") == 0) {
                hfile << Insuff_parallelism << endl;
            } else if (strcmp(tag, "@iuser@") == 0) {
                hfile << Insuff_parallelism_usr << endl;
            } else if (strcmp(tag, "@isyst@") == 0) {
                hfile << Insuff_parallelism_sys << endl;
            } else if (strcmp(tag, "@comm@") == 0) {
                hfile << Communication << endl;
            } else if (strcmp(tag, "@csyn@") == 0) {
                hfile << Real_synchronization << endl;
            } else if (strcmp(tag, "@idle@") == 0) {
                hfile << Idle << endl;
            } else if (strcmp(tag, "@imbal@") == 0) {
                hfile << Load_imbalance << endl;
            } else if (strcmp(tag, "@synch@") == 0) {
                hfile << Synchronization << endl;
            } else if (strcmp(tag, "@vary@") == 0) {
                hfile << Variation << endl;
            } else if (strcmp(tag, "@over@") == 0) {
                hfile << Overlap << endl;
            // ---- I/O row ----
            } else if (strcmp(tag, "@nopi@") == 0) {
                hfile << num_op_io << endl;
            } else if (strcmp(tag, "@comi@") == 0) {
                if (outOn) hfile << IO_comm << endl;
            } else if (strcmp(tag, "@rsynchi@") == 0) {
                if (outOn) hfile << IO_real_synch << endl;
            } else if (strcmp(tag, "@synchi@") == 0) {
                if (outOn) hfile << IO_synch << endl;
            } else if (strcmp(tag, "@varyi@") == 0) {
                if (outOn) hfile << IO_vary << endl;
            } else if (strcmp(tag, "@overi@") == 0) {
                if (outOn) hfile << IO_overlap << endl;
            // ---- Reduction row ----
            } else if (strcmp(tag, "@nopr@") == 0) {
                hfile << num_op_reduct << endl;
            } else if (strcmp(tag, "@comr@") == 0) {
                if (outOn) hfile << Wait_reduction << endl;
            } else if (strcmp(tag, "@rsynchr@") == 0) {
                if (outOn) hfile << Reduction_real_synch << endl;
            } else if (strcmp(tag, "@synchr@") == 0) {
                if (outOn) hfile << Reduction_synch << endl;
            } else if (strcmp(tag, "@varyr@") == 0) {
                if (outOn) hfile << Reduction_vary << endl;
            } else if (strcmp(tag, "@overr@") == 0) {
                if (outOn) hfile << Reduction_overlap << endl;
            // ---- Shadow row ----
            } else if (strcmp(tag, "@nops@") == 0) {
                hfile << num_op_shadow << endl;
            } else if (strcmp(tag, "@coms@") == 0) {
                if (outOn) hfile << Wait_shadow << endl;
            } else if (strcmp(tag, "@rsynchs@") == 0) {
                if (outOn) hfile << Shadow_real_synch << endl;
            } else if (strcmp(tag, "@synchs@") == 0) {
                if (outOn) hfile << Shadow_synch << endl;
            } else if (strcmp(tag, "@varys@") == 0) {
                if (outOn) hfile << Shadow_vary << endl;
            } else if (strcmp(tag, "@overs@") == 0) {
                if (outOn) hfile << Shadow_overlap << endl;
            // ---- Remote access row ----
            } else if (strcmp(tag, "@nopa@") == 0) {
                hfile << num_op_remote << endl;
            } else if (strcmp(tag, "@coma@") == 0) {
                if (outOn) hfile << Remote_access << endl;
            } else if (strcmp(tag, "@rsyncha@") == 0) {
                if (outOn) hfile << Remote_real_synch << endl;
            } else if (strcmp(tag, "@syncha@") == 0) {
                if (outOn) hfile << Remote_synch << endl;
            } else if (strcmp(tag, "@varya@") == 0) {
                if (outOn) hfile << Remote_vary << endl;
            } else if (strcmp(tag, "@overa@") == 0) {
                if (outOn) hfile << Remote_overlap << endl;
            // ---- Redistribution row ----
            } else if (strcmp(tag, "@nopd@") == 0) {
                hfile << num_op_redist << endl;
            } else if (strcmp(tag, "@comd@") == 0) {
                if (outOn) hfile << Redistribution << endl;
            } else if (strcmp(tag, "@rsynchd@") == 0) {
                if (outOn) hfile << Redistribution_real_synch << endl;
            } else if (strcmp(tag, "@synchd@") == 0) {
                if (outOn) hfile << Redistribution_synch << endl;
            } else if (strcmp(tag, "@varyd@") == 0) {
                if (outOn) hfile << Redistribution_vary << endl;
            } else if (strcmp(tag, "@overd@") == 0) {
                if (outOn) hfile << Redistribution_overlap << endl;
            // ---- Nested intervals table ----
            } else if (strcmp(tag, "@nestbeg@") == 0) {
                // No nested intervals: mute everything up to @nestend@.
                if (count == 0)
                    outOn = false;
            } else if (strcmp(tag, "@nesteds@") == 0) {
                // Remember where a table row starts so @nestedf@ can loop back.
                if (outOn)
                    j = i;
            } else if (strcmp(tag, "@url@") == 0) {
                if (outOn) {
                    hfile << "\"#" << _itoa(nested_intervals[k]->ID, idv, 10)
                          << '"' << endl;
                }
            } else if (strcmp(tag, "@go01@") == 0) {
                if (outOn)
                    hfile << k+1 << endl;
            } else if (strcmp(tag, "@type@") == 0) {
                if (outOn) {
                    switch (nested_intervals[k]->type) {
                        case __IT_MAIN :
                            hfile << "Main" << endl;
                            break;
                        case __IT_PAR :
                            hfile << "Par" << endl;
                            break;
                        case __IT_SEQ :
                            hfile << "Seq" << endl;
                            break;
                        case __IT_USER :
                            hfile << "User" << endl;
                            break;
                    }
                }
            } else if (strcmp(tag, "@lev@") == 0) {
                if (outOn) {
                    hfile << nested_intervals[k]->level << endl;
                }
            } else if (strcmp(tag, "@coun@") == 0) {
                if (outOn) {
                    hfile << nested_intervals[k]->EXE_count << endl;
                }
            } else if (strcmp(tag, "@line@") == 0) {
                if (outOn) {
                    hfile << nested_intervals[k]->source_line << endl;
                }
            } else if (strcmp(tag, "@expr@") == 0) {
                if ((outOn) && (nested_intervals[k]->index != NO_EXPR))
                    hfile << nested_intervals[k]->index << endl;
            } else if (strcmp(tag, "@file@") == 0) {
                // Same NULL guard as @filec@: streaming a null char* is undefined
                // behaviour and in practice puts the stream into a failed state.
                if (outOn && nested_intervals[k]->source_file != NULL) {
                    hfile << nested_intervals[k]->source_file << endl;
                }
            } else if (strcmp(tag, "@nestedf@") == 0) {
                if (outOn) {
                    k++;
                    if (k < count) {
                        // More nested intervals: replay the row template. The loop's
                        // i++ then lands on the line right after @nesteds@.
                        i = j;
                        continue;
                    }
                }
            } else if (strcmp(tag, "@nestend@") == 0) {
                outOn = true;
            // ---- Navigation links ----
            } else if (strcmp(tag, "@up@") == 0) {
                hfile << "\"#" << _itoa(up, idv, 10) << '"' << endl;
            } else if (strcmp(tag, "@pred@") == 0) {
                hfile << "\"#" << _itoa(pred, idv, 10) << '"' << endl;
            } else if (strcmp(tag, "@next@") == 0) {
                hfile << "\"#" << _itoa(next, idv, 10) << '"' << endl;
            } else if (strcmp(tag, "@home@") == 0) {
                hfile << "\"#" << _itoa(1, idv, 10) << '"' << endl;
            }
        } else if (outOn) {
            // Plain template line: copy as is.
            hfile << tag << endl;
        }
    }
    return;
}
// Save all interval's tree as HTML file (recursive)
// Write this interval and then, recursively, every nested interval to hfile.
// For each child the "up" link is this interval's ID; "pred"/"next" are the IDs
// of the neighbouring siblings, falling back to this interval's ID for the
// first and last child respectively.
void Interval::SaveTree(ofstream& hfile, int up, int next, int pred)
{
    SaveInFile(hfile, up, next, pred);

    for (int child = 0; child < count; child++) {
        int childPred;
        if (child == 0)
            childPred = ID;
        else
            childPred = nested_intervals[child-1]->ID;

        int childNext;
        if (child == count - 1)
            childNext = ID;
        else
            childNext = nested_intervals[child+1]->ID;

        nested_intervals[child]->SaveTree(hfile, ID, childNext, childPred);
    }
}

View File

@@ -0,0 +1,171 @@
#ifndef _INTERVAL_H
#define _INTERVAL_H
#include <limits.h>
#include <fstream>
#include "FuncCall.h"
#include "Processor.h"
// Kind of an interval in the interval tree.
// Interval::SaveInFile prints these in the report as "Main", "Seq", "Par" and
// "User" respectively.
enum IntervalType {
__IT_MAIN = 0, // default type of the Interval constructor
__IT_SEQ,
__IT_PAR,
__IT_USER
};
// Sentinel for Interval::index: it is the default of the constructor's index
// argument, and SaveInFile prints nothing in the "Expr" column when index equals it.
#define NO_EXPR 2000000000
// Selector type taken by Interval::GetPred and Interval::GetProcPred.
// Each enumerator is named after a timing field with the same name minus the
// leading underscore; presumably it selects that field — confirm in GetPred.
// The enumerators have implicit values, so their order is significant.
enum PredType{
_Lost_time,
_Insuff_parallelism,
_Insuff_parallelism_usr, // User insufficient parallelism
_Insuff_parallelism_sys, // System insufficient parallelism
_Idle, // Idle time
_Communication, // Communications
_Synchronization, // Synchronization
_Real_synchronization, // Real synchronization
_Variation, // Time variation
_Overlap, // Overlap
_Load_imbalance, // Load imbalance
_Execution_time, // Execution time
_CPU_time, // CPU_time_usr + CPU_time_sys
_CPU_time_usr, // Useful processor time
_CPU_time_sys, // Useful system time
_IO_time,
_IO_comm, // IO: Communications
_IO_real_synch, // IO: Real synch
_IO_synch, // IO: Synchronization
_IO_vary, // IO: Time variation
_IO_overlap, // IO: Overlap
_Wait_reduction, // Reduction: Communications
_Reduction_real_synch, // Reduction: Real synch
_Reduction_synch, // Reduction: Synchronization
_Reduction_vary, // Reduction: Time variation
_Reduction_overlap, // Reduction: Overlap
_Wait_shadow, // Shadow: Communications
_Shadow_real_synch, // Shadow: Real synch
_Shadow_synch, // Shadow: Synchronization
_Shadow_vary, // Shadow: Time variation
_Shadow_overlap, // Shadow: Overlap
_Remote_access, // Remote access: Communications
_Remote_real_synch, // Remote access: Real synch
// NOTE: in this group "vary" precedes "synch", unlike the other groups.
_Remote_vary, // Remote access: Time variation
_Remote_synch, // Remote access: Synchronization
_Remote_overlap, // Remote access: Overlap
_Redistribution, // Redistribution: Communications
_Redistribution_real_synch, // Redistribution: Real synch
_Redistribution_synch, // Redistribution: Synchronization
_Redistribution_vary, // Redistribution: Time variation
_Redistribution_overlap, // Redistribution: Overlap
_Total_time,
_Efficiency,
_Productive_time,
_Productive_CPU_time,
_Productive_SYS_time
};
// One node of the interval tree.
// An Interval is itself a Processor (it inherits the timing fields, which hold
// the interval's totals after Integrate()) and additionally owns a vector of
// per-processor accumulators (Procs) and a list of nested intervals.
class Interval : public Processor {
static int Intervallevel; // current interval level (incremented by Enter, decremented by Leave)
static int IntervalID; // current interval ID
IntervalType type; // Interval type
long index; // expression value shown in the "Expr" column; NO_EXPR when absent
int level; // Interval level
int EXE_count; // incremented by Enter each time an existing interval is entered again
int source_line; // source line that identifies the interval (matched in Enter)
int ID; // used as the HTML anchor name of this interval
// for interval's tree
Interval * parent_interval; // Leave() makes this the current interval
int count; // number of entries in nested_intervals
Interval ** nested_intervals;
Processor ** Procs; // processor's vector
// Derived figures, computed by Integrate():
double Total_time; // Execution_time * rootProcCount
double Efficiency; // Productive_time / Total_time (1.0 or DBL_MAX when Total_time is 0)
double Productive_time; // Productive_CPU_time + Productive_SYS_time + IO_time
double Productive_CPU_time; // CPU_time_usr - Insuff_parallelism_usr
double Productive_SYS_time; // CPU_time_sys - Insuff_parallelism_sys
public:
char * source_file; //==// source file name; SaveInFile treats NULL as "no file"
bool io_trafic; // start FORTRAN I/O
// Operation counters; Integrate() adds the nested intervals' counters to these.
int num_op_io;
int num_op_reduct;
int num_op_shadow;
int num_op_remote;
int num_op_redist;
char *html_title; // optional title prefix for the HTML report; may be NULL
Interval(int arg); // NOTE(review): original comment lost to an encoding error; purpose of this overload is not visible here
Interval(int iline = TraceLine::first_line_number,
char * ifile = TraceLine::first_file_name,
IntervalType itype = __IT_MAIN,
long index = NO_EXPR,
Interval * parent_interval = NULL);
~Interval();
void AddTime(TimeType InfoType, int proc_no, double TimeDelta);
// Add the same TimeDelta to every processor of the current processor system.
void AddMPSTime(TimeType InfoType, double TimeDelta);
//grig
// Per-processor variant: element i of vTimeDelta goes to the i-th mapped processor.
// NOTE(review): this header names std::vector without including <vector> itself.
void AddMPSTime(TimeType InfoType, std::vector<double> vTimeDelta);
double GetEffectiveParameter(); // currently returns Execution_time (original comment lost to an encoding error)
double GetExecTime(); // returns Execution_time (original comment lost to an encoding error)
//\grig
void AddTimeSynchronize(TimeType InfoType, int proc_no, double TimeDelta);
void AddTimeVariation(TimeType InfoType, int proc_no, double TimeDelta);
// Fill Idle and Load_imbalance of the mapped processors from the per-processor maxima.
void CalcIdleAndImbalance();
// Enter/Leave move the global CurrInterval down to a nested interval and back up.
static void Enter(IntervalType int_type, int line, char* file, long index);
static void Leave();
// Sum processors and nested intervals into this interval and compute derived figures.
void Integrate();
// Write this interval (SaveInFile) or the whole subtree (SaveTree) as HTML.
void SaveInFile(std::ofstream& hfile, int up, int next, int pred);
void SaveTree(std::ofstream& hfile, int up, int next, int pred);
void setIOTrafic() { io_trafic = true; }
void resetIOTrafic() { io_trafic = false; }
//====
int copy(Interval* from);
int copy_poss(Interval* from, double p1, double p2);
double GetProcPred(int proc_no, PredType pred);
double GetPred(PredType pred);
//=***
friend void CreateHTMLfile();
};
extern Interval * CurrInterval; // pointer to current interval
// Convenience wrapper: forwards to AddTime of the current interval.
inline void AddTime(TimeType InfoType, int proc_no, double TimeDelta)
{
    CurrInterval->AddTime(InfoType, proc_no, TimeDelta);
}
// Convenience wrapper: forwards to AddMPSTime (single delta) of the current interval.
inline void AddMPSTime(TimeType InfoType, double TimeDelta)
{
    CurrInterval->AddMPSTime(InfoType, TimeDelta);
}
//grig
// Convenience wrapper: forwards to AddMPSTime (per-processor deltas) of the
// current interval.
inline void AddMPSTime(TimeType InfoType, std::vector<double> vTimeDelta)
{
    CurrInterval->AddMPSTime(InfoType, vTimeDelta);
}
//\grig
// Convenience wrapper: forwards to AddTimeSynchronize of the current interval.
inline void AddTimeSynchronize(TimeType InfoType, int proc_no, double TimeDelta)
{
    CurrInterval->AddTimeSynchronize(InfoType, proc_no, TimeDelta);
}
// Convenience wrapper: forwards to AddTimeVariation of the current interval.
inline void AddTimeVariation(TimeType InfoType, int proc_no, double TimeDelta)
{
    CurrInterval->AddTimeVariation(InfoType, proc_no, TimeDelta);
}
#endif

View File

@@ -0,0 +1,610 @@
//---------------------------------------------------------------------------------
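// Interval HTML file template.
// Each array element is one line of output. Elements of the form @name@ are
// placeholders that Interval::SaveInFile replaces with interval data; all other
// elements are copied verbatim. The array is terminated by a NULL entry.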
//---------------------------------------------------------------------------------
#include <stdlib.h>
#include "Ver.h"
char *interval[] = {
"<HTML>",
"<HEAD>",
" <META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=iso-8859-1\">",
" <META NAME=\"Author\" CONTENT=\"Valentin Emelianov\">",
" <META NAME=\"GENERATOR\" CONTENT=\"Mozilla/4.05 [en] (Win95; I) [Netscape]\">",
" <TITLE>",
"@title@",
"</TITLE>",
"</HEAD>",
"<BODY TEXT=\"#000000\" BGCOLOR=\"#FFF0F0\" LINK=\"#FF0000\" VLINK=\"#800080\" ALINK=\"#0000FF\">",
"&nbsp;",
"<A NAME=",
"@label@",
"></A>",
"<CENTER><TABLE BORDER=2 WIDTH=\"90%\" BGCOLOR=\"#FFFFCC\" >",
"<CAPTION><B><BLINK><FONT COLOR=\"#FF6666\"><FONT SIZE=-1>INTERVAL</FONT></FONT></BLINK></B></CAPTION>",
"",
"<TR>",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Type</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Level</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Count</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Line</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Expr</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>File</FONT></B></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#FFFFCC\">",
"<CENTER><FONT SIZE=-1>",
"@typec@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@levc@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@counc@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@linec@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@exprc@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@filec@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"</TABLE></CENTER>",
"&nbsp;",
"<CENTER><TABLE BORDER=2 WIDTH=\"90%\" BGCOLOR=\"#CCFFFF\" >",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Processors</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@proc@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Efficiency</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@effic@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Execution time</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@exec@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Total time</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@total@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>&nbsp;&nbsp;&nbsp; Productive time</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@ptime@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>CPU = </B>",
"@ptimec@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>SYS = </B>",
"@ptimes@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>I/O = </B>",
"@ptimei@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>&nbsp;&nbsp;&nbsp; Lost time</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@lost@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Insufficient parallelism</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@insuf@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>USR = </B>",
"@iuser@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>SYS = </B>",
"@isyst@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Communications</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@comm@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1><B>SYN = </B>",
"@csyn@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><FONT SIZE=-1>&nbsp;<B>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Idle time</B></FONT></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@idle@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Load imbalance</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@imbal@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Synchronization</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@synch@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Time variation</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@vary@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#33CCFF\"><B><FONT SIZE=-1>Overlap</FONT></B></TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@over@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"</TABLE></CENTER>",
"&nbsp;",
"<CENTER><TABLE BORDER=2 WIDTH=\"90%\" BGCOLOR=\"#99FF99\" >",
"<TR BGCOLOR=\"#00CC00\">",
"<TD>",
"<CENTER>&nbsp;</CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1># op</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1>Communications</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1>Real synch</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1>Synch</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1>Variation</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><B><FONT SIZE=-1>Overlap</FONT></B></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#00CC00\">",
"<CENTER><B><FONT SIZE=-1>I/O</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@nopi@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@comi@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@rsynchi@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@synchi@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@varyi@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@overi@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#00CC00\">",
"<CENTER><B><FONT SIZE=-1>Reduction</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@nopr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@comr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@rsynchr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@synchr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@varyr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@overr@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#00CC00\">",
"<CENTER><B><FONT SIZE=-1>Shadow</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@nops@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@coms@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@rsynchs@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@synchs@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@varys@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@overs@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"",
"<TR>",
"<TD BGCOLOR=\"#00CC00\">",
"<CENTER><B><FONT SIZE=-1>Remote access</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@nopa@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@coma@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@rsyncha@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@syncha@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@varya@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@overa@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"<TR>",
"<TD BGCOLOR=\"#00CC00\">",
"<CENTER><B><FONT SIZE=-1>Redistribution</FONT></B></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@nopd@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@comd@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@rsynchd@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@synchd@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@varyd@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@overd@",
"</FONT></CENTER>",
"</TD>",
"</TR>",
"</TABLE></CENTER>",
"@nestbeg@",
"&nbsp;",
"<CENTER><TABLE BORDER=2 WIDTH=\"90%\" BGCOLOR=\"#FFFFCC\" >",
"<CAPTION><B><BLINK><FONT COLOR=\"#FF6666\"><FONT SIZE=-1>NESTED&nbsp; INTERVALS</FONT></FONT></BLINK></B></CAPTION>",
"",
"<TR>",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Go</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Type</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Level</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Count</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Line</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>Expr</FONT></B></CENTER>",
"</TD>",
"",
"<TD BGCOLOR=\"#FFCC00\">",
"<CENTER><B><FONT SIZE=-1>File</FONT></B></CENTER>",
"</TD>",
"</TR>",
"",
"@nesteds@",
"<TR>",
"<TD BGCOLOR=\"#FFFFCC\">",
"<CENTER><FONT SIZE=-1><A HREF=",
"@url@",
">",
"@go01@",
"</A></FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@type@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@lev@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@coun@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@line@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@expr@",
"</FONT></CENTER>",
"</TD>",
"",
"<TD>",
"<CENTER><FONT SIZE=-1>",
"@file@"
,"</FONT></CENTER>",
"</TD>",
"</TR>",
"@nestedf@",
"</TABLE></CENTER>",
"@nestend@",
"&nbsp;",
"<CENTER><TABLE BORDER CELLPADDING=3 COLS=4 WIDTH=\"30%\" BGCOLOR=\"#CCCCCC\" >",
"<TR>",
"<TD ALIGN=CENTER><B><FONT FACE=\"Arial,Helvetica\"><FONT SIZE=-1><A HREF=",
"@up@",
">UP</A></FONT></FONT></B></TD>",
"<TD ALIGN=CENTER><B><FONT FACE=\"Arial,Helvetica\"><FONT SIZE=-1><A HREF=",
"@pred@",
">PRED</A></FONT></FONT></B></TD>",
"<TD ALIGN=CENTER><B><FONT FACE=\"Arial,Helvetica\"><FONT SIZE=-1><A HREF=",
"@next@",
">NEXT</A></FONT></FONT></B></TD>",
"<TD ALIGN=CENTER><B><FONT FACE=\"Arial,Helvetica\"><FONT SIZE=-1><A HREF=",
"@home@",
">HOME</A></FONT></FONT></B></TD>",
"</TR>",
"</TABLE></CENTER>",
"<HR SIZE=3 NOSHADE WIDTH=\"100%\">",
"</BODY>",
"</HTML>",
"",
NULL
};

View File

@@ -0,0 +1,817 @@
//////////////////////////////////////////////////////////////////////
//
// LoopBlock.cpp: implementation of the Block class.
//
//////////////////////////////////////////////////////////////////////
#include <assert.h>
#include "LoopBlock.h"
#ifndef _MSC_VER
// Returns the smaller of the two arguments; b is returned when a is not
// strictly less than b.
template <class T>
T _MIN(T a, T b)
{
    if (a < b)
        return a;
    return b;
}
// Returns the larger of the two arguments; a is returned when the two
// compare equal.
template <class T>
T _MAX(T a, T b)
{
    if (a >= b)
        return a;
    return b;
}
#endif
using namespace std;
// Number of loop dimensions stored in LSDim.
long LoopBlock::GetRank()
{
    const long rank = LSDim.size();
    return rank;
}
// True when the block holds no dimensions at all.
bool LoopBlock::empty()
{
    return LSDim.size() == 0;
}
long LoopBlock::GetBlockSize()
{
unsigned int i;
long size = 1;
if (LSDim.empty())
return 0;
for (i = 0; i < LSDim.size(); i++)
{
size *= LSDim[i].GetLoopLSSize();
}
return size;
}
// Creates a block without any dimensions.
LoopBlock::LoopBlock()
{
    LSDim.clear();
}
// Destructor with an empty body: no explicit cleanup is performed here.
LoopBlock::~LoopBlock()
{
}
/*
LoopBlock::LoopBlock(ParLoop *pl, long ProcLI)
{
int i;
long vmDimSize, dimProcI;
long amDimSize, amAxis;
long plAxis;
long amLower, amUpper, BlockSize; // Param, Module;
bool IsBlockEmpty = false;
vector<long> ProcSI;
DistAxis dist;
AlignAxis align, plAlign;
LoopLS ls;
AMView* am = pl->AM_Dis;
assert(am != NULL);
VM* vm = am->VM_Dis;
assert(vm != NULL);
long amRank = am->Rank();
long vmRank = vm->Rank();
long plRank = pl->Rank;
vm->GetSI(ProcLI, ProcSI);
LSDim.reserve(plRank);
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> (<28><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> 1)
for (i = 0; i < plRank; i++)
LSDim.push_back(LoopLS(pl->LowerIndex[i], pl->HigherIndex[i], 1));
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
for (i = 0; i < vmRank; i++)
{ dist = am->DistRule[amRank + i];
switch (dist.Attr)
{ case map_NORMVMAXIS :
amAxis = dist.Axis;
amDimSize = am->GetSize(amAxis);
dimProcI = ProcSI[i];
vmDimSize = vm->GetSize(i+1);
BlockSize = (amDimSize - 1) / vmDimSize + 1;
amLower = dimProcI * BlockSize;
amUpper = _MIN(amDimSize, amLower+BlockSize) - 1;
IsBlockEmpty = IsBlockEmpty || amLower > amUpper;
if(IsBlockEmpty) break;
align = pl->AlignRule[plRank+amAxis-1];
switch(align.Attr)
{ case align_NORMTAXIS :
plAxis = align.Axis;
plAlign = pl->AlignRule[plAxis-1];
ls = LoopLS(amLower, amUpper, 1);
ls.transform(plAlign.A, plAlign.B, pl->GetSize(plAxis-1));
if (ls.empty())
IsBlockEmpty = true;
else
{ ls.Lower = _MAX(ls.Lower, (long)0);
ls.Upper = _MIN(ls.Upper, (long)(pl->GetSize(plAxis-1)-1));
LSDim[plAxis-1] = ls; // LSDim <20> <20><><EFBFBD><EFBFBD>
};
break;
case align_BOUNDREPL :
ls = LoopLS(amLower, amUpper, 1);
ls.transform(align.A, align.B, align.Bound);
if (ls.empty())
IsBlockEmpty = true;
break;
case align_REPLICATE :
break;
case align_CONSTANT :
if (align.B < amLower || align.B > amUpper)
IsBlockEmpty = true;
break;
} // end internal switch
break;
case map_REPLICATE :
break;
} // end main switch
if (IsBlockEmpty)
break;
} // end for
if (IsBlockEmpty)
{ // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
LSDim = vector<LoopLS>(0);
}
else
{
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
for(i=0; i<plRank; i++)
{ LSDim[i].Lower = pl->LoopStep[i] * (long)ceil((double)LSDim[i].Lower/(double)pl->LoopStep[i]);
LSDim[i].Upper = pl->LoopStep[i] * (LSDim[i].Upper / pl->LoopStep[i]);
LSDim[i].Step = pl->LoopStep[i];
if(LSDim[i].Lower > LSDim[i].Upper)
break;
};
if(i==plRank)
{ // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
for(i=0; i<plRank; i++)
{ LSDim[i].Lower += pl->LowerIndex[i];
LSDim[i].Upper += pl->LowerIndex[i];
};
}
else
{ // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
LSDim = vector<LoopLS>(0);
};
}
}
*/
// Builds the block of iterations of parallel loop pl computed for the
// processor with linear index ProcLI.
//   pl     - parallel loop description; pl->AM_Dis and that view's VM_Dis
//            must be non-NULL (both are asserted).
//   ProcLI - linear processor index; converted to per-dimension processor
//            indices (ProcSI) with vm->GetSI.
//   a      - not referenced anywhere in the body.
//            NOTE(review): presumably only selects this overload - confirm.
// On exit LSDim holds one LoopLS per loop dimension, or is empty when the
// block turned out to be empty.
LoopBlock::LoopBlock(ParLoop *pl, long ProcLI,int a)
{
int i;
long vmDimSize, dimProcI;
long amDimSize, amAxis;
long plAxis;
long amLower, amUpper, BlockSize; // Param, Module;
bool IsBlockEmpty = false;
vector<long> ProcSI;
DistAxis dist;
AlignAxis align, plAlign;
LoopLS ls;
AMView* am = pl->AM_Dis;
// printf("LOOPBLOCK AM=%lx am->weightEl.ID=%lx\n",am, am->weightEl.ID);
assert(am != NULL);
VM* vm = am->VM_Dis;
assert(vm != NULL);
long amRank = am->Rank();
long vmRank = vm->Rank();
long plRank = pl->Rank;
vm->GetSI(ProcLI, ProcSI);
//grig
std::vector<double> avWeights;
int j;
long local_sum=0; // offset into weightEl.body: sum of the sizes of the VM dimensions preceding the current one
long jmax; // size of the current VM dimension
double vBlockSize,temp_w=0; // block size per unit of weight; sum of the weights along the current VM dimension
double sum1=0; // accumulated (weighted) size of the blocks of the preceding processors along the current dimension
//grig
LSDim.reserve(plRank);
// Start from the full range of every loop dimension (lower index, higher index, step 1)
for (i = 0; i < plRank; i++)
{
// printf("1. INIT %d %d %d\n",pl->LowerIndex[i], pl->HigherIndex[i], 1);
LSDim.push_back(LoopLS(pl->LowerIndex[i], pl->HigherIndex[i], 1));
}
// Narrow the ranges, one VM dimension at a time
for (i = 0; i < vmRank; i++)
{ dist = am->DistRule[amRank + i];
switch (dist.Attr)
{ case map_NORMVMAXIS :
amAxis = dist.Axis;
amDimSize = am->GetSize(amAxis);
dimProcI = ProcSI[i];
vmDimSize = vm->GetSize(i+1);
// NOTE(review): BlockSize is not read again in this constructor.
BlockSize = (amDimSize - 1) / vmDimSize + 1;
//grig
// NOTE(review): avWeights is not read afterwards in this constructor; the
// weights used below come from am->weightEl.body.
am->weightEl.GetWeights(avWeights);
local_sum=0; // recomputed below for the current VM dimension
jmax=vm->GetSize(i+1); // size of the current VM dimension
// Comma expression: only temp_w is reset here; vBlockSize is merely evaluated.
vBlockSize,temp_w=0; //
sum1=0;
long lBlockSize;
for(j=0;j<i;j++)
{
local_sum+=vm->GetSize(j+1);
}
// printf("size()=%d arr=%d %d %d %d %d %d\n",am->weightEl.body.size(),am->weightEl.body[0],am->weightEl.body[1],am->weightEl.body[2],am->weightEl.body[3],am->weightEl.body[4],am->weightEl.body[5]);
// temp_w = sum of the weights of this VM dimension, or 1 when no weights are stored.
// NOTE(review): with no stored weights vBlockSize below equals amDimSize - confirm this is intended.
if(am->weightEl.body.size() == 0) temp_w=1;
else
{ for(j=0;j<jmax;j++)
temp_w+=am->weightEl.body[j+local_sum]; // accumulate the weights of this dimension
}
// printf("temp_w=%f\n",temp_w);
vBlockSize = amDimSize/temp_w; // block size per unit of weight
// NOTE(review): lBlockSize computed here is only read inside the BSize branch below, where it is overwritten first.
lBlockSize=ceil((double)amDimSize/temp_w) > 0.5 ? amDimSize/temp_w+ 1 : amDimSize/temp_w;
//====
if(am->BSize.size()>0)
{ // When BSize is given, the dimension size must be divisible by BSize[i]
// and the block size is rounded up to a multiple of BSize[i].
// NOTE(review): the error path below terminates the process with exit status 0,
// and prints amDimSize (a long) with %d.
if(amDimSize % am->BSize[i] !=0 ) { printf("Error: Dimension %d is not dividible by %d \n",amDimSize, am->BSize[i]); exit(0);}
lBlockSize=(long)ceil(vBlockSize);
if( ( lBlockSize % am->BSize[i]) > 0)
lBlockSize = ( lBlockSize / am->BSize[i] + 1) * am->BSize[i];
vBlockSize=lBlockSize;
}
// printf("ok 66666\n");
//=***
/*
Disabled alternative rounding of vBlockSize into lBlockSize:
if(vBlockSize - ceil(vBlockSize)<0.5)
{
lBlockSize=floor(vBlockSize);
}
else
lBlockSize= ceil(vBlockSize);
*/
// sum1 = total size of the blocks of processors 0..dimProcI-1 along this dimension
// (vBlockSize scaled by each processor's weight when weights are stored).
for(j=0;j<dimProcI;j++)
{
sum1+= ((am->weightEl.body.size() != 0)? (vBlockSize*am->weightEl.body[j+local_sum]) : vBlockSize);
}
// Both bounds are computed in double and truncated to long on assignment.
amLower=sum1;
amUpper=(double)sum1 + ((am->weightEl.body.size() != 0)? (vBlockSize*am->weightEl.body[dimProcI+local_sum]-1) : vBlockSize - 1);
// printf("ok 777\n");
//====
// Disabled adjustment of the upper bound (kept for reference):
// if(amUpper+1==amDimSize-1)
// amUpper=amDimSize-1;
//=***
// An inverted range means the block is empty.
IsBlockEmpty = IsBlockEmpty || amLower > amUpper;
if(IsBlockEmpty) break;
// Alignment rule for AM axis amAxis; these entries are stored after the
// first plRank entries of AlignRule.
align = pl->AlignRule[plRank+amAxis-1];
switch(align.Attr)
{ case align_NORMTAXIS :
plAxis = align.Axis;
plAlign = pl->AlignRule[plAxis-1];
ls = LoopLS(amLower, amUpper, 1);
// Map the AM range onto loop axis plAxis; the last argument is the extent of that loop dimension.
ls.transform(plAlign.A, plAlign.B, (pl->HigherIndex[plAxis-1] - pl->LowerIndex[plAxis-1]+1 ));
// for(j=0; j<plRank; j++)
// {
//printf("LSDimCheck empty[%d] %d %d %d\n",ProcLI, LSDim[j].Lower,LSDim[j].Upper,LSDim[j].Step);
//printf("LSPLDimFirst[%d] %d %d %d\n",ProcLI, pl->LowerIndex[j],pl->HigherIndex[j],pl->LoopStep[j]);
// }
//printf("LS[%d] %d %d %d\n",ProcLI, ls.Lower, ls.Upper, ls.Step);
if (ls.empty())
{
// NOTE(review): this trace is printed unconditionally, before the flag is set.
printf("EMPTY %d\n",IsBlockEmpty);
IsBlockEmpty = true;
}
else
{
//====// ls.Lower = _MAX(ls.Lower, (long)0);
//====// ls.Upper = _MIN(ls.Upper, (long)(pl->HigherIndex[plAxis-1] - pl->LowerIndex[plAxis-1]));
//int ii;
// printf("Proc current = %d\n",ProcLI);
// for(ii=0;ii<this->LSDim.size();ii++)
// printf("LSDim %d %d %d\n",this->LSDim[ii].Lower,this->LSDim[ii].Upper,this->LSDim[ii].Step);
LSDim[plAxis-1] = ls; // store the narrowed range for loop axis plAxis
// printf("Proc current = %d\n",ProcLI);
// for(ii=0;ii<this->LSDim.size();ii++)
// printf("LSDim %d %d %d\n",this->LSDim[ii].Lower,this->LSDim[ii].Upper,this->LSDim[ii].Step);
};
//==== this block was moved from down
// for(i=0; i<plRank; i++)
// { LSDim[i].Lower += pl->LowerIndex[i];
// LSDim[i].Upper += pl->LowerIndex[i];
// }
// LSDim[plAxis-1].Lower += pl->LowerIndex[plAxis-1];
// LSDim[plAxis-1].Upper += pl->LowerIndex[plAxis-1];
//printf("PlAxis=%d\n",plAxis);
//=***
break;
case align_BOUNDREPL :
// Only emptiness is checked here; the transformed range is not stored in LSDim.
ls = LoopLS(amLower, amUpper, 1);
ls.transform(align.A, align.B, align.Bound);
if (ls.empty())
IsBlockEmpty = true;
break;
case align_REPLICATE :
break;
case align_CONSTANT :
// The block is empty unless the constant align.B lies inside [amLower, amUpper].
if (align.B < amLower || align.B > amUpper)
IsBlockEmpty = true;
break;
} // end internal switch
break;
case map_REPLICATE :
//====
/* printf("----%d-%d-%d-%d-",i,vm->GetSize(0),vm->GetSize(1),vm->GetSize(2));
LSDim[i-1].Upper -= pl->LowerIndex[i-1];;
LSDim[i-1].Lower -= pl->LowerIndex[i-1];;
{ int ii;
for(ii=0; ii<plRank; ii++)
printf("*****LoopBlock %d %d %d**",LSDim[ii].Lower,LSDim[ii].Upper,pl->LowerIndex[ii]);
}
*/
//=***
break;
} // end main switch
if (IsBlockEmpty)
break;
} // end for
if (IsBlockEmpty)
{ // Empty block: drop all dimensions
LSDim = vector<LoopLS>(0);
}
else
{
// Align the bounds of every dimension with the loop step
for (i = 0; i < plRank; i++)
{
// Lower is rounded up (ceil) to a multiple of the step; Upper is reduced
// to a multiple of the step by integer division.
LSDim[i].Lower = pl->LoopStep[i] * (long)ceil((double)LSDim[i].Lower / (double)pl->LoopStep[i]);
LSDim[i].Upper = pl->LoopStep[i] * (LSDim[i].Upper / pl->LoopStep[i]);
LSDim[i].Step = pl->LoopStep[i];
//printf("LSDim[%d] %d %d %d\n",ProcLI, LSDim[i].Lower,LSDim[i].Upper,LSDim[i].Step);
LSDim[i].Invers = pl->Invers[i];
// Leaving the loop early (i != plRank) marks the block as empty below.
if (LSDim[i].Lower > LSDim[i].Upper)
break;
};
// if(i==plRank)
// { // (disabled) add the loop's lower index to the bounds
// printf("**%d*%d** ",maxAlign,plRank);
// for(i=0; i<plRank; i++)
//==== this block was moved up
// { LSDim[i].Lower += pl->LowerIndex[i];
// LSDim[i].Upper += pl->LowerIndex[i];
//=***
// printf("-LoopBlock %d %d %d ",LSDim[i].Lower,LSDim[i].Upper,pl->LowerIndex[i]);
// };
// }
if(i!=plRank)
{ // A dimension became empty after step alignment: the whole block is empty
LSDim = vector<LoopLS>(0);
};
}
// printf("Proc number-%d\n",ProcLI);
// for(i=0;i<this->GetRank();i++)
// printf("%d %d %d\n",this->LSDim[i].Lower,this->LSDim[i].Upper,this->LSDim[i].Step);
}
//xp
/*
LoopBlock::LoopBlock(ParLoop *pl, long ProcLI,int a)
{
int i,ii;
long vmDimSize, dimProcI;
long amDimSize, amAxis;
long plAxis;
long amLower, amUpper, BlockSize; // Param, Module;
bool IsBlockEmpty = false;
vector<long> ProcSI;
DistAxis dist;
AlignAxis align, plAlign;
LoopLS ls;
AMView* am = pl->AM_Dis;
// printf("LOOPBLOCK AM=%lx am->weightEl.ID=%lx\n",am, am->weightEl.ID);
assert(am != NULL);
VM* vm = am->VM_Dis;
assert(vm != NULL);
long amRank = am->Rank();
long vmRank = vm->Rank();
long plRank = pl->Rank;
vm->GetSI(ProcLI, ProcSI);
//grig
std::vector<double> avWeights;
int j;
long local_sum=0; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> VM
long jmax; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> Vm
double vBlockSize,temp_w=0; //
double sum1=0;
//grig
LSDim.reserve(plRank);
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> (<28><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> 1)
for (i = 0; i < plRank; i++)
{
// printf("1. INIT %d %d %d\n",pl->LowerIndex[i], pl->HigherIndex[i], 1);
LSDim.push_back(LoopLS(pl->LowerIndex[i], pl->HigherIndex[i], 1));
}
printf("Proc current = %d\n",ProcLI);
for(ii=0;ii<this->LSDim.size();ii++)
printf("LSDim %d %d %d\n",this->LSDim[ii].Lower,this->LSDim[ii].Upper,this->LSDim[ii].Step);
// for(i=0;i<this->LSDim.size();i++)
// {
// printf("INIT %d %d %d\n",this->LSDim[i].Lower,this->LSDim[i].Upper,this->LSDim[i].Step);
// printf("INIT addr %d %d %d\n",&LSDim[i].Lower,&LSDim[i].Upper,&LSDim[i].Step);
// }
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
for (i = 0; i < vmRank; i++)
{
// printf(" i=%d\n",i);
dist = am->DistRule[amRank + i];
switch (dist.Attr)
{ case map_NORMVMAXIS :
amAxis = dist.Axis;
amDimSize = am->GetSize(amAxis);
dimProcI = ProcSI[i];
vmDimSize = vm->GetSize(i+1);
BlockSize = (amDimSize - 1) / vmDimSize + 1;
//grig
am->weightEl.GetWeights(avWeights);
local_sum=0; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> VM
jmax=vm->GetSize(i+1); // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> Vm
vBlockSize,temp_w=0; //
sum1=0;
long lBlockSize;
for(j=0;j<i;j++)
{
local_sum+=vm->GetSize(j+1);
}
// printf("size()=%d arr=%d %d %d %d %d %d\n",am->weightEl.body.size(),am->weightEl.body[0],am->weightEl.body[1],am->weightEl.body[2],am->weightEl.body[3],am->weightEl.body[4],am->weightEl.body[5]);
if(am->weightEl.body.size() == 0) temp_w=1;
else
{ for(j=0;j<jmax;j++)
temp_w+=am->weightEl.body[j+local_sum]; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
}
// printf("temp_w=%f\n",temp_w);
vBlockSize = amDimSize/temp_w; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
lBlockSize=ceil((double)amDimSize/temp_w) > 0.5 ? amDimSize/temp_w+ 1 : amDimSize/temp_w; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
// printf(" Blocksize v=%.0f l=%d \n",vBlockSize, lBlockSize);
//====
if(am->BSize.size()>0)
{ // <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> 1 <20> <20><> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD>, <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if(amDimSize % am->BSize[i] !=0 ) { printf("Error: Dimension %d is not dividible by %d \n",amDimSize, am->BSize[i]); exit(0);}
lBlockSize=(long)ceil(vBlockSize);
if( ( lBlockSize % am->BSize[i]) > 0)
lBlockSize = ( lBlockSize / am->BSize[i] + 1) * am->BSize[i];
vBlockSize=lBlockSize;
}
// printf("ok 66666\n");
//=***
for(j=0;j<dimProcI;j++)
{
sum1+= ((am->weightEl.body.size() != 0)? (vBlockSize*am->weightEl.body[j+local_sum]) : vBlockSize);
}
amLower=sum1;
amUpper=(double)sum1 + ((am->weightEl.body.size() != 0)? (vBlockSize*am->weightEl.body[dimProcI+local_sum]-1) : vBlockSize - 1);
// printf(" amlower=%d amupper=%d\n",amLower,amUpper);
// printf("ok 777\n");
//==== <20><><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 2 <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// if(amUpper+1==amDimSize-1)
// amUpper=amDimSize-1;
//=***
IsBlockEmpty = IsBlockEmpty || amLower > amUpper;
if(IsBlockEmpty) break;
align = pl->AlignRule[plRank+amAxis-1];
//<2F><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> AlignRule!!!
///
// printf("ALIGN plRank=%d amAxis=%d \n",plRank,amAxis);
//
// printf("ALIGN rule \n");
// for(j=0;j<pl->AlignRule.size();j++)
// {
// printf("%d %d %d %d\n",pl->AlignRule[j].A, pl->AlignRule[j].B, pl->AlignRule[j].Axis, pl->AlignRule[j].TAxis);
// }
// printf("\n");
//
switch(align.Attr)
{ case align_NORMTAXIS :
plAxis = align.Axis;
if(plAxis > pl->Rank)
{
printf("BREAK!!!!!!!!!!!!!!! plAxis=%d pl->Rank=%d \n", plAxis, pl->Rank);
break; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>...
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> AlignRule
}
// LU deb
// printf("sz=%d\n",pl->AlignRule.size());
//
// if(1||plAxis-1>=pl->AlignRule.size())
// {
// int i;
// for(i=0;i<pl->AlignRule.size();i++)
// printf("%d %d %d %d ",pl->AlignRule[i].A,pl->AlignRule[i].B,pl->AlignRule[i].Axis,pl->AlignRule[i].TAxis);
// printf("\n");
// }
//
plAlign = pl->AlignRule[plAxis-1];
ls = LoopLS(amLower, amUpper, 1);
ls.transform(plAlign.A, plAlign.B, (pl->HigherIndex[plAxis-1] - pl->LowerIndex[plAxis-1]+1 ));
// for(j=0; j<plRank; j++)
// {
//printf("LSDimCheck empty[%d] %d %d %d\n",ProcLI, LSDim[j].Lower,LSDim[j].Upper,LSDim[j].Step);
//printf("LSPLDimFirst[%d] %d %d %d\n",ProcLI, pl->LowerIndex[j],pl->HigherIndex[j],pl->LoopStep[j]);
// }
printf("LS[%d] %d %d %d\n",ProcLI, ls.Lower, ls.Upper, ls.Step);
if (ls.empty())
{
printf(" EMPTY %d\n",IsBlockEmpty);
IsBlockEmpty = true;
}
else
{
//====// ls.Lower = _MAX(ls.Lower, (long)0);
//====// ls.Upper = _MIN(ls.Upper, (long)(pl->HigherIndex[plAxis-1] - pl->LowerIndex[plAxis-1]));
printf("Proc current = %d\n",ProcLI);
for(ii=0;ii<this->LSDim.size();ii++)
printf("LSDim %d %d %d\n",this->LSDim[ii].Lower,this->LSDim[ii].Upper,this->LSDim[ii].Step);
printf(" set to dim %d %d\n",plAxis-1, align.Axis);
LSDim[plAxis-1] = ls; // LSDim <20> <20><><EFBFBD><EFBFBD>
printf("Proc current = %d\n",ProcLI);
for(ii=0;ii<this->LSDim.size();ii++)
printf("LSDim %d %d %d\n",this->LSDim[ii].Lower,this->LSDim[ii].Upper,this->LSDim[ii].Step);
}
//==== this block was moved from down
// for(i=0; i<plRank; i++)
// { LSDim[i].Lower += pl->LowerIndex[i];
// LSDim[i].Upper += pl->LowerIndex[i];
// }
// LSDim[plAxis-1].Lower += pl->LowerIndex[plAxis-1];
// LSDim[plAxis-1].Upper += pl->LowerIndex[plAxis-1];
//printf("PlAxis=%d\n",plAxis);
//=***
break;
case align_BOUNDREPL :
printf(" bound replicated %d %d \n", amLower, amUpper);
ls = LoopLS(amLower, amUpper, 1);
ls.transform(align.A, align.B, align.Bound);
if (ls.empty())
IsBlockEmpty = true;
else //xp
{
// printf(" set to dim %d < %d\n", align.Axis,LSDim.size());
if(align.Axis < LSDim.size())
LSDim[align.Axis] = ls; //xp <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> align.Axis-1
}
break;
case align_REPLICATE :
break;
case align_CONSTANT :
if (align.B < amLower || align.B > amUpper)
IsBlockEmpty = true;
break;
} // end internal switch
break;
case map_REPLICATE :
// printf(" replicated\n");
//====
// printf("----%d-%d-%d-%d-",i,vm->GetSize(0),vm->GetSize(1),vm->GetSize(2));
// LSDim[i-1].Upper -= pl->LowerIndex[i-1];;
// LSDim[i-1].Lower -= pl->LowerIndex[i-1];;
//
// { int ii;
// for(ii=0; ii<plRank; ii++)
// printf("*****LoopBlock %d %d %d**",LSDim[ii].Lower,LSDim[ii].Upper,pl->LowerIndex[ii]);
// }
//
//=***
break;
} // end main switch
if (IsBlockEmpty)
break;
} // end for
if (IsBlockEmpty)
{ // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
LSDim = vector<LoopLS>(0);
}
else
{
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
for(i=0; i<plRank; i++)
{
// printf("LoopStep[%d]=%d\n",i,pl->LoopStep[i]);
// printf("LSDim[i].Lower = addr %d\n",&(LSDim[i].Lower));
LSDim[i].Lower = pl->LoopStep[i] * (long)ceil((double)LSDim[i].Lower/(double)pl->LoopStep[i]);
LSDim[i].Upper = pl->LoopStep[i] * (LSDim[i].Upper / pl->LoopStep[i]);
LSDim[i].Step = pl->LoopStep[i];
//printf("LSDim[%d] %d %d %d\n",ProcLI, LSDim[i].Lower,LSDim[i].Upper,LSDim[i].Step);
LSDim[i].Invers = pl->Invers[i];
if(LSDim[i].Lower > LSDim[i].Upper)
break;
}
// if(i==plRank)
// { // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// printf("**%d*%d** ",maxAlign,plRank);
// for(i=0; i<plRank; i++)
//==== this block was moved up
// { LSDim[i].Lower += pl->LowerIndex[i];
// LSDim[i].Upper += pl->LowerIndex[i];
//=***
// printf("-LoopBlock %d %d %d ",LSDim[i].Lower,LSDim[i].Upper,pl->LowerIndex[i]);
// };
// }
if(i!=plRank)
{ // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD>
LSDim = vector<LoopLS>(0);
}
}
// LU deb
// printf("Proc current = %d\n",ProcLI);
// printf("This Rank (what meen?!) = %d\n",this->GetRank());
// for(i=0;i<this->LSDim.size();i++)
// printf("LSDim %d %d %d\n",this->LSDim[i].Lower,this->LSDim[i].Upper,this->LSDim[i].Step);
}
*/
// Two blocks are equal when they have the same rank and every pair of
// corresponding dimensions compares equal.
bool operator==(LoopBlock& x, LoopBlock& y)
{
    if (x.GetRank() != y.GetRank())
        return false;

    const long rank = x.GetRank();
    for (long dim = 0; dim < rank; ++dim)
    {
        if (!(x.LSDim[dim] == y.LSDim[dim]))
            return false;
    }
    return true;
}
int intersection(LoopBlock& x,LoopBlock&y)
{
int temp;
std::vector<long> arr1,arr2;
long i,j,k;
int t=0;
arr1.resize(0);
arr2.resize(0);
// printf("finding intersection of two arrays:\n");
// printf("array 1 :\n");
// for(i=0;i<x.GetRank();i++)
// printf(" %d %d %d \n",x.LSDim[i].Lower, x.LSDim[i].Upper,x.LSDim[i].Step);
// printf("array 2 :\n");
// for(i=0;i<y.GetRank();i++)
// printf(" %d %d %d \n",y.LSDim[i].Lower, y.LSDim[i].Upper,y.LSDim[i].Step);
// printf("Block size1 = %d Block size2 = %d \n",x.GetBlockSize(),y.GetBlockSize());
temp=-1;
for(i=0;i<x.LSDim.size();i++)
{
t=0;
for(j=x.LSDim[i].Lower;j<=x.LSDim[i].Upper;j+=x.LSDim[i].Step)
{
arr1.push_back(j);
}
for(j=y.LSDim[i].Lower;j<=y.LSDim[i].Upper;j+=y.LSDim[i].Step)
{
arr2.push_back(j);
}
// printf("size1 = %d , size 2 = %d \n",arr1.size(),arr2.size());
for(j=0;j<arr1.size();j++)
for(k=0;k<arr2.size();k++)
{
if(arr1[j]==arr2[k])
t++;
}
// printf("dim =%d t=%d\n",i,t);
temp*=t;
arr1.resize(0);
arr2.resize(0);
}
// printf("tempt=%d\n",temp);
if(temp<0)
return -temp;
else
return 0;
}

View File

@@ -0,0 +1,44 @@
#ifndef LoopBlockH
#define LoopBlockH
//////////////////////////////////////////////////////////////////////
//
// LoopBlock.h: interface for the LoopBlock class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include "LoopLS.h"
#include "ParLoop.h"
class ParLoop;
class LoopBlock {
public:
std::vector<LoopLS> LSDim; // vector of LoopLS for every dimensions
long GetRank();
bool empty();
long GetBlockSize();
LoopBlock();
LoopBlock(std::vector<LoopLS> arg)
{
LSDim.resize(0);
for(int i=0;i<arg.size();i++)
LSDim.push_back(arg[i]);
}
virtual ~LoopBlock();
LoopBlock(ParLoop *pl, long ProcLI);
//grig
LoopBlock(ParLoop *pl, long ProcLI,int a); // a - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
//grig
friend bool operator==(LoopBlock& x, LoopBlock& y);
friend int intersection(LoopBlock& x,LoopBlock&y);
};
#endif

View File

@@ -0,0 +1,111 @@
// LoopLS.cpp: implementation of the LoopLS class.
//
//////////////////////////////////////////////////////////////////////
#include "LoopLS.h"
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#if defined (__GNUG__) && (__GNUC__ >= 3)
// Returns the smaller of the two arguments; b is returned when a is not
// strictly less than b.
template <class T>
T min(T a, T b)
{
    if (a < b)
        return a;
    return b;
}
// Returns the larger of the two arguments; a is returned when the two
// compare equal.
template <class T>
T max(T a, T b)
{
    if (a >= b)
        return a;
    return b;
}
#endif
// Creates an empty range. Lower < 0 is what empty() tests; the remaining
// members get fixed values so that copying or comparing an empty range never
// reads indeterminate data. Upper is below Lower so that a plain
// Lower..Upper walk visits nothing.
LoopLS::LoopLS() :
    Lower(-1),
    Upper(-2),
    Step(1),
    Invers(false)
{
}
// Creates the range ALower..AUpper with step AStep. When the bounds are
// inverted or the step is not positive the range is marked empty (Lower = -1).
// Every member is initialised in both cases, so copies and comparisons never
// read indeterminate data; Invers starts as false.
// NOTE(review): empty() only tests Lower < 0, so a valid range with a
// negative ALower is also reported as empty - confirm that is intended.
LoopLS::LoopLS(long ALower, long AUpper, long AStep) :
    Lower(-1),
    Upper(-2),
    Step(1),
    Invers(false)
{
    if ((ALower <= AUpper) && (AStep > 0)) {
        Lower = ALower;
        Upper = AUpper;
        Step = AStep;
    }
}
// Destructor with an empty body: no explicit cleanup is performed here.
LoopLS::~LoopLS()
{
}
// Ranges are equal when Lower, Upper and Step all match; Invers is not
// part of the comparison.
bool operator==(const LoopLS& x, const LoopLS& y)
{
    if (x.Lower != y.Lower)
        return false;
    if (x.Upper != y.Upper)
        return false;
    return x.Step == y.Step;
}
// Lexicographic ordering by (Lower, Upper, Step), consistent with operator==.
// Returning true unconditionally made x < x hold, which breaks the strict
// weak ordering that sorting and ordered containers require.
bool operator<(const LoopLS& x, const LoopLS& y)
{
    if (x.Lower != y.Lower)
        return x.Lower < y.Lower;
    if (x.Upper != y.Upper)
        return x.Upper < y.Upper;
    return x.Step < y.Step;
}
// Number of indices in the range: 0 for an empty range, otherwise
// (Upper - Lower + Step) / Step using integer division.
long LoopLS::GetLoopLSSize()
{
    if (empty())
        return 0;

    const long span = Upper - Lower + Step;
    return span / Step; //====/
}
// A range is considered empty when its Lower bound is negative.
bool LoopLS::empty()
{
    return Lower < 0;
}
// Rewrites this range in place from the coefficients A and B and the
// dimension extent plDimSize: both bounds are passed through (bound - B) / A,
// the result is clipped to [0, plDimSize - 1], and B is added back before
// storing. The range is marked empty (Lower = -1) when nothing remains.
// Step is not modified.
// NOTE(review): A == 0 would divide by zero below - callers presumably
// guarantee A != 0; confirm.
void LoopLS::transform(long A, long B, long plDimSize)
{
long daLower, daUpper;
long displace, displace_0, temp;
// Quotients and remainders of both bounds relative to B, divided by A.
long daB1 = (Lower - B) / A;
long daB2 = (Upper - B) / A;
displace = (Lower - B) % A;
displace_0 = (Upper - B) % A;
// For a negative A the two remainders swap roles, and the one taken from
// the lower bound is negated.
if (A < 0)
{
temp = displace;
displace = displace_0;
displace_0 = -temp;
}
// Order the two quotients (MSVC macros vs. the min/max function templates).
#ifdef _MSC_VER
daLower = __min(daB1, daB2);
daUpper = __max(daB1, daB2);
#else
daLower = min(daB1, daB2);
daUpper = max(daB1, daB2);
#endif
// Empty result: the whole range is below 0, starts at or beyond plDimSize,
// or ends at 0 with a negative remainder.
if ((daLower < 0 && daUpper < 0) ||
(daLower >= plDimSize) ||
(daUpper == 0 && displace_0 < 0))
{
Lower = -1;
return;
}
// A non-zero remainder at the lower end moves the lower bound up by one
// (skipped when daUpper is 0).
if (displace != 0 && daUpper != 0)
daLower++;
// Clip to the valid index interval [0, plDimSize - 1].
if (daLower < 0)
daLower = 0;
if (daUpper >= plDimSize)
daUpper = plDimSize - 1;
if (daUpper < daLower)
{
Lower = -1;
return;
}
// Earlier variant, kept disabled: stored the bounds without adding B.
/* //====//
Lower = daLower;
Upper = daUpper;
*/ //====//
Lower = daLower + B;
Upper = daUpper + B;
}

View File

@@ -0,0 +1,29 @@
#ifndef LoopLSH
#define LoopLSH
//////////////////////////////////////////////////////////////////////
//
// LoopLS.h: interface for the LoopLS class.
//
//////////////////////////////////////////////////////////////////////
// A strided index range Lower..Upper with step Step. A negative Lower marks
// the range as empty (this is what empty() in LoopLS.cpp tests).
class LoopLS {
public:
long Lower; // first index; a negative value marks the range as empty
long Upper; // last index (inclusive in GetLoopLSSize)
long Step; // distance between consecutive indices
bool Invers;//====// copied from ParLoop::Invers by LoopBlock; NOTE(review): presumably marks a reversed loop direction - confirm
LoopLS(); // creates an empty range
LoopLS(long ALower, long AUpper, long AStep); // empty unless ALower <= AUpper and AStep > 0
virtual ~LoopLS();
void transform(long A, long B, long plDimSize); // rewrites the bounds in place; may mark the range empty
long GetLoopLSSize(); // number of indices in the range, 0 when empty
bool empty(); // true when Lower < 0
friend bool operator==(const LoopLS& x, const LoopLS& y); // compares Lower, Upper and Step
friend bool operator<(const LoopLS& x, const LoopLS& y);
};
#endif

View File

@@ -0,0 +1,182 @@
// LS.cpp: implementation of the LS class.
//
//////////////////////////////////////////////////////////////////////
#include "Ls.h"
extern void s_s_intersect (long l1, long u1, long l2, long u2, long * l3, long *u3);
extern void r_s_intersect (long l1, long u1, long s1, long l2, long u2,
long * l3, long * u3, long * s3);
extern void r_r_intersect (long l1, long u1, long s1, long l2, long u2, long s2,
long * l3, long * u3, long * s3);
using namespace std;
#if defined (_MSC_VER) || (defined (__GNUG__) && (__GNUC__ < 3))
/*template <class T>
T min(T a, T b)
{
return a < b ? a : b;
}
template <class T>
T max(T a, T b)
{
return a >= b ? a : b;
}*/
#endif
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Creates an empty line segment: Lower (0) is greater than Upper (-1),
// which is exactly the condition IsEmpty() tests. Stride defaults to 1.
LS::LS() :
    Lower(0),
    Upper(-1),
    Stride(1)
{
}
// Nothing to release: the class holds only plain values.
LS::~LS() {}
// Creates the segment ALower..AUpper with the given stride.
// When ALower > AUpper the arguments are ignored and the canonical empty
// segment (0, -1, stride 1) is stored instead.
LS::LS(long ALower, long AUpper, long AStride)
{
    const bool ordered = (ALower <= AUpper);
    Lower = ordered ? ALower : 0;
    Upper = ordered ? AUpper : -1;
    Stride = ordered ? AStride : 1;
}
// Two line segments are equal when Lower, Upper and Stride all match.
bool operator == (const LS& x, const LS& y)
{
    if (x.Lower != y.Lower)
        return false;
    if (x.Upper != y.Upper)
        return false;
    return x.Stride == y.Stride;
}
// Lexicographic ordering on (Lower, Upper, Stride).
// The previous body returned true unconditionally, which made x < x hold and
// therefore was not a strict weak ordering; any ordered container or sort
// relying on it would have undefined behaviour. The fields compared here are
// the same ones operator== uses, so the two operators agree on equivalence.
bool operator < (const LS& x, const LS& y)
{
    if (x.Lower != y.Lower)
        return x.Lower < y.Lower;
    if (x.Upper != y.Upper)
        return x.Upper < y.Upper;
    return x.Stride < y.Stride;
}
// Intersection of two line segments.
// The helper is chosen by which operands have stride 1:
//   both          -> s_s_intersect (result stride is set to 1 here)
//   exactly one   -> r_s_intersect, with the strided operand passed first
//   neither       -> r_r_intersect
// The helpers write the resulting bounds (and stride) into the returned object.
LS LS::operator ^ (const LS & x) const
{
    LS result;
    const bool selfUnit = (Stride == 1);
    const bool otherUnit = (x.Stride == 1);

    if (selfUnit && otherUnit) {
        s_s_intersect(Lower, Upper, x.Lower, x.Upper, &result.Lower, &result.Upper);
        result.Stride = 1;
        return result;
    }
    if (selfUnit) {
        // x is the strided one
        r_s_intersect(x.Lower, x.Upper, x.Stride, Lower, Upper,
                      &result.Lower, &result.Upper, &result.Stride);
        return result;
    }
    if (otherUnit) {
        // *this is the strided one
        r_s_intersect(Lower, Upper, Stride, x.Lower, x.Upper,
                      &result.Lower, &result.Upper, &result.Stride);
        return result;
    }
    r_r_intersect(Lower, Upper, Stride, x.Lower, x.Upper, x.Stride,
                  &result.Lower, &result.Upper, &result.Stride);
    return result;
}
// Number of elements in the segment: 0 when empty, otherwise
// (Upper - Lower) / Stride + 1 using integer division.
// (An earlier version ignored the stride and returned Upper - Lower + 1.)
long LS::GetLSSize() const
{
    if (IsEmpty())
        return 0;
    return (Upper - Lower) / Stride + 1;
}
// A segment is empty when its lower bound lies above its upper bound.
bool LS::IsEmpty() const
{
    return Lower > Upper;
}
// Transforms the LS through the inverse of a linear alignment rule:
// i = A * j + B ==> j = (i - B) / A, with the result clipped to the array dimension
void LS::transform(long A, long B, long daDimSize)
{
// printf("LS::transform %d %d %d \n",A,B,daDimSize);
long daB1, daB2;
long daLower, daUpper;
long displace, displace_0, temp;
daB1 = (Lower - B) / A;
daB2 = (Upper - B) / A;
displace = (Lower - B) % A;
displace_0 = (Upper - B) % A;
if (A < 0)
{
temp = displace;
displace = displace_0;
displace_0 = -temp;
}
daLower = min(daB1, daB2);
daUpper = max(daB1, daB2);
if ((daLower < 0 && daUpper < 0) ||
(daLower >= daDimSize) ||
(daUpper == 0 && displace_0 < 0))
{
Lower = -1;
return;
}
if (displace != 0 && daUpper != 0)
daLower++;
if (daLower < 0)
daLower = 0;
if (daUpper >= daDimSize)
daUpper = daDimSize - 1;
if (daUpper < daLower)
{
Lower = -1;
return;
}
Lower = daLower;
Upper = daUpper;
}
// True when neither boundary width exceeds the number of elements in the segment.
bool LS::IsBoundIn(long ALeftBSize, long ARightBSize) const
{
    const long segSize = GetLSSize();
    return (ALeftBSize <= segSize) && (ARightBSize <= segSize);
}
// True when the segment is non-empty and starts above `elem`
// (its Lower bound is greater than elem).
bool LS::IsLeft(long elem) const
{
    return !IsEmpty() && (Lower > elem);
}
// True when the segment is non-empty and ends below `elem`
// (its Upper bound is less than elem).
bool LS::IsRight(long elem) const
{
    return !IsEmpty() && (Upper < elem);
}
//====
// Read access to the lower bound.
long LS::GetLower()
{
    return Lower;
}
// Read access to the upper bound.
long LS::GetUpper()
{
    return Upper;
}
//=***

View File

@@ -0,0 +1,47 @@
#ifndef LSH
#define LSH
//////////////////////////////////////////////////////////////////////
//
// LS.h: interface for Line Segment (LS) class.
//
//////////////////////////////////////////////////////////////////////
#include <algorithm>
// Line Segment
// Line segment: the indices Lower..Upper taken with a given Stride.
// The segment is empty when Lower > Upper (see IsEmpty()).
class LS {
long Lower; // first index
long Upper; // last index
long Stride; // distance between consecutive indices
public:
// True when the segment is non-empty and its Upper bound is below elem.
bool IsRight(long elem) const;
// True when the segment is non-empty and its Lower bound is above elem.
bool IsLeft(long elem) const ;
// True when neither boundary width exceeds the segment size.
bool IsBoundIn(long ALeftBSize, long ARightBSize) const;
// Creates ALower..AUpper; an empty segment is stored when ALower > AUpper.
LS(long ALower, long AUpper, long AStride = 1);
// Creates an empty segment.
LS();
virtual ~LS();
// Number of elements in the segment (0 when empty).
long GetLSSize() const;
bool IsEmpty() const;
// Re-maps the segment through the linear rule (A, B) and clips it to daDimSize elements.
// NOTE(review): the original comment (lost to an encoding error) mentions converting an AMView LS to a DArray LS - confirm.
void transform(long A, long B, long daDimSize);
// intersection operator (Lower > Upper if intersection empty)
LS operator^ (const LS &x) const;
// Comparison operators, declared as friends so they can read the private bounds.
friend bool operator==(const LS& x, const LS& y);
friend bool operator<(const LS& x, const LS& y);
// Read access to the bounds.
long GetLower();
long GetUpper();
};
#endif

View File

@@ -0,0 +1,427 @@
#include <stdlib.h>
#include <assert.h>
#include <iostream>
using namespace std;
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
// Set to true by FuncCall::arrcpy() before it models a synchronous copy.
bool SynchCopy;
extern _AMViewInfo * GetAMViewByIndex(long ID);
// Flag record of the asynchronous copy currently being modelled by
// FuncCall::aarrcp(); NULL outside of that call.
_DArrayFlag * DAF_tmp;
// Table of known distributed arrays, grown/shrunk with realloc by
// AddDArray()/DelDArray().
// NOTE(review): the entry count is obtained through DArrays->size() even while
// the pointer is NULL, so size() presumably only reads the static `count` - confirm.
_DArrayInfo * DArrays = NULL;
int _DArrayInfo::count = 0;
// Table of asynchronous-copy flag records, managed the same way by
// AddDArrayFlag()/DelDArrayFlag().
_DArrayFlag * dArrayFlags = NULL;
int _DArrayFlag::count = 0;
//---------------------------------- DArray ----------------------------------------------
// Searches the DArrays table from the last entry towards the first.
// Returns the position of the entry whose ID matches, or -1 when there is none.
int GetDArrayIndex(long ID)
{
    int pos = DArrays->size() - 1;
    while ((pos >= 0) && DArrays[pos].ID != ID)
        pos--;
    return pos;
}
// Returns the DArrays entry registered under ID, or NULL when it is unknown.
_DArrayInfo* GetDArrayByIndex(long ID)
{
    const int pos = GetDArrayIndex(ID);
    if (pos < 0)
        return NULL;
    return &DArrays[pos];
}
// Re-registers the array known as OldKey under NewKey.
// Returns false (and changes nothing) when OldKey is not in the table.
bool ResetDArrayKey(long OldKey, long NewKey)
{
    const int pos = GetDArrayIndex(OldKey);
    if (pos < 0)
        return false;
    DArrays[pos].ID = NewKey;
    return true;
}
// Appends a new entry with the given ID to the DArrays table and returns it.
// The table is grown by one slot with realloc; only the ID of the new slot is
// filled in here, the remaining fields are left for the caller to set.
_DArrayInfo* AddDArray(long ID)
{
    const int oldCount = DArrays->size();
    DArrays = (_DArrayInfo*) realloc(DArrays, (oldCount + 1) * sizeof(_DArrayInfo));
    assert(DArrays != NULL);
    ++*DArrays;   // NOTE(review): presumably bumps the entry count reported by size()

    _DArrayInfo* slot = &DArrays[oldCount];
    slot->ID = ID;
    return slot;
}
// Removes the entry registered under ID from the DArrays table and destroys
// the DArray object it owns. Does nothing when ID is unknown.
//
// Fixes compared with the previous version:
//  * removing the last entry no longer calls realloc(ptr, 0), whose result is
//    implementation-defined; the table is freed and reset to NULL explicitly;
//  * the count is decremented while the table pointer is still valid instead
//    of through the (possibly NULL) pointer returned by realloc.
void DelDArray(long ID)
{
    const int idx = GetDArrayIndex(ID);
    if (idx < 0)
        return;

    const int curr_size = DArrays->size();
    delete DArrays[idx].DArray_Obj;

    // Close the gap by shifting the following entries one slot down.
    for (int i = idx + 1; i < curr_size; i++)
        DArrays[i-1] = DArrays[i];

    --*DArrays;   // NOTE(review): presumably decrements the count reported by size()

    if (curr_size == 1) {
        free(DArrays);
        DArrays = NULL;
    } else {
        DArrays = (_DArrayInfo*) realloc(DArrays, (curr_size - 1) * sizeof(_DArrayInfo));
        assert(DArrays != NULL);
    }
}
//---------------------------------- DArrayFlag --------------------------------------------
// Searches the dArrayFlags table from the last entry towards the first.
// Returns the position of the entry whose ID matches, or -1 when there is none.
int GetDArrayFlagIndex(long ID)
{
    int pos = dArrayFlags->size() - 1;
    while ((pos >= 0) && dArrayFlags[pos].ID != ID)
        pos--;
    return pos;
}
// Returns the flag record registered under ID, or NULL when it is unknown.
_DArrayFlag* GetDArrayFlagByIndex(long ID)
{
    const int pos = GetDArrayFlagIndex(ID);
    if (pos < 0)
        return NULL;
    return &dArrayFlags[pos];
}
// Appends a new asynchronous-copy flag record with the given ID to the
// dArrayFlags table and returns it. The record gets its own timestamp array
// with rootProcCount entries and zeroed start/end times.
//
// Fix: the timestamp array is now value-initialised (all 0.0). waitcp() tests
// each stamp with "> 0", so entries that no copy ever fills in must not hold
// indeterminate values.
_DArrayFlag* AddDArrayFlag(long ID)
{
    const int curr_size = dArrayFlags->size();
    dArrayFlags = (_DArrayFlag*) realloc(dArrayFlags, (curr_size + 1) * sizeof(_DArrayFlag));
    assert(dArrayFlags != NULL);
    ++*dArrayFlags;   // NOTE(review): presumably bumps the entry count reported by size()

    _DArrayFlag* tmp = &dArrayFlags[curr_size];
    tmp->ProcessTimeStamp = new double[rootProcCount]();   // () => zero-filled
    tmp->time_start = 0.0;
    tmp->time_end = 0.0;
    tmp->ID = ID;
    return tmp;
}
// Removes the flag record registered under ID from the dArrayFlags table and
// releases its timestamp array. Does nothing when ID is unknown.
//
// Fixes compared with the previous version:
//  * the timestamp array is created with new[] in AddDArrayFlag(), so it is
//    released with delete[] rather than scalar delete;
//  * removing the last record no longer calls realloc(ptr, 0), whose result is
//    implementation-defined; the table is freed and reset to NULL explicitly;
//  * the count is decremented while the table pointer is still valid instead
//    of through the (possibly NULL) pointer returned by realloc.
void DelDArrayFlag(long ID)
{
    const int idx = GetDArrayFlagIndex(ID);
    if (idx < 0)
        return;

    const int curr_size = dArrayFlags->size();
    delete[] dArrayFlags[idx].ProcessTimeStamp;

    // Close the gap by shifting the following records one slot down.
    for (int i = idx + 1; i < curr_size; i++)
        dArrayFlags[i-1] = dArrayFlags[i];

    --*dArrayFlags;   // NOTE(review): presumably decrements the count reported by size()

    if (curr_size == 1) {
        free(dArrayFlags);
        dArrayFlags = NULL;
    } else {
        dArrayFlags = (_DArrayFlag*) realloc(dArrayFlags, (curr_size - 1) * sizeof(_DArrayFlag));
        assert(dArrayFlags != NULL);
    }
}
//------------------------------- Modelling functions DArray --------------------------
void FuncCall::crtda()
{
crtda_Info* params = (crtda_Info*) call_params;
_DArrayInfo* tmp=AddDArray(params->ArrayHeader);
tmp->AlignType=0;
tmp->DArray_Obj=new DArray(params->SizeArray,
params->LowShdWidthArray, params->HiShdWidthArray, params->TypeSize);
}
// Models alignment of a distributed array with a pattern.
// PatternType 1 aligns with an abstract-machine view (looked up by PatternRef),
// PatternType 2 aligns with another distributed array (looked up by
// PatternRefPtr); any other pattern type is ignored.
//
// Fix: the pattern lookups can return NULL for an unknown ID and were
// dereferenced unchecked; they are now asserted like the array lookup itself.
void FuncCall::align()
{
    align_Info* params = (align_Info*) call_params;

    // The array is looked up by its header (an earlier version used params->ID).
    _DArrayInfo* ArrInfo = GetDArrayByIndex(params->ArrayHeader);
    assert(ArrInfo != NULL);

    if (params->PatternType == 1) {
        // AMView
        _AMViewInfo* AMV_Info = GetAMViewByIndex(params->PatternRef);
        assert(AMV_Info != NULL);
        ArrInfo->AlignType = 1;
        ArrInfo->DArray_Obj->AlnDA(AMV_Info->AMView_Obj, params->AxisArray,
                                   params->CoeffArray, params->ConstArray);
    } else if (params->PatternType == 2) {
        // DisArray
        _DArrayInfo* DA_Info = GetDArrayByIndex(params->PatternRefPtr);
        assert(DA_Info != NULL);
        ArrInfo->AlignType = 2;
        ArrInfo->DArray_Obj->AlnDA(DA_Info->DArray_Obj, params->AxisArray,
                                   params->CoeffArray, params->ConstArray);
    }
}
void FuncCall::delda()
{
delda_Info* params=(delda_Info*) call_params;
DelDArray(params->ID);
}
// Models re-alignment of a distributed array with a new pattern and charges
// the resulting redistribution time.
// PatternType 1 re-aligns with an abstract-machine view, PatternType 2 with
// another distributed array; for any other pattern type nothing is modelled.
// After the re-alignment the processors are synchronised, the time returned
// by RAlnDA is added as redistribution time and the interval's
// redistribution counter is incremented.
//
// Fix: the pattern lookups can return NULL for an unknown ID and were
// dereferenced unchecked; they are now asserted like the array lookup itself.
void FuncCall::realn()
{
    double f_time;
    realn_Info* params = (realn_Info*) call_params;

    _DArrayInfo* ArrInfo = GetDArrayByIndex(params->ArrayHeader);
    assert(ArrInfo != NULL);

    if (params->PatternType == 1) {
        // AMView
        _AMViewInfo* AMV_Info = GetAMViewByIndex(params->PatternRef);
        assert(AMV_Info != NULL);
        ArrInfo->AlignType = 1;
        f_time = ArrInfo->DArray_Obj->RAlnDA(AMV_Info->AMView_Obj, params->AxisArray,
                                             params->CoeffArray, params->ConstArray,
                                             params->NewSign);
    } else if (params->PatternType == 2) {
        // DisArray
        _DArrayInfo* DA_Info = GetDArrayByIndex(params->PatternRefPtr);
        assert(DA_Info != NULL);
        ArrInfo->AlignType = 2;
        f_time = ArrInfo->DArray_Obj->RAlnDA(DA_Info->DArray_Obj, params->AxisArray,
                                             params->CoeffArray, params->ConstArray,
                                             params->NewSign);
    } else {
        return;
    }

    MPSSynchronize(__Redistribute);
    AddMPSTime(__Redistribute, f_time);
    ++CurrInterval->num_op_redist;
}
// Models a synchronous array copy.
// Either side of the copy is a distributed array (its buffer pointer in the
// trace is 0 and the header is looked up in the DArrays table) or a plain
// replicated buffer (the corresponding DA_* pointer stays NULL).
// The function
//   1. obtains the communication time from ArrayCopy() for the
//      distributed->distributed and distributed->replicated cases,
//   2. spreads the traced per-processor call time (vcall_time) as user CPU
//      time in proportion to the part of the copied block each processor owns,
//   3. synchronises the processors and charges the communication time as
//      remote-access time - except in the replicated->distributed case, which
//      returns before step 3.
void FuncCall::arrcpy()
{
double f_time = 0.0;
int i,j;
vector<LS> blockIni;
bool returned=false;
SynchCopy=true;
_DArrayInfo* DA_From = NULL;
_DArrayInfo* DA_To = NULL;
arrcpy_Info* params = (arrcpy_Info*) call_params;
// A zero buffer pointer means that side is a distributed array.
if (params->FromBufferPtr == 0)
DA_From=GetDArrayByIndex(params->FromArrayHeader);
if (params->ToBufferPtr == 0)
DA_To=GetDArrayByIndex(params->ToArrayHeader);
// Neither side is a known distributed array: nothing to model.
if (DA_From == NULL && DA_To == NULL)
return;
if (DA_From != NULL && DA_To != NULL) {
//cout << DA_From->DArray_Obj->Rank() << ' ' << DA_From->ID << endl;
// Distributed -> Distributed
f_time=ArrayCopy(DA_From->DArray_Obj, params->FromInitIndexArray,
params->FromLastIndexArray, params->FromStepArray,
DA_To->DArray_Obj, params->ToInitIndexArray,
params->ToLastIndexArray, params->ToStepArray);
} else if (DA_From == NULL && DA_To != NULL) {
// Replicated -> Distributed: no communication time; remember to leave
// before the synchronisation at the end.
returned=true;
} else if (DA_From != NULL && DA_To == NULL) {
// Distributed -> Replicated
f_time=ArrayCopy(DA_From->DArray_Obj, params->FromInitIndexArray,
params->FromLastIndexArray, params->FromStepArray,
params->CopyRegim);
}
// Distribute the traced call time over the processors according to the
// share of the copied block that each of them holds.
if(DA_To != NULL)
{
// Build the written block from the "to" index ranges; a non-positive step
// means the range is given in reverse, so the bounds are swapped and the
// step negated.
for(i=0;i<params->ToStepArray.size();i++)
if(params->ToStepArray[i]>0)
blockIni.push_back(LS(params->ToInitIndexArray[i], params->ToLastIndexArray[i], params->ToStepArray[i]));
else
blockIni.push_back(LS(params->ToLastIndexArray[i], params->ToInitIndexArray[i], -params->ToStepArray[i]));
Block writeBlock(blockIni);
for(i=0;i<MPSProcCount();i++)
{ Block locBlock(DA_To->DArray_Obj, i, 1);
// Part of the written block that lies on processor i.
Block writeLocBlock = locBlock ^ writeBlock;
// printf("To Repl = %d BlockSZ=%d Loc=%d\n", DA_To->DArray_Obj->Repl,writeBlock.GetBlockSize(),locBlock.GetBlockSize());
if(writeBlock.GetBlockSize()!=0)
AddTime(__CPU_time_usr, currentVM->map(i), vcall_time[i] * writeLocBlock.GetBlockSize()/writeBlock.GetBlockSize());
// Empty written block on a replicated target: charge the whole call time.
if(writeBlock.GetBlockSize()==0 && DA_To->DArray_Obj->Repl)
AddTime(__CPU_time_usr, currentVM->map(i), vcall_time[i]);
}
// Replicated target: every processor that comes after the first one holding
// a non-empty local block is charged its call time as insufficient parallelism.
if(DA_To->DArray_Obj->Repl)
{
for(i=0,j=0;i<MPSProcCount();i++)
{
Block locBlock(DA_To->DArray_Obj, i, 1);
if(j!=0)
AddTime(__Insuff_parall_usr, currentVM->map(i), vcall_time[i]);
if(locBlock.GetBlockSize()>0)
j++;
}
}
}
else if(DA_From != NULL)
{
// Target is not distributed: same proportional split, based on the block
// that is read from the source array.
for(i=0;i<params->FromStepArray.size();i++)
if(params->FromStepArray[i]>0)
blockIni.push_back(LS(params->FromInitIndexArray[i], params->FromLastIndexArray[i], params->FromStepArray[i]));
else
blockIni.push_back(LS(params->FromLastIndexArray[i], params->FromInitIndexArray[i], -params->FromStepArray[i]));
Block writeBlock(blockIni);
for(i=0;i<MPSProcCount();i++)
{ Block locBlock(DA_From->DArray_Obj, i, 1);
Block writeLocBlock = locBlock ^ writeBlock;
if(writeBlock.GetBlockSize()!=0)
AddTime(__CPU_time_usr, currentVM->map(i), vcall_time[i] * writeLocBlock.GetBlockSize()/writeBlock.GetBlockSize());
}
}
// Replicated -> Distributed stops here: no synchronisation, no remote-access time.
if(returned) return;
MPSSynchronize(__Remote_access);
AddMPSTime(__Remote_access, f_time);
++CurrInterval->num_op_remote;
}
void FuncCall::aarrcp()
{
double f_time = 0.0;
_DArrayInfo* DA_From = NULL;
_DArrayInfo* DA_To = NULL;
arrcpy_Info* params = (arrcpy_Info*) call_params;
_DArrayFlag* DA_Flags = AddDArrayFlag(params->CopyFlagPtr);
DA_Flags->ProcessTimeStamp=(double *)malloc(MPSProcCount()*sizeof(double));
DAF_tmp = DA_Flags; //====// <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20> CommCost::CopyUpdateDistr
if (params->FromBufferPtr == 0)
DA_From=GetDArrayByIndex(params->FromArrayHeader);
if (params->ToBufferPtr == 0)
DA_To=GetDArrayByIndex(params->ToArrayHeader);
if (DA_From == NULL && DA_To == NULL)
f_time = 0.0;
if (DA_From != NULL && DA_To != NULL) {
//cout << DA_From->DArray_Obj->Rank() << ' ' << DA_From->ID << endl;
// Distributed -> Distributed
f_time=ArrayCopy(DA_From->DArray_Obj, params->FromInitIndexArray,
params->FromLastIndexArray, params->FromStepArray,
DA_To->DArray_Obj, params->ToInitIndexArray,
params->ToLastIndexArray, params->ToStepArray);
} else if (DA_From == NULL && DA_To != NULL) {
// Replicated -> Distributed
f_time = 0.0;
} else if (DA_From != NULL && DA_To == NULL) {
// Distributed -> Replicated
f_time=ArrayCopy(DA_From->DArray_Obj, params->FromInitIndexArray,
params->FromLastIndexArray, params->FromStepArray,
params->CopyRegim);
}
DAF_tmp = NULL; //====//
// MPSSynchronize(__Remote_access); //aarrcp - <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
DA_Flags-> time_start = CurrProcTime(0);
DA_Flags-> time_end = DA_Flags-> time_start + f_time;
// printf("DAF STart=%f DAF End=%f\n",DA_Flags->time_start, DA_Flags->time_end);
++CurrInterval->num_op_remote;
//printf("F_time=%f\n",f_time);
// AddMPSTime(__Remote_access, f_time);
}
#define max(a,b) ((a>b)?a:b)
#define min(a,b) ((a<b)?a:b)
void FuncCall::waitcp()
{
int i;
double curr_pt;
waitcp_Info* params=(waitcp_Info*) call_params;
_DArrayFlag * DAF=GetDArrayFlagByIndex(params->CopyFlagPtr);
// assert(DAF != NULL);
if (DAF == NULL)
return;
for (i=0; i < MPSProcCount(); i++) {
// printf("TIME stamp[%d]=%f\n",currentVM->map(i),DAF->ProcessTimeStamp[currentVM->map(i)]);
curr_pt = CurrProcTime(currentVM->map(i));
// printf("Curr_pt[%d]=%f\n",i,curr_pt);
// printf("WAIT[%d] Copy [%f]-[%f]=[%f]\n",currentVM->map(i),curr_pt,DAF->time_end,max(DAF->time_end-curr_pt,0));
if(DAF->ProcessTimeStamp[currentVM->map(i)]>0 && curr_pt < DAF->time_start + DAF->ProcessTimeStamp[currentVM->map(i)])
{
//printf("proc[%d].waitcp\n",currentVM->map(i));
AddTime(__Remote_access,currentVM->map(i), max(DAF->time_start + DAF->ProcessTimeStamp[currentVM->map(i)] - curr_pt, DAF->ProcessTimeStamp[currentVM->map(i)] ));
if(curr_pt < DAF->time_start) AddTimeSynchronize(__Synchronize, currentVM->map(i), DAF->time_start - curr_pt);
} else {
// if(DAF->ProcessTimeStamp[currentVM->map(i)]>0.00000001) printf("Overlap %f %f\n",curr_pt, DAF->ProcessTimeStamp[currentVM->map(i)]);
// AddTime(__Remote_overlap,currentVM->map(i), DAF->ProcessTimeStamp[currentVM->map(i)]);
}
}
// printf("Wait cp done****************************************\n");
DelDArrayFlag(params->CopyFlagPtr);
}
// Main function
// Entry point for distributed-array calls: dispatches on func_id to the
// matching modelling routine. Calls without a dedicated routine are handled
// by RegularTime().
void FuncCall::DArrayTime()
{
    switch (func_id) {
    case crtda_:
        crtda();
        return;
    case align_:
        align();
        return;
    case realn_:
        realn();
        return;
    case delda_:
        delda();
        return;
    case aarrcp_:
        aarrcp();
        return;
    case arrcpy_:
        arrcpy();
        return;
    case waitcp_:
        waitcp();
        return;
    default:
        break;
    }
    RegularTime();
}

View File

@@ -0,0 +1,69 @@
#include "FuncCall.h"
#include "ModelStructs.h"
#include "Interval.h"
// Start of an I/O section: switches the current interval into I/O-traffic
// mode and counts one I/O operation.
void FuncCall::biof()
{
    CurrInterval->setIOTrafic();
    ++(CurrInterval->num_op_io);
}
// Intentionally empty: nothing is modelled for this call.
void FuncCall::tstio() {}
void FuncCall::srmem()
{
int i;
double TimeDelta = 0.0;
long account = 0;
double tbyte = currentVM->getTByte();
double tstart = currentVM->getTStart();
srmem_Info* params=(srmem_Info*) call_params;
for (i = 0; i < params-> MemoryCount; i++)
account += params-> LengthArray[i];
MPSSynchronize(__IO_comm);
double time_start = CurrProcTime(0); //__IO_comm
for (i=0; i < MPSProcCount(); i++) {
TimeDelta += i * tbyte * account + tstart;
AddTime(__IO_comm, currentVM->map(i), TimeDelta);
}
}
// End of an I/O section: takes the current interval out of I/O-traffic mode.
void FuncCall::eiof()
{
    CurrInterval->resetIOTrafic();
}
// Generic I/O call: charges the first entry of vret_time as I/O time on
// processor 0 and counts one I/O operation.
// (An earlier version charged the scalar ret_time instead.)
void FuncCall::ciotime()
{
    AddTime(__IO_time, 0, vret_time[0]);
    ++(CurrInterval->num_op_io);
}
// Entry point for I/O calls: runs the routine matching func_id (ciotime() for
// anything without a dedicated routine) and then always adds the regular
// call times.
void FuncCall::IOTime()
{
    if (func_id == biof_)
        biof();
    else if (func_id == eiof_)
        eiof();
    else if (func_id == srmem_)
        srmem();
    else
        ciotime();

    // calculate times
    RegularTime();
}

View File

@@ -0,0 +1,27 @@
#include "Interval.h"
#include "CallInfoStructs.h"
void FuncCall::IntervalTime()
{
binter_Info* tmp=(binter_Info*) call_params;
RegularTime(); // add time befor changing interval
switch(func_id) {
case binter_ :
Interval::Enter(__IT_USER, tmp->line, tmp->file, tmp->index);
break;
case bsloop_ :
Interval::Enter(__IT_SEQ, tmp->line, tmp->file, tmp->index);
break;
case bploop_ :
Interval::Enter(__IT_PAR, tmp->line, tmp->file, tmp->index);
break;
case einter_ :
case eloop_ :
Interval::Leave();
break;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,518 @@
#include <stdlib.h>
#include <assert.h>
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
//====
#include <stdio.h>
extern long TraceProcNum;
//=***
#include "LoopBlock.h"
extern _ShadowInfo * GetShadowByIndex(long ID);
extern void DelShadow(long ID);
extern _AMViewInfo * GetAMViewByIndex(long ID);
extern _DArrayInfo * GetDArrayByIndex(long ID);
extern _ShdGrpInfo * GetShdGroupByIndex(long ID);
// State of the parallel loop currently being modelled; filled in by
// crtpl()/mappl() and released by endpl().
_ParLoopInfo ParLoopInfo;
// Definition of the static counter of _ParLoopInfo.
int _ParLoopInfo::count = 0;
//grig
//LoopBlock * prevLoopBlock=NULL;
//\grig
void FuncCall::crtpl()
{
crtpl_Info* params=(crtpl_Info*) call_params;
ParLoopInfo.ID=params->ID;
ParLoopInfo.Rank=params->Rank;
ParLoopInfo.AlignType=0;
ParLoopInfo.PatternType=0;
ParLoopInfo.PatternID=0;
ParLoopInfo.exfrst = false;
ParLoopInfo.imlast = false;
ParLoopInfo.across = false;
#ifdef nodef
if(ParLoopInfo.AxisArray)
free(ParLoopInfo.AxisArray);
ParLoopInfo.AxisArray=(long*)calloc(ParLoopInfo.Rank,sizeof(long));
assert(ParLoopInfo.AxisArray != NULL);
#endif
ParLoopInfo.ParLoop_Obj = new ParLoop(ParLoopInfo.Rank);
}
// Models the end of a parallel loop.
//  * imlast mode: for every processor the time elapsed since the start of the
//    shadow exchange is charged as shadow overlap; a processor that reaches
//    this point before the exchange has ended is additionally charged the
//    remaining time as shadow wait. The shadow record is then deleted.
//  * across mode: only the flag is cleared and the shadow record deleted; the
//    per-processor accounting for this case is disabled (see below).
// Finally the ParLoop object of the loop is destroyed.
//
// Fix: the shadow record returned by GetShadowByIndex() was dereferenced
// unchecked; it is now asserted.
// NOTE(review): assumes GetShadowByIndex() yields NULL for an unknown ID, like
// the other Get*ByIndex lookups - confirm.
void FuncCall::endpl()
{
    if (ParLoopInfo.imlast) {
        _ShadowInfo* SHD = GetShadowByIndex(ParLoopInfo.imlast_SGR);
        assert(SHD != NULL);

        for (int i = 0; i < MPSProcCount(); i++) {
            double curr_pt = CurrProcTime(currentVM->map(i));
            const bool exchangeRunning = (curr_pt < SHD->time_end);

            if (exchangeRunning)
                printf("Overlap = %f (%f -%f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start);

            AddTime(__Shadow_overlap, currentVM->map(i), (curr_pt - SHD->time_start));

            if (exchangeRunning)
                AddTime(__Wait_shadow, currentVM->map(i), (SHD->time_end - curr_pt));
        }

        ParLoopInfo.imlast = false;
        DelShadow(ParLoopInfo.imlast_SGR);
    }
    else if (ParLoopInfo.across) {
        // Disabled accounting for the across case, kept for reference. The
        // original explanation was lost to an encoding error.
        // NOTE(review): it appears to have stated why the loop does not wait
        // for the shadow exchange here - confirm with the authors.
        //
        //   SHD=GetShadowByIndex(ParLoopInfo.across_SGR);
        //   for (i=0; i<MPSProcCount(); i++) {
        //     curr_pt = CurrProcTime(currentVM->map(i));
        //     if(curr_pt < SHD->time_end)
        //     {
        //       printf("Overlap across= %f (%f - %f )\n", curr_pt - SHD->time_start, curr_pt, SHD->time_start);
        //       AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
        //       AddTime(__Wait_shadow,currentVM->map(i), (SHD->time_end - curr_pt));
        //     } else {
        //       //was AddTime(__Shadow_overlap,currentVM->map(i), (curr_pt - SHD->time_start));
        //     }
        //   }
        ParLoopInfo.across = false;
        DelShadow(ParLoopInfo.across_SGR);
    }

    delete ParLoopInfo.ParLoop_Obj;
    ParLoopInfo.ParLoop_Obj = NULL;
}
// Models mapping of the current parallel loop onto a pattern.
// PatternType 1 maps onto an abstract-machine view (looked up by PatternRef),
// PatternType 2 onto a distributed array (looked up by PatternRefPtr); for any
// other pattern type nothing is recorded. On success the pattern kind/ID and
// the axis array of the call are stored in ParLoopInfo.
//
// Fix: the pattern lookups can return NULL for an unknown ID and were
// dereferenced unchecked; they are now asserted.
void FuncCall::mappl()
{
    mappl_Info* params = (mappl_Info*) call_params;

    if (params->PatternType == 1) {
        // AMView
        ParLoopInfo.PatternType = 1;
        ParLoopInfo.AlignType = 1;
        ParLoopInfo.PatternID = params->PatternRef;

        _AMViewInfo* AMV_Info = GetAMViewByIndex(params->PatternRef);
        assert(AMV_Info != NULL);
        ParLoopInfo.ParLoop_Obj->MapPL(AMV_Info->AMView_Obj, params->AxisArray,
            params->CoeffArray, params->ConstArray, params->InInitIndexArray,
            params->InLastIndexArray, params->InStepArray);
    } else if (params->PatternType == 2) {
        // DisArray
        ParLoopInfo.PatternType = 2;
        ParLoopInfo.AlignType = 2;
        ParLoopInfo.PatternID = params->PatternRefPtr;

        _DArrayInfo* DA_Info = GetDArrayByIndex(params->PatternRefPtr);
        assert(DA_Info != NULL);
        ParLoopInfo.ParLoop_Obj->MapPL(DA_Info->DArray_Obj, params->AxisArray,
            params->CoeffArray, params->ConstArray, params->InInitIndexArray,
            params->InLastIndexArray, params->InStepArray);
    } else {
        return;
    }

    ParLoopInfo.AxisArray = params->AxisArray;
}
void FuncCall::dopl()
{
int i, j, cnt;
double time = 0.0, ip_time = 0.0;
long loop_size = ParLoopInfo.ParLoop_Obj->GetLoopSize();
long block_size;
// long interceptj;
//====
int type_size, mode=0;
//=***
/* for(i=0;i<MPSProcCount();i++)
{ printf("vcalltime[%d]=%f * Proc %d = %f\n",i,vcall_time[i],TraceProcNum,vcall_time[i]*TraceProcNum);
vcall_time[i]*=TraceProcNum;
}
// printf("calltime=%f * Proc %d = %f\n",call_time,TraceProcNum,call_time*TraceProcNum);
call_time*=TraceProcNum; //number of processors in trace-mode execution
*/
// printf("DOPL %f\n",call_time);
if (mode)
{
printf("DOPL ");
for(i=0;i<MPSProcCount(); i++)
printf("%f ",CurrProcTime(i));
printf("\n");
}
dopl_full_Info* tmp_params = (dopl_full_Info*)this->call_params;
if (call_time==0 || loop_size==0)
return;
LoopBlock** ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*));
assert(ProcBlock != NULL);
//<2F><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
for(i=0;i<MPSProcCount();i++)
ProcBlock[i]=new LoopBlock(ParLoopInfo.ParLoop_Obj, i,1);
if (mode)
{
printf("start DOPL\n");
for (i = 0; i < MPSProcCount(); i++)
{
int k;
printf("DOPL proc[%d]= ", i);
for (k = 0; k < ProcBlock[i]->LSDim.size(); k++)
printf("%d %d ", ProcBlock[i]->LSDim[k].Lower, ProcBlock[i]->LSDim[k].Upper);
printf("\n");
}
}
//====
if(ParLoopInfo.across && tmp_params->ReturnVar==1)
{
#define max_rank 4
#define ShdWid(k) ((!invers[k])?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
#define PreShdWid(k) (invers[k]?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
#define msize(i,j) ((j<rank_mas)?(ProcBlock[i]->LSDim[j].Upper - ProcBlock[i]->LSDim[j].Lower + 1) / ProcBlock[i]->LSDim[j].Step:1)
std::vector<long> pp;
int k,d,rank,j,i,rank_mas,x;
int invers[max_rank],prev[max_rank],post[max_rank],p[max_rank],n[max_rank];
double a,sendtime,com_time,real_sync,exectime,overlap,TStart,TByte;
pp=currentVM->getSizeArray();
rank=pp.size();
for(k=0;k<rank;k++)
p[k]=pp[k];
//<2F><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> 1
for(k=rank;k<max_rank;k++)
p[k]=1;
for(k=0;k<MPSProcCount();k++)
if(!ProcBlock[k]->empty())
break;
if(k<MPSProcCount())
rank_mas=ProcBlock[0]->LSDim.size();
else
rank_mas=0; //impossible must be
// rank_mas=rank;
// printf("rank=%d rank_mas=%d\n",rank,rank_mas);
//calc invers
for(i=0;i<MPSProcCount();i++)
{
for(j=0;j<ParLoopInfo.Rank;j++)
invers[j]=ParLoopInfo.ParLoop_Obj->Invers[j];
for(k=0;k<rank_mas;k++)
n[k]=i;
for(k=rank_mas;k<max_rank;k++)
n[k]=0;
for(k=max_rank-1;k>=0;k--)
{
n[k]=n[k]%p[k];
for(x=0;x<k;x++)
n[x]=n[x]/p[k];
}
for(k=0;k<rank;k++)
{
for(j=k+1,d=1;j<rank;j++)
d*=p[j];
//<2F><><EFBFBD><EFBFBD> prev == -1 <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD>. <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD>. <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
if(invers[k])
if(n[k]!=p[k]-1 && i+d<MPSProcCount()) prev[k]=i+d;
else prev[k]=-1;
else
if(n[k]!=0 && i-d>=0) prev[k]=i-d;
else prev[k]=-1;
if(!invers[k])
if(n[k]!=p[k]-1 && i+d<MPSProcCount()) post[k]=i+d;
else post[k]=-1;
else
if(n[k]!=0 && i-d>=0) post[k]=i-d;
else post[k]=-1;
}
// printf("PREV %d %d\n",prev[0],prev[1],prev[2],prev[3]);
// printf("POST %d %d\n",post[0],post[1],post[2],post[3]);
for(k=0,a=1;k<rank;k++)
a*=msize(i,k);
// for(k=0;k<rank;k++)
// printf("SHAD widthNEW[%d]=%d SHAD width[%d]=%d\n",k,ShdWid(k),k,PreShdWid(k));
type_size=ParLoopInfo.ParLoop_Obj->AcrossFlag;
TStart = currentVM->getTStart();
TByte = currentVM->getTByte()*type_size;
//printf("Tstart=%.10f TByte=%.10f\n",TStart,TByte);
sendtime=0; com_time=0; real_sync=0; exectime=0; overlap=0;
for(k=0;k<rank;k++)
{
if(post[k]!=-1)
{ com_time+=TStart+a/msize(i,k)*TByte;
}
}
}
//printf("Procs[%d] comm=%f\n",i,com_time);
// AddMPSTime(__Shadow_synchronize,my_num,real_sync);
AddMPSTime(__Wait_shadow,com_time);
// AddMPSTime(__Shadow_overlap,overlap);
}
if(ParLoopInfo.across && tmp_params->ReturnVar==0)
{
double max_time;
type_size=ParLoopInfo.ParLoop_Obj->AcrossFlag;
//<2F><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> Step <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> < 0
for(i=0;i<MPSProcCount();i++)
for(j=0;j<ProcBlock[i]->GetRank();j++)
if(ParLoopInfo.ParLoop_Obj->Invers[j]==1) ProcBlock[i]->LSDim[j].Step=-ProcBlock[i]->LSDim[j].Step;
max_time=0;
for(i=0;i<MPSProcCount();i++)
max_time=(CurrProcTime(i)>max_time)?CurrProcTime(i):max_time;
for(i=0;i<MPSProcCount();i++)
{
// AddTimeSynchronize(__Synchronize, i, max_time-CurrProcTime(i));
AddTimeSynchronize(__Wait_shadow, i, max_time-CurrProcTime(i));
// printf("Sync %f\n",max_time-CurrProcTime(i));
}
// printf("DOPL %f ACROSS LoopSZ=%d:%d %d:%d %d:%d\n",call_time,ParLoopInfo.ParLoop_Obj->LowerIndex[0],ParLoopInfo.ParLoop_Obj->HigherIndex[0],ParLoopInfo.ParLoop_Obj->LowerIndex[1],ParLoopInfo.ParLoop_Obj->HigherIndex[1],ParLoopInfo.ParLoop_Obj->LowerIndex[2],ParLoopInfo.ParLoop_Obj->HigherIndex[2]);
// printf("DOPL ACROSS LoopInvers=%d %d %d\n",ParLoopInfo.ParLoop_Obj->Invers[0],ParLoopInfo.ParLoop_Obj->Invers[1],ParLoopInfo.ParLoop_Obj->Invers[2]);
ParLoopInfo.ParLoop_Obj->AcrossCost->Across(call_time, ParLoopInfo.ParLoop_Obj->GetLoopSize(),ProcBlock,type_size);
max_time=0;
for(i=0;i<MPSProcCount();i++)
max_time=(CurrProcTime(i)>max_time)?CurrProcTime(i):max_time;
for(i=0;i<MPSProcCount();i++)
{
AddTimeVariation(__Wait_shadow, i, max_time-CurrProcTime(i));
// printf("time[%d]=%f max=%f TimVar=%f\n",i,CurrProcTime(i),max_time,max_time-CurrProcTime(i));
}
AddMPSTime(__CPU_time_sys, vret_time);
AddMPSTime(__Insuff_parall_sys, (ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
if(mode)
{
printf("DONE DOPL ");
for(i=0;i<MPSProcCount(); i++)
printf("%f ",CurrProcTime(i));
printf("\n");
}
// return;
}
else //ParLoopInfo.across && tmp_params->ReturnVar==1
{
//=***
//grig
/*
LoopBlock *minipl;
if(prevLoopBlock!=NULL)
{
minipl= prevLoopBlock;
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> minipl <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
for(i=0;i<MPSProcCount();i++)
{
block_size=ProcBlock[i]->GetBlockSize();
if(block_size==0)
continue;
interceptj=intersection(*minipl,*ProcBlock[i]); // <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
time= ((double)vcall_time[i])*((double)interceptj/(double)minipl->GetBlockSize());
//\grig
//currentVM->getProcPower(); // MPSProcPower();
AddTime(__CPU_time_usr, currentVM->map(i), time);
cnt=0;
for (j=0; j<MPSProcCount(); j++)
if(*(ProcBlock[i]) == *(ProcBlock[j]))
cnt++;
if (cnt > 1)
{
ip_time = time * (((double) cnt - 1.0) / (double) cnt);
AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time);
}
}
//delete minipl;
if(tmp_params->Dim.size()!=0)
{
delete minipl;
std::vector<LoopLS> lstemp;
//lstemp.resize(tmp_params->Dim.size();
for(i=0;i<tmp_params->Dim.size();i++)
{
lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i]));
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
prevLoopBlock = new LoopBlock(lstemp);
lstemp.resize(0);
}
AddMPSTime(__CPU_time_sys, vret_time);
AddMPSTime(__Insuff_parall_sys,(ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
}
//grig
*/
// else
// {
//grig
/*
if(tmp_params->Dim.size()!=0)
{
std::vector<LoopLS> lstemp;
for(i=0;i<tmp_params->Dim.size();i++)
{
lstemp.push_back(LoopLS(tmp_params->Lower[i],tmp_params->Upper[i],tmp_params->Step[i]));
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
prevLoopBlock = new LoopBlock(lstemp);
lstemp.resize(0);
}
*/
//\grig
// LoopBlock** ProcBlock=(LoopBlock**)calloc(MPSProcCount(),sizeof(LoopBlock*));
// assert(ProcBlock != NULL);
// for(i=0;i<MPSProcCount();i++)
// ProcBlock[i]=new LoopBlock(ParLoopInfo.ParLoop_Obj, i,1);
for(i=0;i<MPSProcCount();i++)
{
block_size=ProcBlock[i]->GetBlockSize();
// printf("DOPL[%d]=%d of %d\n",i,block_size,loop_size);
if(block_size==0)
continue;
//grig
time = (vcall_time[i]*((double)block_size/(double)loop_size));//commented grig /currentVM->getProcPower(i);
//\grig
//currentVM->getProcPower()/*MPSProcPower()*/;
AddTime(__CPU_time_usr, currentVM->map(i), time);
cnt=0;
for (j=0; j<MPSProcCount(); j++)
{
// printf("i=%d j=%d [0] %d %d %d %d\n",i,j,ProcBlock[i]->LSDim[0].Lower,ProcBlock[i]->LSDim[0].Upper, ProcBlock[j]->LSDim[0].Lower, ProcBlock[j]->LSDim[0].Upper);
// printf("i=%d j=%d [1] %d %d %d %d",i,j,ProcBlock[i]->LSDim[1].Lower,ProcBlock[i]->LSDim[1].Upper, ProcBlock[j]->LSDim[1].Lower, ProcBlock[j]->LSDim[1].Upper);
if(*(ProcBlock[i]) == *(ProcBlock[j]))
cnt++;
// printf(" cnt=%d\n",cnt);
}
//printf("DOPL time=%f cnt=%d\n",time,cnt);
if (cnt > 1)
{
ip_time = time * (((double) cnt - 1.0) / (double) cnt);
AddTime(__Insuff_parall_usr, currentVM->map(i), ip_time);
}
}
// for (i=0;i<MPSProcCount();i++)
// delete ProcBlock[i];
// free(ProcBlock);
AddMPSTime(__CPU_time_sys, vret_time);
AddMPSTime(__Insuff_parall_sys, (ret_time * ((double) MPSProcCount()-1.0) / (double) MPSProcCount()));
}
// for (i=0;i<MPSProcCount();i++)
// delete ProcBlock[i];
free(ProcBlock);
return;
}
// Dispatches a parallel-loop trace record to the handler selected by func_id.
// Every record except dopl_ is afterwards charged through RegularTime();
// ids that are not listed run no handler but are still charged.
void FuncCall::ParLoopTime()
{
    switch (func_id) {
        case crtpl_: crtpl(); break;
        case mappl_: mappl(); break;
        case dopl_:  dopl();  break;
        case endpl_: endpl(); break;
        default: break;
    }

    if (func_id == dopl_)
        return;
    RegularTime();
}

View File

@@ -0,0 +1,296 @@
#include <fstream>
#include <assert.h>
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
#include "Vm.h"
using namespace std;
extern int mode;
extern ofstream prot;
extern _DArrayInfo * GetDArrayByIndex(long ID);
extern _AMViewInfo * GetAMViewByIndex(long ID);
extern _ParLoopInfo ParLoopInfo;
_RedVarInfo *RedVars = NULL;
_RedGrpInfo *RedGroups = NULL;
_ReductInfo *Reductions = NULL;
int _RedVarInfo::count = 0;
int _RedGrpInfo::count = 0;
int _ReductInfo::count = 0;
//---------------------------------- RedVar ----------------------------------------------
int GetRedVarIndex(long ID)
{
int i;
for (i = RedVars->size() - 1; (i >= 0) && RedVars[i].ID!=ID; i--);
return i;
}
// Looks up a reduction variable by ID; returns NULL when the ID is unknown.
_RedVarInfo* GetRedVarByIndex(long ID)
{
    const int pos = GetRedVarIndex(ID);
    if (pos < 0)
        return NULL;
    return &RedVars[pos];
}
// Appends a new entry with the given ID to the RedVars table and returns it.
// The returned pointer is invalidated by the next AddRedVar/DelRedVar call,
// because the table is reallocated.
_RedVarInfo* AddRedVar(long ID)
{
    // RedVars is NULL before the first insertion, so the size is taken from
    // the static counter instead of calling size() through the pointer.
    const int curr_size = _RedVarInfo::count;

    RedVars = (_RedVarInfo*)realloc(RedVars, (curr_size + 1) * sizeof(_RedVarInfo));
    assert(RedVars != NULL);
    ++_RedVarInfo::count;

    _RedVarInfo* tmp = &RedVars[curr_size];
    tmp->ID = ID;
    // realloc() does not initialise new storage; keep the object pointer
    // well-defined until the caller installs the RedVar.
    tmp->RedVar_Obj = NULL;
    return tmp;
}
void DelRedVar(long ID)
{
int idx=GetRedVarIndex(ID);
int curr_size = RedVars->size();
int i;
if (idx<0)
return;
delete RedVars[idx].RedVar_Obj;
for(i=idx+1; i<curr_size; i++)
{ RedVars[i-1]=RedVars[i];
}
RedVars=(_RedVarInfo*)realloc(RedVars,(curr_size-1)*sizeof(_RedVarInfo));
assert((RedVars != NULL) || (curr_size == 1));
--*RedVars;
}
//---------------------------------- RedGroup --------------------------------------------
int GetRedGroupIndex(long ID)
{
int i;
for (i = RedGroups->size() - 1; (i >= 0) && RedGroups[i].ID!=ID; i--);
return i;
}
// Looks up a reduction group by ID; returns NULL when the ID is unknown.
_RedGrpInfo* GetRedGroupByIndex(long ID)
{
    const int pos = GetRedGroupIndex(ID);
    if (pos < 0)
        return NULL;
    return &RedGroups[pos];
}
// Appends a new entry with the given ID to the RedGroups table and returns
// it. The pointer is invalidated by the next AddRedGroup/DelRedGroup call.
_RedGrpInfo* AddRedGroup(long ID)
{
    // RedGroups is NULL before the first insertion; read the static counter
    // rather than calling size() through the pointer.
    const int curr_size = _RedGrpInfo::count;

    RedGroups = (_RedGrpInfo*)realloc(RedGroups, (curr_size + 1) * sizeof(_RedGrpInfo));
    assert(RedGroups != NULL);
    ++_RedGrpInfo::count;

    _RedGrpInfo* tmp = &RedGroups[curr_size];
    tmp->ID = ID;
    // New storage from realloc() is uninitialised; keep the pointer defined
    // until the caller installs the RedGroup object.
    tmp->RedGroup_Obj = NULL;
    return tmp;
}
void DelRedGroup(long ID)
{
int idx=GetRedGroupIndex(ID);
int curr_size = RedGroups->size();
int i;
if(idx<0) return;
delete RedGroups[idx].RedGroup_Obj;
for(i=idx+1; i<curr_size; i++) {
RedGroups[i-1]=RedGroups[i];
}
RedGroups=(_RedGrpInfo*)realloc(RedGroups,(curr_size-1)*sizeof(_RedGrpInfo));
assert((RedGroups != NULL) || (curr_size == 1));
--*RedGroups;
}
//---------------------------------- Reduct --------------------------------------------
int GetReductIndex(long ID)
{
int i;
for (i = Reductions->size() - 1; (i >= 0) && Reductions[i].ID!=ID; i--);
return i;
}
// Looks up a started reduction by ID; returns NULL when the ID is unknown.
_ReductInfo* GetReductByIndex(long ID)
{
    const int pos = GetReductIndex(ID);
    if (pos < 0)
        return NULL;
    return &Reductions[pos];
}
// Appends a new entry with the given ID to the Reductions table and returns
// it. The pointer is invalidated by the next AddReduct/DelReduct call.
_ReductInfo* AddReduct(long ID)
{
    // Reductions is NULL before the first insertion; read the static counter
    // rather than calling size() through the pointer.
    const int curr_size = _ReductInfo::count;

    Reductions = (_ReductInfo*)realloc(Reductions, (curr_size + 1) * sizeof(_ReductInfo));
    assert(Reductions != NULL);
    ++_ReductInfo::count;

    _ReductInfo* tmp = &Reductions[curr_size];
    tmp->ID = ID;
    // New storage from realloc() is uninitialised; start from defined times.
    tmp->time_start = 0.0;
    tmp->time_end = 0.0;
    return tmp;
}
void DelReduct(long ID)
{
int idx=GetReductIndex(ID);
int curr_size = Reductions->size();
int i;
if(idx<0) return;
for(i=idx+1; i<curr_size; i++)
{ Reductions[i-1]=Reductions[i];
};
Reductions=(_ReductInfo*)realloc(Reductions, (curr_size-1)*sizeof(_ReductInfo));
assert((Reductions != NULL) || (curr_size == 1));
--*Reductions;
}
//--------------------------------------------------------------------------------------------------
void FuncCall::crtrg()
{
crtrg_Info* params=(crtrg_Info*) call_params;
_RedGrpInfo* tmp=AddRedGroup(params->ID);
tmp->RedGroup_Obj=new RedGroup(currentVM);
}
void FuncCall::crtred()
{
crtred_Info* params=(crtred_Info*) call_params;
_RedVarInfo* tmp=AddRedVar(params->ID);
int RedElmSize=0;
switch(params->RedArrayType) {
case 1 :
RedElmSize=sizeof(int); break;
case 2:
RedElmSize=sizeof(long); break;
case 3:
RedElmSize=sizeof(float); break;
case 4:
RedElmSize=sizeof(double); break;
};
tmp->RedVar_Obj=new RedVar(RedElmSize, params->RedArrayLength, params->LocElmLength);
}
void FuncCall::insred()
{
insred_Info* params=(insred_Info*) call_params;
_RedVarInfo* RV=GetRedVarByIndex(params->RV_ID);
_RedGrpInfo* RG=GetRedGroupByIndex(params->RG_ID);
RG->RedGroup_Obj->AddRV(RV->RedVar_Obj);
}
void FuncCall::delred()
{
delred_Info* params=(delred_Info*) call_params;
DelRedVar(params->ID);
}
void FuncCall::delrg()
{
delrg_Info* params=(delrg_Info*) call_params;
DelRedGroup(params->ID);
}
void FuncCall::strtrd()
{
strtrd_Info* params=(strtrd_Info*) call_params;
_ReductInfo* RED=AddReduct(params->ID);
// printf("Reduction sync\n");
MPSSynchronize(__Wait_reduct);
//// MPSSynchronize(__Synchronize);
RED->time_start = CurrProcTime(0);
++CurrInterval->num_op_reduct;
}
// waitrd: models waiting for a started reduction to finish.
// The reduction duration is obtained from RedGroup::StartR() for the current
// parallel-loop pattern (PatternType 1 = AMView, 2 = distributed array) and
// added to the recorded start time. Each processor is then charged overlap
// time up to its current time, plus wait time if it reached this point before
// the reduction ended. The Reductions entry is removed afterwards.
// params->ID is used to look up both the Reductions entry and the group.
void FuncCall::waitrd()
{
    waitrd_Info* params = (waitrd_Info*) call_params;
    _ReductInfo* RED = GetReductByIndex(params->ID);
    _RedGrpInfo* RG = GetRedGroupByIndex(params->ID);

    // Defaults to 0.0 so an unexpected PatternType no longer leaves the
    // duration uninitialised.
    double red_time = 0.0;

    if (ParLoopInfo.PatternType == 1) {
        // AMView
        _AMViewInfo* AM = GetAMViewByIndex(ParLoopInfo.PatternID);
        red_time = RG->RedGroup_Obj->StartR(AM->AMView_Obj, ParLoopInfo.Rank,
            ParLoopInfo.AxisArray);
    } else if (ParLoopInfo.PatternType == 2) {
        // DisArray
        _DArrayInfo* DA = GetDArrayByIndex(ParLoopInfo.PatternID);
        red_time = RG->RedGroup_Obj->StartR(DA->DArray_Obj, ParLoopInfo.Rank,
            ParLoopInfo.AxisArray);
    }

    RED->time_end = RED->time_start + red_time;

    for (int i = 0; i < MPSProcCount(); i++)
    {
        const double curr_pt = CurrProcTime(currentVM->map(i));
        if (mode)
            printf("red.proc[%d] %f -> %f\n", i, curr_pt, RED->time_end);

        if (curr_pt < RED->time_end) {
            AddTime(__Reduct_overlap, currentVM->map(i), (curr_pt - RED->time_start));
            AddTime(__Wait_reduct, currentVM->map(i), (RED->time_end - curr_pt));
        }
        else {
            // NOTE(review): overlap is charged up to curr_pt here as well,
            // not up to time_end; kept as in the existing model.
            AddTime(__Reduct_overlap, currentVM->map(i), (curr_pt - RED->time_start));
        }
    }

    DelReduct(params->ID);
}
// Dispatches a reduction-related trace record to the handler selected by
// func_id, then charges the call through RegularTime(). Ids not listed run no
// handler but are still charged.
void FuncCall::ReductTime()
{
    switch (func_id) {
        case crtrg_:  crtrg();  break;
        case crtred_: crtred(); break;
        case insred_: insred(); break;
        case delred_: delred(); break;
        case delrg_:  delrg();  break;
        case strtrd_: strtrd(); break;
        case waitrd_: waitrd(); break;
        default: break;
    }

    RegularTime();
}

View File

@@ -0,0 +1,54 @@
#include <stdlib.h>
#include <assert.h>
#include <fstream>
#include <vector>
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
using namespace std;
extern ofstream prot;
void FuncCall::RegularTime()
{
//grig
AddMPSTime(__CPU_time_usr, vcall_time);
//\grig
if (ret_time !=0.0)
{
//grig AddMPSTime(__CPU_time_sys, ret_time);
//grig
AddMPSTime(__CPU_time_sys, vret_time);
//grig
}
//grig
int k;
vector<double> tempret,tempcall;
tempcall.resize(0);
tempret.resize(0);
for(k=0;k<vret_time.size();k++)
tempret.push_back(vret_time[k]*((double) MPSProcCount()-1.0) / (double) MPSProcCount());
for(k=0;k<vcall_time.size();k++)
tempcall.push_back(vcall_time[k] * ((double) MPSProcCount()-1.0) / (double) MPSProcCount());
AddMPSTime(__Insuff_parall_sys,tempret );
AddMPSTime(__Insuff_parall_usr,tempcall );
//\grig
}
void FuncCall::UnknownTime()
{
RegularTime();
}

View File

@@ -0,0 +1,278 @@
#include <fstream>
#include <assert.h>
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
#include "Vm.h"
using namespace std;
extern ofstream prot;
extern _PSInfo* GetPSByIndex(long ID);
extern _ParLoopInfo ParLoopInfo;
extern _DArrayFlag * DAF_tmp;
_DArrayInfo* GetDArrayByIndex(long ID);
_RemAccessInfo *RemAccess = NULL;
int _RemAccessInfo::count = 0;
//-------------------------------- RemAccess --------------------------------------------
int GetRemAccessIndex(long ID)
{
int i;
for (i = RemAccess->size() - 1; (i >= 0) && RemAccess[i].ID!=ID; i--);
return i;
}
// Looks up a remote-access buffer by ID; returns NULL when the ID is unknown.
_RemAccessInfo* GetRemAccessByIndex(long ID)
{
    const int pos = GetRemAccessIndex(ID);
    if (pos < 0)
        return NULL;
    return &RemAccess[pos];
}
// Appends a new entry with the given ID to the RemAccess table and returns
// it. The pointer is invalidated by the next AddRemAccess/DelRemAccess call.
_RemAccessInfo* AddRemAccess(long ID)
{
    // RemAccess is NULL before the first insertion; read the static counter
    // rather than calling size() through the pointer.
    const int curr_size = _RemAccessInfo::count;

    RemAccess = (_RemAccessInfo*)realloc(RemAccess, (curr_size + 1) * sizeof(_RemAccessInfo));
    assert(RemAccess != NULL);
    ++_RemAccessInfo::count;

    _RemAccessInfo* tmp = &RemAccess[curr_size];
    tmp->ID = ID;
    // New storage from realloc() is uninitialised. Start with NULL pointers so
    // a buffer whose time arrays were never allocated fails predictably
    // instead of writing through garbage addresses.
    tmp->RemAccess_Obj = NULL;
    tmp->StartRemoteTimes = NULL;
    tmp->EndRemoteTimes = NULL;
    return tmp;
}
void DelRemAccess(long ID)
{
int idx=GetRemAccessIndex(ID);
int curr_size = RemAccess->size();
int i;
if (idx<0)
return;
delete RemAccess[idx].RemAccess_Obj;
for (i=idx+1; i<curr_size; i++) {
RemAccess[i-1]=RemAccess[i];
}
RemAccess=(_RemAccessInfo*)realloc(RemAccess,(curr_size-1)*sizeof(_RemAccessInfo));
assert((RemAccess != NULL) || (curr_size == 1));
--*RemAccess;
}
//--------------------------------------------------------------------------------------------------
// Returns the communication cost of delivering RemBlock of the array ArrFrom
// to every processor of the current machine: CopyUpdateDistr() is applied for
// each processor index and the accumulated cost is read with GetCost().
// `tmp` is currently unused; it is kept so the signature stays unchanged.
static double fillRemoteCost(_RemAccessInfo *tmp, _DArrayInfo *ArrFrom, Block &RemBlock)
{
    (void)tmp;

    CommCost *remCost = new CommCost(currentVM);

    for (int i = 0; i < MPSProcCount(); ++i)
        remCost->CopyUpdateDistr(ArrFrom->DArray_Obj, RemBlock, i);

    const double ret = remCost->GetCost();

    // The cost object is local to this call; release it (it used to leak).
    delete remCost;
    return ret;
}
void FuncCall::crtrbp()
{
crtrbp_Info *params = (crtrbp_Info*)call_params;
_RemAccessInfo *tmp = AddRemAccess(params->ID);
if (params->PSRef == 0)
tmp->RemAccess_Obj = new RemAccessBuf(currentVM);
else
{
_PSInfo *ps = GetPSByIndex(params->PSRef);
tmp->RemAccess_Obj = new RemAccessBuf(ps->VM_Obj);
}
_DArrayInfo *ArrFrom = GetDArrayByIndex(params->RemArrayHeader);
vector<LS> blockIni;
for (int z = 0; z < params->CoordArray.size(); ++z)
{
if (params->CoordArray[z] == -1)
blockIni.push_back(LS(0, ArrFrom->DArray_Obj->GetSize(z + 1) - 1, 1));
else
blockIni.push_back(LS(0, 1, 1));
}
Block RemBlock(blockIni);
ret_time += fillRemoteCost(tmp, ArrFrom, RemBlock);
}
// crtrbl: registers a remote-access buffer tied to the current parallel loop.
// Steps:
//   1. build the loop block of every processor and remember the processors
//      whose block is not empty;
//   2. derive the remote block from AxisArray/CoeffArray/ConstArray
//      (-1 = whole dimension, coefficient 0 = single index ConstArray[j]);
//   3. accumulate the transfer to each selected processor in a CommCost and
//      call GetCost() with DAF_tmp installed; DAF_tmp->ProcessTimeStamp is then
//      copied into EndRemoteTimes (presumably GetCost() fills it - TODO confirm);
//   4. count the operation in the current interval.
// Temporary objects created here (cost object, loop blocks) are released
// before returning; previously they leaked.
void FuncCall::crtrbl()
{
    int i, j;
    CommCost *remCost = new CommCost(currentVM);
    vector<long> FromInitIndexArray;
    vector<long> FromLastIndexArray;
    vector<long> FromStepIndexArray;
    vector<long> proc_indexes;
    vector <LS> blockIni;

    crtrbl_Info* params = (crtrbl_Info*)call_params;
    _RemAccessInfo* tmp = AddRemAccess(params->BufferHeader);
    _DArrayInfo* ArrFrom = GetDArrayByIndex(params->RemArrayHeader);

    LoopBlock** ProcBlock = (LoopBlock**)calloc(MPSProcCount(), sizeof(LoopBlock*));
    assert(ProcBlock != NULL);

    // Select the processors that execute a non-empty part of the loop.
    for (i = 0; i < MPSProcCount(); i++)
    {
        ProcBlock[i] = new LoopBlock(ParLoopInfo.ParLoop_Obj, i, 1);
        if (!ProcBlock[i]->empty())
            proc_indexes.push_back(i);
    }

    // resize() value-initialises the entries to 0.
    FromInitIndexArray.resize(params->AxisArray.size());
    FromLastIndexArray.resize(params->AxisArray.size());
    FromStepIndexArray.resize(params->AxisArray.size());

    for (j = 0; j < params->AxisArray.size(); j++)
    {
        if (params->AxisArray[j] == -1)
        {
            // Whole dimension of the source array.
            FromInitIndexArray[j] = 0;
            FromLastIndexArray[j] = ArrFrom->DArray_Obj->GetSize(j + 1) - 1;
            FromStepIndexArray[j] = 1;
        }
        else if (params->CoeffArray[j] == 0)
        {
            // Constant index.
            FromInitIndexArray[j] = params->ConstArray[j];
            FromLastIndexArray[j] = params->ConstArray[j];
            FromStepIndexArray[j] = 1;
        }
        else
        {
            // Not handled yet ("dont know yet" in the original): the entries
            // keep the zeros written by resize().
        }
    }

    for (j = 0; j < FromInitIndexArray.size(); j++)
        blockIni.push_back(LS(FromInitIndexArray[j], FromLastIndexArray[j], FromStepIndexArray[j]));

    Block RemBlock(blockIni);

    for (i = 0; i < proc_indexes.size(); i++)
        remCost->CopyUpdateDistr(ArrFrom->DArray_Obj, RemBlock, proc_indexes[i]);

    DAF_tmp = new (_DArrayFlag);
    DAF_tmp->ProcessTimeStamp = (double *)malloc(MPSProcCount() * sizeof(double));
    assert(DAF_tmp->ProcessTimeStamp != NULL);

    remCost->GetCost();

    tmp->StartRemoteTimes = (double *)malloc(MPSProcCount() * sizeof(double));
    tmp->EndRemoteTimes = (double *)malloc(MPSProcCount() * sizeof(double));
    assert(tmp->StartRemoteTimes != NULL);
    assert(tmp->EndRemoteTimes != NULL);

    for (i = 0; i < MPSProcCount(); i++)
        tmp->EndRemoteTimes[currentVM->map(i)] = DAF_tmp->ProcessTimeStamp[currentVM->map(i)]; // time of communication

    free(DAF_tmp->ProcessTimeStamp);
    delete DAF_tmp;
    DAF_tmp = NULL;

    // Count this call as a remote-access operation of the current interval.
    ++CurrInterval->num_op_remote;

    // Release the temporaries owned by this call.
    delete remCost;
    for (i = 0; i < MPSProcCount(); i++)
        delete ProcBlock[i];
    free(ProcBlock);
}
void FuncCall::loadrb()
{
loadrb_Info* params=(loadrb_Info*) call_params;
_RemAccessInfo* tmp=GetRemAccessByIndex(params->ID);
int i;
for (i=0; i < MPSProcCount(); i++)
{
tmp->StartRemoteTimes[currentVM->map(i)] = CurrProcTime(currentVM->map(i)); // time of START of communication
tmp->EndRemoteTimes[currentVM->map(i)] += CurrProcTime(currentVM->map(i)); // time of END of communication
}
++CurrInterval->num_op_remote;
}
void FuncCall::waitrb()
{
waitrb_Info* params=(waitrb_Info*) call_params;
_RemAccessInfo* tmp=GetRemAccessByIndex(params->ID);
double curr_pt;
int i;
for (i=0; i < MPSProcCount(); i++)
{
curr_pt=CurrProcTime(currentVM->map(i));
if(curr_pt < tmp->EndRemoteTimes[currentVM->map(i)])
{
AddTime(__Remote_access, currentVM->map(i), tmp->EndRemoteTimes[currentVM->map(i)] - curr_pt);
AddTime(__Remote_overlap, currentVM->map(i), curr_pt - tmp->StartRemoteTimes[currentVM->map(i)]);
}
else
{
AddTime(__Remote_overlap, currentVM->map(i), tmp->EndRemoteTimes[currentVM->map(i)] - tmp->StartRemoteTimes[currentVM->map(i)]);
}
}
++CurrInterval->num_op_remote; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
}
// Dispatches a remote-access trace record to the handler selected by func_id,
// then charges the call through RegularTime(). Ids not listed run no handler
// but are still charged.
void FuncCall::RemAccessTime()
{
    switch (func_id) {
        case crtrbp_: crtrbp(); break;
        case crtrbl_: crtrbl(); break;
        case loadrb_: loadrb(); break;
        case waitrb_: waitrb(); break;
        default: break;
    }

    RegularTime();
}

View File

@@ -0,0 +1,427 @@
#include <assert.h>
#include <fstream>
// #include "Event.h"
#include "ModelStructs.h"
#include "FuncCall.h"
#include "CallInfoStructs.h"
#include "Interval.h"
using namespace std;
extern ofstream prot;
extern _DArrayInfo * GetDArrayByIndex(long ID);
extern _ParLoopInfo ParLoopInfo;
_ShadowInfo *Shadows = NULL;
int _ShadowInfo::count = 0;
_ShdGrpInfo *ShdGroups = NULL;
int _ShdGrpInfo::count = 0;
//====
int type_size;
//=***
//---------------------------------- Shadow --------------------------------------------
int GetShadowIndex(long ID)
{
int i;
for (i = Shadows->size() - 1; (i >= 0) && Shadows[i].ID!=ID; i--);
return i;
}
// Looks up a started shadow exchange by ID; returns NULL when the ID is unknown.
_ShadowInfo* GetShadowByIndex(long ID)
{
    const int pos = GetShadowIndex(ID);
    if (pos < 0)
        return NULL;
    return &Shadows[pos];
}
// Appends a new entry with the given ID to the Shadows table and returns it.
// The pointer is invalidated by the next AddShadow/DelShadow call.
_ShadowInfo* AddShadow(long ID)
{
    // Shadows is NULL before the first insertion; read the static counter
    // rather than calling size() through the pointer.
    const int curr_size = _ShadowInfo::count;

    Shadows = (_ShadowInfo*)realloc(Shadows, (curr_size + 1) * sizeof(_ShadowInfo));
    assert(Shadows != NULL);
    ++_ShadowInfo::count;

    _ShadowInfo* tmp = &Shadows[curr_size];
    tmp->ID = ID;
    // New storage from realloc() is uninitialised; start from defined times.
    tmp->time_start = 0.0;
    tmp->time_end = 0.0;
    return tmp;
}
void DelShadow(long ID)
{
int idx=GetShadowIndex(ID);
int curr_size = Shadows->size();
int i;
if(idx<0) return;
for(i=idx+1; i<curr_size; i++) {
Shadows[i-1]=Shadows[i];
}
Shadows=(_ShadowInfo*)realloc(Shadows,(curr_size-1)*sizeof(_ShadowInfo));
assert((Shadows != NULL) || (curr_size == 1));
--*Shadows;
}
//---------------------------------- ShdGroup --------------------------------------------
int GetShdGroupIndex(long ID)
{
int i;
for (i = ShdGroups->size() - 1; (i >= 0) && ShdGroups[i].ID!=ID; i--);
return i;
}
// Looks up a shadow group by ID; returns NULL when the ID is unknown.
_ShdGrpInfo* GetShdGroupByIndex(long ID)
{
    const int pos = GetShdGroupIndex(ID);
    if (pos < 0)
        return NULL;
    return &ShdGroups[pos];
}
// Appends a new entry with the given ID to the ShdGroups table and returns
// it. The entry gets a ProcessTimeStamp array of rootProcCount doubles.
// The pointer is invalidated by the next AddShdGroup/DelShdGroup call.
_ShdGrpInfo* AddShdGroup(long ID)
{
    // ShdGroups is NULL before the first insertion; read the static counter
    // rather than calling size() through the pointer.
    const int curr_size = _ShdGrpInfo::count;

    ShdGroups = (_ShdGrpInfo*)realloc(ShdGroups, (curr_size + 1) * sizeof(_ShdGrpInfo));
    assert(ShdGroups != NULL);
    ++_ShdGrpInfo::count;

    _ShdGrpInfo* tmp = &ShdGroups[curr_size];
    // New storage from realloc() is uninitialised; keep the object pointer
    // defined until the caller installs the BoundGroup.
    tmp->BoundGroup_Obj = NULL;
    tmp->ProcessTimeStamp = new double[rootProcCount];
    tmp->ID = ID;
    return tmp;
}
void DelShdGroup(long ID)
{
int idx=GetShdGroupIndex(ID);
int curr_size = ShdGroups->size();
int i;
if (idx<0)
return;
delete ShdGroups[idx].BoundGroup_Obj;
delete ShdGroups[idx].ProcessTimeStamp;
for(i=idx+1; i<curr_size; i++)
ShdGroups[i-1]=ShdGroups[i];
ShdGroups=(_ShdGrpInfo*)realloc(ShdGroups,(curr_size-1)*sizeof(_ShdGrpInfo));
assert((ShdGroups != NULL) || (curr_size == 1));
--*ShdGroups;
}
//-------------------------------------------------------------------------------
void FuncCall::crtshg()
{
crtshg_Info* params = (crtshg_Info*) call_params;
_ShdGrpInfo* tmp = AddShdGroup(params->ShadowGroupRef); // ShadowGroupRef
// std::cout << tmp << std::endl;
tmp->BoundGroup_Obj = new BoundGroup();
}
void FuncCall::inssh()
{
inssh_Info* params = (inssh_Info*)call_params;
_ShdGrpInfo* SG = GetShdGroupByIndex(params->ShadowGroupRef);
_DArrayInfo* DA = GetDArrayByIndex(params->ArrayHeader); // DA_ID
type_size = DA->DArray_Obj->TypeSize;
SG->BoundGroup_Obj->AddBound(DA->DArray_Obj, params->LowShdWidthArray, params->HiShdWidthArray, params->FullShdSign);
}
void FuncCall::insshd()
{
inssh_Info* params = (inssh_Info*)call_params;
_ShdGrpInfo* SG = GetShdGroupByIndex(params->ShadowGroupRef);
_DArrayInfo* DA = GetDArrayByIndex(params->ArrayHeader); // DA_ID
type_size = DA->DArray_Obj->TypeSize;
SG->BoundGroup_Obj->AddBound(DA->DArray_Obj, params->LowShdWidthArray, params->HiShdWidthArray, 0); // 0 - EVA ???
}
// Fills params->LowShdWidthArray / HiShdWidthArray from the Init/Last shadow
// index arrays, one entry per dimension of InitLowShdIndex:
//   Init != -1               -> width = Last - Init + 1
//   Init == -1, Last != -1   -> width = Last
//   Init == -1, Last == -1   -> width taken from the array's own shadow width
static void setShdWidth(inssh_Info* params, _DArrayInfo* DA)
{
    const unsigned arrayRank = params->InitLowShdIndex.size();

    params->HiShdWidthArray.resize(arrayRank);
    params->LowShdWidthArray.resize(arrayRank);

    for (int d = 0; d < arrayRank; d++) {
        // Low shadow
        if (params->InitLowShdIndex[d] != -1)
            params->LowShdWidthArray[d] = params->LastLowShdIndex[d] -
                params->InitLowShdIndex[d] + 1;
        else if (params->LastLowShdIndex[d] != -1)
            params->LowShdWidthArray[d] = params->LastLowShdIndex[d];
        else
            params->LowShdWidthArray[d] = DA->DArray_Obj->LowShdWidthArray[d];

        // High shadow
        if (params->InitHiShdIndex[d] != -1)
            params->HiShdWidthArray[d] = params->LastHiShdIndex[d] -
                params->InitHiShdIndex[d] + 1;
        else if (params->LastHiShdIndex[d] != -1)
            params->HiShdWidthArray[d] = params->LastHiShdIndex[d];
        else
            params->HiShdWidthArray[d] = DA->DArray_Obj->HiShdWidthArray[d];
    }
}
void FuncCall::incsh()
{
inssh_Info* params = (inssh_Info*) call_params;
_ShdGrpInfo* SG=GetShdGroupByIndex(params->ShadowGroupRef);
_DArrayInfo* DA=GetDArrayByIndex(params->ArrayHeader); // DA_ID
setShdWidth(params, DA);
SG->BoundGroup_Obj->AddBound(DA->DArray_Obj,params->LowShdWidthArray,
params->HiShdWidthArray, params->FullShdSign);
}
void FuncCall::incshd()
{
inssh_Info* params = (inssh_Info*) call_params;
_ShdGrpInfo* SG=GetShdGroupByIndex(params->ShadowGroupRef);
_DArrayInfo* DA=GetDArrayByIndex(params->ArrayHeader); // DA_ID
setShdWidth(params, DA);
SG->BoundGroup_Obj->AddBound(DA->DArray_Obj,params->LowShdWidthArray,
params->HiShdWidthArray, 0); // 0 - EVA ???
}
void FuncCall::delshg()
{
delshg_Info* params=(delshg_Info*) call_params;
DelShdGroup(params->ID);
}
void FuncCall::strtsh()
{
strtsh_Info* params=(strtsh_Info*) call_params;
_ShdGrpInfo* SG=GetShdGroupByIndex(params->ID);
_ShadowInfo* SHD=AddShadow(params->ID);
double shd_time=SG->BoundGroup_Obj->StartB();
// printf("Start shadow\n");
// MPSSynchronize(__Wait_shadow);
// printf("Start shadow end %f\n",shd_time);
SHD->time_start=CurrProcTime(0);
SHD->time_end=SHD->time_start+shd_time;
++CurrInterval->num_op_shadow;
}
void FuncCall::waitsh()
{
waitsh_Info* params=(waitsh_Info*) call_params;
_ShdGrpInfo* SG=GetShdGroupByIndex(params->ID);
_ShadowInfo* SHD=GetShadowByIndex(params->ID);
// assert(SHD != NULL);
if (SHD == NULL) {
// cout << "Pipeline recvsh/sendsh is not implemented." << endl;
// prot << "Pipeline recvsh/sendsh is not implemented." << endl;
exit(0);
}
int i;
double curr_pt;
for (i=0; i<MPSProcCount(); i++) {
curr_pt = CurrProcTime(currentVM->map(i));
// printf("Start WAIT[%d] shadow %f-%f=%f or %f \n",i,SHD->time_start, SHD->time_end, SHD->time_end-SHD->time_start, curr_pt);
if(curr_pt < SHD->time_end) {
AddTime(__Shadow_overlap,currentVM->map(i), curr_pt - SHD->time_start);
AddTime(__Wait_shadow,currentVM->map(i), SHD->time_end - curr_pt);
} else {
// AddTime(__Shadow_overlap,currentVM->map(i), SHD->time_end - SHD->time_start);
AddTime(__Shadow_overlap,currentVM->map(i), curr_pt - SHD->time_start);
}
}
// printf("END WAIT shadow\n");
DelShadow(params->ID);
}
// exfrst: starts the shadow exchange of ShadowGroupRef for the current
// parallel loop. All processors are synchronised (charged as __Wait_shadow)
// before the start time is taken from processor 0; the end time is the start
// plus the duration returned by BoundGroup::StartB(). The interval's shadow
// counter is bumped.
void FuncCall::exfrst()
{
    exfrst_Info * params = (exfrst_Info *) call_params;

    // The call must refer to the current parallel loop. This used to be an
    // assignment ('='), which overwrote ParLoopInfo.ID instead of checking it.
    assert(ParLoopInfo.ID == params->ID);

    _ShdGrpInfo* SG = GetShdGroupByIndex(params->ShadowGroupRef);
    _ShadowInfo* SHD = AddShadow(params->ShadowGroupRef);

    double shd_time = SG->BoundGroup_Obj->StartB();

    MPSSynchronize(__Wait_shadow);

    SHD->time_start = CurrProcTime(0);
    SHD->time_end = SHD->time_start + shd_time;
    ++CurrInterval->num_op_shadow;
}
// imlast: marks the current parallel loop as having an imlast request and
// remembers the shadow group reference for it.
void FuncCall::imlast()
{
    imlast_Info * params = (imlast_Info *) call_params;

    // The call must refer to the current parallel loop. This used to be an
    // assignment ('='), which overwrote ParLoopInfo.ID instead of checking it.
    assert(ParLoopInfo.ID == params->ID);

    ParLoopInfo.imlast = true;
    ParLoopInfo.imlast_SGR = params->ShadowGroupRef;
}
// sendsh: only counted as a shadow operation of the current interval;
// no time is modelled here.
void FuncCall::sendsh()
{
    ++(CurrInterval->num_op_shadow);
}
// recvsh: currently a no-op. The whole body is compiled only when the macro
// `nodef` is defined. The disabled code creates a Shadows entry and fills the
// group's ProcessTimeStamp with cumulative multiples of the StartB() time
// along one VM dimension, in an order that depends on getDimBound().
void FuncCall::recvsh()
{
// prot << "recvsh" << endl;
#ifdef nodef
int i;
recvsh_Info* params=(recvsh_Info*) call_params;
_ShdGrpInfo* SG=GetShdGroupByIndex(params->ID);
assert(SG != NULL);
_ShadowInfo* SHD=AddShadow(params->ID);
assert(SHD != NULL);
char dimBound = SG->BoundGroup_Obj->getDimBound();
int vmDim = SG->BoundGroup_Obj->getVmDimension();
int vmDimSize = currentVM->GetSize(vmDim);
int vmDimMult = currentVM->GetMult(vmDim);
double shd_time=SG->BoundGroup_Obj->StartB();
double shd_time1 = 0.0;
// clean ProcessTimeStamp
for (i = 0; i < rootProcCount; i++)
SG->ProcessTimeStamp[i] = 0.0;
if (vmDimSize >= 2) {
if (dimBound == 'L') {
for (i = vmDimSize - 2; i >= 0; i--) {
// Left bound: time stamps grow from index vmDimSize-2 down to 0
shd_time1 += shd_time;
SG->ProcessTimeStamp[currentVM->map(i)] = shd_time1;
}
} else if (dimBound == 'R') {
for (i = 1; i < vmDimSize; i++) {
// 'R' bound (the original comment said "Left bound" here, presumably a
// copy-paste slip): time stamps grow from index 1 up to vmDimSize-1
shd_time1 += shd_time;
SG->ProcessTimeStamp[currentVM->map(i)] = shd_time1;
}
} else {
}
}
// SHD->time_start=CurrProcTime(0);
// SHD->time_end=SHD->time_start+shd_time;
#endif
}
void FuncCall::across()
{
across_Info * params = (across_Info *) call_params;
//====
_ShdGrpInfo* SGNEW=GetShdGroupByIndex(params->NewShadowGroupRef);
ParLoopInfo.ParLoop_Obj->Across(SGNEW->BoundGroup_Obj->GetBoundCost(),type_size);
//=***
if (params->OldShadowGroupRef == 0) {
++CurrInterval->num_op_shadow;
return;
}
_ShdGrpInfo* SG=GetShdGroupByIndex(params->OldShadowGroupRef);
//====
ParLoopInfo.SGnew=GetShdGroupByIndex(params->NewShadowGroupRef);
ParLoopInfo.SG=GetShdGroupByIndex(params->OldShadowGroupRef);
ParLoopInfo.across=true;
//printf("PARLoopInfo %d\n",ParLoopInfo.ParLoop_Obj->GetLoopSize());
//was _ShdGrpInfo* SG=GetShdGroupByIndex(params->OldShadowGroupRef);
_ShadowInfo* SHD=AddShadow(params->OldShadowGroupRef);
double shd_time=0;
//was double shd_time=SG->BoundGroup_Obj->StartB();
//printf("shadow time = %f\n",shd_time);
//=***
// MPSSynchronize(__Wait_shadow);
SHD->time_start = CurrProcTime(0);
SHD->time_end = SHD->time_start+shd_time;
++CurrInterval->num_op_shadow;
ParLoopInfo.across = true;
ParLoopInfo.across_SGR = params->OldShadowGroupRef;
}
// Dispatches a shadow-related trace record to the handler selected by
// func_id, then charges the call through RegularTime(). Ids not listed run no
// handler but are still charged.
void FuncCall::ShadowTime()
{
    switch (func_id) {
        case crtshg_: crtshg(); break;
        case inssh_:  inssh();  break;
        case insshd_: insshd(); break;
        case incsh_:  incsh();  break;
        case incshd_: incshd(); break;
        case delshg_: delshg(); break;
        case strtsh_: strtsh(); break;
        case waitsh_: waitsh(); break;
        case exfrst_: exfrst(); break;
        case imlast_: imlast(); break;
        case sendsh_: sendsh(); break;
        case recvsh_: recvsh(); break;
        case across_: across(); break;
        default: break;
    }

    // calculate times
    RegularTime();
}

View File

@@ -0,0 +1,162 @@
#ifndef _MODELSTRUCTS_H
#define _MODELSTRUCTS_H
#include "Vm.h"
#include "AMView.h"
#include "DArray.h"
#include "BGroup.h"
#include "RedGroup.h"
#include "RedVar.h"
#include "ParLoop.h"
#include "RemAccessBuf.h"
/* MPS/AM/AMView structures */
// Table entry for one processor system (presumably; named from "PS").
// `count` is a static shared by all _PSInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _PSInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
VM* VM_Obj; // VM object associated with this entry
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry for one abstract machine (presumably; named from "AM").
// `count` is a static shared by all _AMInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _AMInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
long PS_ID; // identifier of a related PS entry (presumably a _PSInfo::ID)
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry pairing an ID with an AMView object.
// `count` is a static shared by all _AMViewInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _AMViewInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
long AM_ID; // identifier of a related AM entry (presumably an _AMInfo::ID)
AMView* AMView_Obj; // AMView object of this entry
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry pairing an array header ID with a DArray object.
// `count` is a static shared by all _DArrayInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _DArrayInfo {
static int count; // shared counter returned by size()
long ID; // ArrayHeader
long AlignType; // alignment kind; meaning of the values is not defined in this header
DArray* DArray_Obj; // DArray object of this entry
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Record holding a per-processor time stamp array plus a start/end time.
// `count` is a static shared by all _DArrayFlag objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _DArrayFlag {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
double* ProcessTimeStamp; // array of per-processor time stamps, allocated by the user of the record
double time_start,
time_end;
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry pairing an ID with a RedVar (reduction variable) object.
// `count` is a static shared by all _RedVarInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _RedVarInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
RedVar* RedVar_Obj; // RedVar object of this entry
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry pairing an ID with a RedGroup (reduction group) object.
// `count` is a static shared by all _RedGrpInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _RedGrpInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
RedGroup* RedGroup_Obj; // RedGroup object of this entry
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry pairing an ID with a BoundGroup (shadow group) object and a
// per-processor time stamp array.
// `count` is a static shared by all _ShdGrpInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _ShdGrpInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
BoundGroup* BoundGroup_Obj; // BoundGroup object of this entry
double * ProcessTimeStamp; // array of per-processor time stamps
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry describing one started reduction: its ID and its start/end time.
// `count` is a static shared by all _ReductInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _ReductInfo {
long ID; // identifier of the entry
double time_start, time_end; // start and end time of the reduction
static int count; // shared counter returned by size()
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry describing one started shadow exchange: its ID and its
// start/end time.
// `count` is a static shared by all _ShadowInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _ShadowInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
double time_start, time_end; // start and end time of the exchange
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// State of a parallel loop: identification, mapping pattern, pending shadow
// requests (exfrst / imlast / across) and the ParLoop object itself.
// `count` is a static shared by all _ParLoopInfo objects: size() and the ++/--
// operators read and change that shared counter, not per-object state.
struct _ParLoopInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the loop
long Rank; // loop rank
long AlignType; // alignment kind; meaning of the values is not defined in this header
bool exfrst; // exfrst request flag
long exfrst_SGR; // shadow group reference of the exfrst request
bool imlast; // imlast request flag
long imlast_SGR; // shadow group reference of the imlast request
bool across; // across request flag
long across_SGR; // shadow group reference of the across request
int PatternType; // kind of object named by PatternID
long PatternID; // identifier of the pattern object
//====
int type_size;
bool Invers[10]; // fixed capacity of 10 entries
_ShdGrpInfo* SGnew; // shadow group entries associated with an across request
_ShdGrpInfo* SG;
//=***
std::vector<long> AxisArray;
ParLoop* ParLoop_Obj; // ParLoop object of this loop
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
// Table entry for one remote-access buffer: its ID, the RemAccessBuf object
// and per-processor start/end times of the communication.
// `count` is a static shared by all _RemAccessInfo objects: size() and the
// ++/-- operators read and change that shared counter, not per-object state.
struct _RemAccessInfo {
static int count; // shared counter returned by size()
long ID; // identifier of the entry
RemAccessBuf* RemAccess_Obj; // RemAccessBuf object of this entry
double* StartRemoteTimes; // per-processor start time of the communication
double* EndRemoteTimes; // per-processor end time of the communication
int size() { return count; } // current value of the shared counter
int operator ++() { return ++count; } // increments the shared counter
int operator --() { return --count; } // decrements the shared counter
};
#endif

View File

@@ -0,0 +1,248 @@
// ParLoop.cpp: implementation of the LoopLS class.
//
//////////////////////////////////////////////////////////////////////
#include "ParLoop.h"
extern int mode;
using namespace std;
extern ofstream prot;
// Creates a parallel loop of the given rank. The bound, step and inversion
// vectors get `ARank` zero entries, the align rule is empty, and the AMView
// pointer and the across flag/cost are cleared.
ParLoop::ParLoop(long ARank)
{
    Rank = ARank;

    AlignRule = vector<AlignAxis>(0);

    const vector<long> zeros(Rank);
    LowerIndex = zeros;
    HigherIndex = zeros;
    LoopStep = zeros;
    Invers = zeros;

    AM_Dis = 0;

    // No ACROSS information until Across() is called.
    AcrossFlag = 0;
    AcrossCost = 0;
}
// Nothing to release explicitly: the destructor body is empty.
ParLoop::~ParLoop()
{
}
//====
// Stores the ACROSS data of the loop: the bound cost object (pointer is kept
// as is) and the element size, which is stored in AcrossFlag.
void ParLoop::Across(CommCost *BoundCost,int type_size)
{
    AcrossCost = BoundCost;
    AcrossFlag = type_size;
}
//=***
long ParLoop::GetLoopSize()
{ int i;
long size=1;
for(i=0;i<Rank;i++)
size=size*GetSize(i);
return size;
};
// Returns the number of iterations along loop dimension plDim (0-based),
// computed as (Higher - Lower + Step) / Step.
// Returns 0 for a dimension outside [0, Rank) and for a zero step; the
// latter can be stored by SaveLoopParams and would otherwise divide by zero.
long ParLoop::GetSize(long plDim)
{
    if (plDim < 0 || plDim >= Rank)
        return 0;
    const long step = LoopStep[plDim];
    if (step == 0)
        return 0;
    return (HigherIndex[plDim] - LowerIndex[plDim] + step) / step;
}
// Stores the loop bounds and steps in normalised form.
//   AInInitIndex / AInLastIndex / AInLoopStep - per-dimension initial index,
//   last index and step; the first Rank entries of each are read.
// For a non-negative step the values are stored as given and Invers is 0.
// For a negative step the bounds are swapped, the step is stored as a
// positive value and Invers is 1.
void ParLoop::SaveLoopParams(const vector<long>& AInInitIndex,
    const vector<long>& AInLastIndex, const vector<long>& AInLoopStep)
{
    int i;
    long lv;

    // Drop the values left by the constructor or by a previous mapping.
    LowerIndex.clear();
    HigherIndex.clear();
    LoopStep.clear();
    Invers.clear();

    for (i = 0; i < Rank; i++)
    {
        if (AInLoopStep[i] >= 0) {
            LowerIndex.push_back(AInInitIndex[i]);
            HigherIndex.push_back(AInLastIndex[i]);
            LoopStep.push_back(AInLoopStep[i]);
            Invers.push_back(0);
        } else {
            // NOTE(review): when the distance is not a multiple of the step
            // the stored lower bound is Last + Step - lv, which lies below
            // AInLastIndex; confirm this is intended (Last + lv would be the
            // smallest index actually visited). Left unchanged.
            lv = (AInInitIndex[i] - AInLastIndex[i]) % AInLoopStep[i];
            if (lv)
                LowerIndex.push_back((AInLastIndex[i] + AInLoopStep[i] - lv));
            else
                LowerIndex.push_back(AInLastIndex[i]);
            HigherIndex.push_back(AInInitIndex[i]);
            LoopStep.push_back(-AInLoopStep[i]);
            Invers.push_back(1);
        }
        // The stored values are long, so they need %ld (was %d).
        if (mode) printf("Save %ld %ld %ld\n", LowerIndex[i], HigherIndex[i], LoopStep[i]);
    }
}
// Builds the initial alignment rule of the loop on a pattern of TempRank
// dimensions and appends it to IniRule.
//   AAxisArray / ACoeffArray / AConstArray - per pattern dimension: loop axis
//   (1-based, -1 for replication), coefficient and constant.
// IniRule receives Rank entries for the loop dimensions followed by TempRank
// entries for the pattern dimensions. LowerIndex must already be filled
// (see SaveLoopParams) because the constants are shifted by it.
void ParLoop::PrepareAlign(long& TempRank, const vector<long>& AAxisArray,
    const vector<long>& ACoeffArray, const vector<long>& AConstArray,
    vector<AlignAxis>& IniRule)
{
    int i;
    long IRSize = Rank + TempRank;

    IniRule.reserve(IRSize);

    // Defaults: every loop dimension collapsed, every pattern dimension NORMTAXIS.
    for (i = 0; i < Rank; i++)
        IniRule.push_back(AlignAxis(align_COLLAPSE, i+1, 0));
    for (i = Rank; i < IRSize; i++)
        IniRule.push_back(AlignAxis(align_NORMTAXIS, 0, i-Rank+1));

    // Override the defaults from the caller's axis / coefficient / constant arrays.
    for (i = 0; i < TempRank; i++)
    {
        // prot << "i=" << i << ", AAxisArray[i]=" << AAxisArray[i] << endl;
        // Rank, IRSize and AAxisArray[i] are long, so they need %ld (was %d).
        if (mode)
            printf("Rank=%ld IRSize=%ld i= %d AAxisArray[i]=%ld \n", Rank, IRSize, i, AAxisArray[i]);
        if (AAxisArray[i] == -1)
            IniRule[i+Rank] = AlignAxis(align_REPLICATE, 0, i+1);
        else if (ACoeffArray[i] == 0)
            IniRule[i+Rank] = AlignAxis(align_CONSTANT, 0, i+1, 0, AConstArray[i]);
        else {
            // NOTE(review): AAxisArray[i] is used as a 1-based loop axis here;
            // a value of 0 with a non-zero coefficient would index out of range.
            IniRule[i+Rank] = AlignAxis(align_NORMTAXIS, AAxisArray[i], i+1, ACoeffArray[i],
                AConstArray[i]+ACoeffArray[i]*LowerIndex[AAxisArray[i]-1]);
            IniRule[AAxisArray[i]-1] = AlignAxis(align_NORMAL, AAxisArray[i], i+1, ACoeffArray[i],
                AConstArray[i]+ACoeffArray[i]*LowerIndex[AAxisArray[i]-1]);
        }
    }
}
// Maps the parallel loop directly onto a distributed AM view.
// Logs "Wrong call MapPL" to prot and aborts when APattern is not distributed.
// On success AM_Dis is set to APattern, the normalised loop parameters are
// stored and AlignRule becomes the rule built by PrepareAlign.
void ParLoop::MapPL(AMView *APattern, const vector<long>& AAxisArray,
    const vector<long>& ACoeffArray, const vector<long>& AConstArray,
    const vector<long>& AInInitIndex, const vector<long>& AInLastIndex,
    const vector<long>& AInLoopStep)
{
    if (APattern->IsDistribute()) {
        long PatternRank = APattern->Rank();
        vector<AlignAxis> Rule;

        // The loop is mapped on the pattern itself.
        AM_Dis = APattern;

        // Bounds first: PrepareAlign reads LowerIndex.
        SaveLoopParams(AInInitIndex, AInLastIndex, AInLoopStep);
        PrepareAlign(PatternRank, AAxisArray, ACoeffArray, AConstArray, Rule);

        AlignRule = Rule;
        return;
    }

    prot << "Wrong call MapPL" << endl;
    abort();
}
// Maps the parallel loop onto the AM view of an already aligned distributed
// array (APattern). Logs "Wrong call MapPL" to prot and aborts when APattern
// is not aligned.
// The loop -> array rule built by PrepareAlign is composed with the array's
// own AlignRule, so the resulting AlignRule relates the loop to AM_Dis:
// entries [0, Rank) describe loop dimensions, entries
// [Rank, Rank + AM_Dis->Rank()) describe AM view dimensions.
void ParLoop::MapPL(DArray *APattern, const vector<long>& AAxisArray,
const vector<long>& ACoeffArray, const vector<long>& AConstArray,
const vector<long>& AInInitIndex, const vector<long>& AInLastIndex,
const vector<long>& AInLoopStep)
{
if (!APattern->IsAlign()) {
prot << "Wrong call MapPL" << endl;
abort();
}
long TempRank = APattern->Rank();
long ALSize;
int i;
vector<AlignAxis> TAlign,
IniRule;
AlignAxis aAl, tAl;
// The loop is mapped on the AM view the pattern array is aligned to
AM_Dis = APattern->AM_Dis;
// Store the normalised loop bounds and steps (PrepareAlign reads LowerIndex)
SaveLoopParams(AInInitIndex,AInLastIndex,AInLoopStep);
// Build the loop -> pattern array rule in IniRule
PrepareAlign(TempRank, AAxisArray, ACoeffArray, AConstArray, IniRule);
// Allocate the final rule: Rank loop entries plus AM_Dis->Rank() AM view entries
ALSize = Rank + AM_Dis->Rank();
TAlign = APattern->AlignRule;
AlignRule = vector<AlignAxis>(ALSize);
// Start the AM view part of the rule from the second part of the pattern
// array's rule (the entries after its first TempRank ones); the loops below
// overwrite individual entries.
for (i = 0; i < AM_Dis->Rank(); i++)
AlignRule[i+Rank]=TAlign[i+TempRank];
/* LU deb
printf("rank=%d tempRank=%d\n",Rank,TempRank);
for(i=0;i<AlignRule.size();i++)
printf("map %d %d %d %d ",AlignRule[i].A, AlignRule[i].B, AlignRule[i].Axis, AlignRule[i].TAxis);
printf("\n");
*/
// Pass 1 - loop dimensions. Only entries that PrepareAlign marked NORMAL are
// processed; they are composed with the pattern's rule for array dimension
// aAl.TAxis. Other loop entries keep the default-constructed AlignAxis.
for (i = 0; i < Rank; i++)
{ aAl = IniRule[i];
if (aAl.Attr == align_NORMAL)
{ tAl = TAlign[aAl.TAxis - 1];
switch (tAl.Attr)
// NORMAL over NORMAL: compose the two linear maps and mirror the result
// into the AM view entry of the target axis.
{ case align_NORMAL : aAl.TAxis = tAl.TAxis;
aAl.A *= tAl.A;
aAl.B = aAl.B * tAl.A + tAl.B;
AlignRule[i] = aAl;
AlignRule[Rank+aAl.TAxis-1].Axis = i+1;
AlignRule[Rank+aAl.TAxis-1].A = aAl.A;
AlignRule[Rank+aAl.TAxis-1].B = aAl.B;
break;
// The array dimension is collapsed, so the loop dimension is collapsed too.
case align_COLLAPSE : aAl.TAxis = 0;
aAl.Attr = align_COLLAPSE;
AlignRule[i] = aAl;
break;
};
};
};
/* LU deb
for(i=0;i<AlignRule.size();i++)
printf("map %d %d %d %d ",AlignRule[i].A, AlignRule[i].B, AlignRule[i].Axis, AlignRule[i].TAxis);
printf("\n");
*/
// Pass 2 - pattern dimensions. CONSTANT and REPLICATE entries of IniRule are
// pushed through the pattern's rule, but only where that rule is NORMAL.
for (i = 0; i < TempRank; i++)
{ aAl = IniRule[i+Rank];
switch (aAl.Attr)
// Constant index: apply the pattern's linear map to the constant.
{ case align_CONSTANT : tAl = TAlign[aAl.TAxis-1];
if (tAl.Attr == align_NORMAL)
{ aAl.TAxis = tAl.TAxis;
aAl.B = tAl.A * aAl.B + tAl.B;
AlignRule[Rank+tAl.TAxis-1] = aAl;
};
break;
// Replication along an aligned array dimension becomes a bounded
// replication (BOUNDREPL) with the pattern's coefficients.
// NOTE(review): Bound is taken from APattern->GetSize(tAl.TAxis) where
// tAl.TAxis is the target axis of the pattern's rule - confirm the intended
// argument and its index base.
case align_REPLICATE : tAl = TAlign[aAl.TAxis-1];
if (tAl.Attr == align_NORMAL)
{ aAl.Attr = align_BOUNDREPL;
aAl.TAxis = tAl.TAxis;
aAl.A = tAl.A;
aAl.B = tAl.B;
aAl.Bound = APattern->GetSize(tAl.TAxis);
AlignRule[Rank+tAl.TAxis-1] = aAl;
};
break;
};
};
/* LU deb
for(i=0;i<AlignRule.size();i++)
printf("map %d %d %d %d ",AlignRule[i].A, AlignRule[i].B, AlignRule[i].Axis, AlignRule[i].TAxis);
printf("\n");
*/
};

View File

@@ -0,0 +1,68 @@
#ifndef ParLoopH
#define ParLoopH
//////////////////////////////////////////////////////////////////////
//
// LoopLS.h: interface for the LoopLS class.
//
//////////////////////////////////////////////////////////////////////
#include "Space.h"
#include "AMView.h"
#include "AlignAxis.h"
//====
#include "CommCost.h"
//=***
#include "LoopBlock.h"
#include <vector>
#include <fstream>
class AMView;
//====
class CommCost;
//=***
// Model of a parallel loop: its normalised bounds and steps and the rule that
// aligns it with an abstract machine view (AMView).
class ParLoop {
// Builds the initial loop -> pattern alignment rule into IniRule;
// reads LowerIndex, so SaveLoopParams must be called first.
void PrepareAlign(long& TempRank, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
std::vector<AlignAxis>& IniRule);
// Stores bounds/steps so that every step is non-negative; Invers marks the
// dimensions whose original step was negative.
void SaveLoopParams(const std::vector<long>& AInInitIndex,
const std::vector<long>& AInLastIndex, const std::vector<long>& AInLoopStep);
public:
// Number of loop dimensions.
long Rank;
AMView *AM_Dis; // AMView the loop is mapped on (0 until MapPL is called)
std::vector<AlignAxis> AlignRule; // Rule for alignment of the loop with AM_Dis
// Per-dimension lower bound, upper bound and (non-negative) step.
std::vector<long> LowerIndex;
std::vector<long> HigherIndex;
std::vector<long> LoopStep;
// 1 for dimensions whose original step was negative, 0 otherwise.
std::vector<long> Invers;
//====
// Set by Across(): AcrossFlag holds the type size passed in, AcrossCost the
// cost object; both are 0 after construction.
int AcrossFlag;
CommCost* AcrossCost;
//=***
ParLoop(long ARank);
~ParLoop();
//====
// int isAcross();
void Across(CommCost* BoundCost,int type_size);
//=***
// Iteration count along one 0-based dimension; 0 when plDim >= Rank.
long GetSize(long plDim);
// Product of GetSize() over all dimensions.
long GetLoopSize();
// Maps the loop on a distributed AM view; aborts when it is not distributed.
void MapPL(AMView *APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
const std::vector<long>& AInInitIndex, const std::vector<long>& AInLastIndex,
const std::vector<long>& AInLoopStep);
// Maps the loop on the AM view of an aligned distributed array; aborts when
// the array is not aligned.
void MapPL(DArray *APattern, const std::vector<long>& AAxisArray,
const std::vector<long>& ACoeffArray, const std::vector<long>& AConstArray,
const std::vector<long>& AInInitIndex, const std::vector<long>& AInLastIndex,
const std::vector<long>& AInLoopStep);
};
#endif

View File

@@ -0,0 +1,496 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <fstream>
#include "ParseString.h"
using namespace std;
extern ofstream prot;
#ifdef __GNUC__
//char *_fcvt( double value, int count, int *dec, int *sign );
// Replacement for the non-standard _itoa on GNU toolchains.
// Only radix 10 is supported: any other radix is logged to prot and the
// program aborts. Writes the decimal text of value into str and returns str.
char *_itoa( int value, char *str, int radix )
{
    if (radix == 10) {
        sprintf(str, "%d", value);
        return str;
    }
    prot << "Radix in the function '_itoa' no equal '10'" << endl;
    abort();
}
// Replacement for the non-standard _ltoa on GNU toolchains.
// Only radix 10 is supported: any other radix is logged to prot and the
// program aborts. Writes the decimal text of value into str and returns str.
char *_ltoa( long value, char *str, int radix )
{
    if (radix == 10) {
        sprintf(str, "%ld", value);
        return str;
    }
    prot << "Radix in the function '_ltoa' no equal '10'" << endl;
    abort();
}
#endif
//******* SERVICE ROUTINES
// Parses a decimal integer from the start of str (atoi semantics: leading
// whitespace is skipped, parsing stops at the first non-digit).
int StrToInt(char* str)
{
    const int parsed = atoi(str);
    return parsed;
}
// Converts the text in str (optionally terminated by ';') to a long in the
// given base.
// The text is split in two: the last 2*sizeof(long) characters form the low
// part and whatever precedes them forms the high part. Both parts are parsed
// with strtoul. The high part is then byte-shuffled (16-bit halves swapped,
// then the bytes inside each half swapped, masked to 32 bits) and added to
// the low part, so a non-zero high part still changes the result.
// Fixes over the previous version:
//   - an empty string no longer reads str[-1];
//   - a high part longer than the buffer no longer overflows tmp_hi; only the
//     2*sizeof(long) characters adjacent to the low part are kept.
long StrToLong(char* str, int base)
{
    //====
    long hi,lo;
    char tmp_hi[2*sizeof(long)+1];
    char tmp_lo[2*sizeof(long)+1];
    char *tmp;
    int i, sz=2*sizeof(long);

    // Length of the text without a trailing ';'.
    i=strlen(str);
    if(i>0 && str[i-1]==';') i--;

    if(i>sz) {
        // High part: everything before the last sz characters, clamped to the
        // buffer capacity.
        int hi_len = i-sz;
        char* hi_start = str;
        if(hi_len>sz) { hi_start = str+(hi_len-sz); hi_len = sz; }
        strncpy(tmp_hi,hi_start,hi_len); tmp_hi[hi_len]=0;
        // Low part: the last sz characters.
        strncpy(tmp_lo,str+i-sz,sz); tmp_lo[sz]=0;
    } else {
        tmp_hi[0]=0;
        strncpy(tmp_lo,str,i); tmp_lo[i]=0;
    }

    hi=strtoul(tmp_hi,&tmp,base);
    lo=strtoul(tmp_lo,&tmp,base);
    // printf("STR->'%s'-> RES::%lx+%lx ",str,hi,lo);

    // Shuffle the bytes of the high part before folding it into the result.
    hi = (hi>>16) | (hi<<16);
    hi = ((hi<<8) & 0xff00ff00)|((hi>>8) & 0x00ff00ff);
    lo += hi;
    // printf("-> %lx\n",lo);
    return lo;
    //was return strtoul(str,&tmp,base);
    //=***
}
// Parses a floating point number from the start of str (strtod semantics);
// returns 0.0 when no number can be read.
double StrToDouble(char* str)
{
    return strtod(str, NULL);
}
// Writes the decimal representation of val into the buffer str points to.
// The caller must provide a buffer large enough for the digits, an optional
// sign and the terminating NUL.
void IntToStr(char*& str, int val)
{
    const int decimal_radix = 10;
    _itoa(val, str, decimal_radix);
}
// Writes the representation of val in the given base into the buffer str
// points to. On GNU builds the _ltoa replacement in this file only accepts
// base 10 and aborts otherwise.
void LongToStr(char*& str, long val, int base)
{
    char* const out = str;
    _ltoa(val, out, base);
}
// Writes the digits of val (six decimals, decimal point removed) into the
// buffer str points to, then logs the value and the text to prot.
// Fixes over the previous version (GNU branch only):
//   - the converted text was built in a local buffer and never copied to
//     str, so str was logged with whatever it held before;
//   - sprintf could overflow the 256-byte local buffer for very large
//     values; snprintf truncates instead.
// NOTE(review): the sign character is skipped so the result matches the
// digits-only string produced by _fcvt in the other branch - confirm that
// callers do not expect the sign. The caller's buffer must be large enough.
void DoubleToStr(char*& str, double val)
{
#ifdef __GNUC__
    char st[256];
    snprintf(st, sizeof(st), "%+f", val);

    // "%+f" always puts a sign at st[0]; drop the '.' from the rest by
    // shifting the following characters one position left.
    int i = 0;
    int j = 0;
    while (st[++i] != 0) {
        if (st[i] == '.') {
            j = 1;
            continue;
        }
        st[i - j] = st[i];
    }
    st[i - j] = 0;

    // Hand the digits (without the sign) back to the caller.
    strcpy(str, st + 1);
#else
    int dec,
        sign;
    strcpy(str,_fcvt(val,6,&dec,&sign));
#endif
    prot << "DoubleToStr=(" << val << ')' << str << endl;
}
//******* STRING PARAMETERS
// Looks for "<par_name> =value" or "<par_name> [value" in str and copies the
// text that follows '=' / '[' up to the next space (or end of string) into
// par_val. Spaces between the name and '=' / '[' are skipped; spaces after
// '=' are not, so "name = 10" yields an empty value.
// Occurrences of the name that are not followed by '=' or '[' are skipped and
// the search continues behind them.
// Returns 1 when a value was copied, 0 when str is NULL or nothing matched.
// NOTE: par_val must be large enough; the copy is not bounded.
int ParamFromString(char * par_name, char* par_val, char* str)
{
    // cout << "par_name=" << par_name << " str=" << str << endl;
    if (str == NULL) return 0;

    char* cursor = str;
    for (;;) {
        char* hit = strstr(cursor, par_name);
        if (hit == NULL) return 0;

        hit += strlen(par_name);
        while (*hit == ' ') ++hit; //====//

        if (*hit == '=' || *hit == '[') {
            ++hit;
            const int value_len = strcspn(hit, " ");
            strncpy(par_val, hit, value_len);
            par_val[value_len] = '\0';
            return 1;
        }

        // Not an assignment: keep searching behind this occurrence.
        cursor = hit;
    }
}
// Looks up the indexed parameter "<par_name>[<par_idx>]" in str and copies
// its value into par_val (see the char* ParamFromString for the value rules).
// Returns 1 when found, 0 otherwise or when str is NULL.
// Fixes over the previous version: the index was formatted into an 8-byte
// heap buffer (too small for a full int) and the key was assembled in a
// fixed 64-byte array; both could overflow. The key is now built in a
// std::string and no heap buffer is needed.
int IndexParamFromString(char * par_name, int par_idx, char*& par_val, char* str)
{
    if (str == NULL) return 0;

    char idx_str[24];
    sprintf(idx_str, "%d", par_idx);

    std::string key(par_name);
    key += '[';
    key += idx_str;
    key += ']';

    // ParamFromString only reads the name; the cast is needed because its
    // parameter is declared char*.
    return ParamFromString(const_cast<char*>(key.c_str()), par_val, str);
}
// Finds "<par_name>[<par_idx>]" in str and, from that position on, looks up
// the parameter sub_par and copies its value into par_val.
// Returns 1 when found, 0 otherwise or when str is NULL.
// Fixes over the previous version: the index was formatted into an 8-byte
// heap buffer (too small for a full int) and the key was assembled in a
// fixed 64-byte array; both could overflow. The key is now built in a
// std::string and no heap buffer is needed.
int SubParamFromString(char * par_name, int par_idx, char* sub_par, char*& par_val, char* str)
{
    if (str == NULL) return 0;

    char idx_str[24];
    sprintf(idx_str, "%d", par_idx);

    std::string key(par_name);
    key += '[';
    key += idx_str;
    key += ']';

    char* subpar_str = strstr(str, key.c_str());
    if (subpar_str == NULL)
        return 0;
    return ParamFromString(sub_par, par_val, subpar_str);
}
// Searches the first str_cnt strings of str_arr, in order, for par_name and
// copies the first value found into par_val.
// Returns the non-zero result of the first successful lookup, 0 if none.
int ParamFromStrArr(char * par_name, char*& par_val, char** str_arr, int str_cnt)
{
    for (int line = 0; line < str_cnt; ++line) {
        const int found = ParamFromString(par_name, par_val, str_arr[line]);
        if (found != 0)
            return found;
    }
    return 0;
}
// Searches the first str_cnt strings of str_arr, in order, for the indexed
// parameter par_name[par_idx] and copies the first value found into par_val.
// Returns the non-zero result of the first successful lookup, 0 if none.
int IndexParamFromStrArr(char * par_name, int par_idx, char*& par_val, char** str_arr, int str_cnt)
{
    for (int line = 0; line < str_cnt; ++line) {
        const int found = IndexParamFromString(par_name, par_idx, par_val, str_arr[line]);
        if (found != 0)
            return found;
    }
    return 0;
}
// Searches the first str_cnt strings of str_arr, in order, for sub_par
// following par_name[par_idx] and copies the first value found into par_val.
// Returns the non-zero result of the first successful lookup, 0 if none.
int SubParamFromStrArr(char * par_name, int par_idx, char* sub_par, char*& par_val, char** str_arr, int str_cnt)
{
    for (int line = 0; line < str_cnt; ++line) {
        const int found = SubParamFromString(par_name, par_idx, sub_par, par_val, str_arr[line]);
        if (found != 0)
            return found;
    }
    return 0;
}
//******* INTEGER PARAMETERS
// Reads parameter par_name from str as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromString(char * par_name, int& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromString(par_name, buffer, str);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from str as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromString(char * par_name, int par_idx, int& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromString(par_name, par_idx, buffer, str);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from str as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromString(char * par_name, int par_idx, char* sub_par, int& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromString(par_name, par_idx, sub_par, buffer, str);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
// Reads parameter par_name from the first matching string of str_arr as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromStrArr(char * par_name, int& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromStrArr(par_name, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from the first matching
// string of str_arr as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromStrArr(char * par_name, int par_idx, int& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromStrArr(par_name, par_idx, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from the first matching
// string of str_arr as an int.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 16-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromStrArr(char * par_name, int par_idx, char* sub_par, int& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromStrArr(par_name, par_idx, sub_par, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToInt(buffer);
    return res;
}
//******* LONG PARAMETERS
// Reads parameter par_name from str as a long in the given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromString(char * par_name, long& par_val, char* str, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromString(par_name, buffer, str);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from str as a long in the
// given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromString(char * par_name, int par_idx, long& par_val, char* str, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromString(par_name, par_idx, buffer, str);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from str as a long in the
// given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromString(char * par_name, int par_idx, char* sub_par, long& par_val, char* str, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromString(par_name, par_idx, sub_par, buffer, str);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
// Reads parameter par_name from the first matching string of str_arr as a
// long in the given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromStrArr(char * par_name, long& par_val, char** str_arr, int str_cnt, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromStrArr(par_name, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from the first matching
// string of str_arr as a long in the given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromStrArr(char * par_name, int par_idx, long& par_val, char** str_arr, int str_cnt, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromStrArr(par_name, par_idx, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from the first matching
// string of str_arr as a long in the given base.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// StrToLong accepts texts longer than 2*sizeof(long) characters, which did
// not fit the former 16-byte heap buffer; a larger stack buffer is used.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromStrArr(char * par_name, int par_idx, char* sub_par, long& par_val, char** str_arr, int str_cnt, int base)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromStrArr(par_name, par_idx, sub_par, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToLong(buffer, base);
    return res;
}
//******* FLOAT PARAMETERS
// Reads parameter par_name from str as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromString(char * par_name, double& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromString(par_name, buffer, str);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from str as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromString(char * par_name, int par_idx, double& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromString(par_name, par_idx, buffer, str);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from str as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromString(char * par_name, int par_idx, char* sub_par, double& par_val, char* str)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromString(par_name, par_idx, sub_par, buffer, str);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
// Reads parameter par_name from the first matching string of str_arr as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int ParamFromStrArr(char * par_name, double& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = ParamFromStrArr(par_name, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
// Reads the indexed parameter par_name[par_idx] from the first matching
// string of str_arr as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int IndexParamFromStrArr(char * par_name, int par_idx, double& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = IndexParamFromStrArr(par_name, par_idx, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
// Reads sub-parameter sub_par of par_name[par_idx] from the first matching
// string of str_arr as a double.
// par_val is only written when the parameter is found; returns the lookup
// result (non-zero on success).
// The value text used to go into a 32-byte heap buffer guarded only by an
// assert; a larger stack buffer removes the allocation and gives headroom.
// NOTE: the string lookup copies without a bound, so a value of 64 or more
// characters would still overflow.
int SubParamFromStrArr(char * par_name, int par_idx, char* sub_par, double& par_val, char** str_arr, int str_cnt)
{
    char storage[64];
    char* buffer = storage;
    const int res = SubParamFromStrArr(par_name, par_idx, sub_par, buffer, str_arr, str_cnt);
    if (res != 0)
        par_val = StrToDouble(buffer);
    return res;
}
//******* SPECIAL ROUTINES
//==================================================================================
// std::string convenience overload: forwards to the C library atof.
double atof( const std::string& str )
{
    const char* text = str.c_str();
    return ::atof(text);
}
// std::string convenience overload: forwards to the C library atoi.
int atoi( const std::string& str )
{
    const char* text = str.c_str();
    return ::atoi(text);
}
// std::string convenience overload: forwards to the C library atol.
long atol( const std::string& str )
{
    const char* text = str.c_str();
    return ::atol(text);
}
// Extracts the value of "<par_name> = <value>" from str.
// Spaces around '=' are optional. The value ends at the first ' ', ',' or
// ';' and that terminator is required.
// Returns true and stores the value in par_val on success; returns false and
// leaves par_val untouched otherwise.
// Fixes over the previous version:
//   - substr() was given the end position instead of a length, so the
//     terminator and following text leaked into the value;
//   - one character after the name was skipped unconditionally, so
//     "name=value;" was rejected;
//   - str[npos] was read when nothing followed the name or the '='.
bool ParamFromString(const std::string& par_name, std::string& par_val, const std::string& str)
{
    std::string::size_type i = str.find(par_name);
    if (i == std::string::npos) return false;

    // Skip the name and any spaces before '='.
    i = str.find_first_not_of(' ', i + par_name.size());
    if (i == std::string::npos || str[i] != '=') return false;

    // Skip '=' and any spaces before the value.
    i = str.find_first_not_of(' ', i + 1);
    if (i == std::string::npos) return false;

    const std::string::size_type j = str.find_first_of(" ,;", i);
    if (j == std::string::npos) return false;

    par_val = str.substr(i, j - i);
    return true;
}
bool ParamFromString(const string& par_name, int& par_val, const string& str)
{
string par;
if (ParamFromString(par_name, par, str)) {
par_val = atoi(par.c_str());
return true;
} else {
par_val = 0;
return false;
}
}
bool ParamFromString(const string& par_name, long& par_val, const string& str)
{
string par;
if (ParamFromString(par_name, par, str)) {
par_val = atol(par.c_str());
return true;
} else {
par_val = 0L;
return false;
}
}
bool ParamFromString(const string& par_name, double& par_val, const string& str)
{
string par;
if (ParamFromString(par_name, par, str)) {
par_val = atof(par.c_str());
return true;
} else {
par_val = 0.;
return false;
}
}
// Extracts the value of "<par_name> = <value>" from str.
// Spaces around '=' are optional. The value ends at the first ' ', ',' or
// ';' and that terminator is required.
// Returns true and stores the value in par_val on success; returns false and
// leaves par_val untouched otherwise.
// NOTE(review): ind is accepted but not used - this overload behaves exactly
// like the one without an index. Presumably it was meant to look up
// "<par_name>[ind]"; confirm before relying on it.
// Fixes over the previous version:
//   - substr() was given the end position instead of a length, so the
//     terminator and following text leaked into the value;
//   - one character after the name was skipped unconditionally, so
//     "name=value;" was rejected;
//   - str[npos] was read when nothing followed the name or the '=';
//   - unused locals removed.
bool ParamFromString(const std::string& par_name, int ind, std::string& par_val, const std::string& str)
{
    (void)ind;

    std::string::size_type i = str.find(par_name);
    if (i == std::string::npos) return false;

    // Skip the name and any spaces before '='.
    i = str.find_first_not_of(' ', i + par_name.size());
    if (i == std::string::npos || str[i] != '=') return false;

    // Skip '=' and any spaces before the value.
    i = str.find_first_not_of(' ', i + 1);
    if (i == std::string::npos) return false;

    const std::string::size_type j = str.find_first_of(" ,;", i);
    if (j == std::string::npos) return false;

    par_val = str.substr(i, j - i);
    return true;
}
// Scans the first str_cnt strings of str_arr in order and reports the first
// modifier marker found:
//   1 - a string contains "(AMView)"
//   2 - a string contains "(DisArray)"
//   0 - neither marker appears
// Within one string "(AMView)" takes precedence.
int ModifierFromStrArr(char** str_arr, int str_cnt)
{
    int idx = 0;
    while (idx < str_cnt) {
        const char* line = str_arr[idx++];
        if (strstr(line, "(AMView)"))
            return 1;
        if (strstr(line, "(DisArray)"))
            return 2;
    }
    return 0;
}

View File

@@ -0,0 +1,85 @@
#ifndef _PARSESTRING_H
#define _PARSESTRING_H
#include <string>
#include "Event.h"
//*************** for GNU only ****************//
#ifdef __GNUC__
char *_itoa(int value, char *str, int radix);
char *_ltoa(long value, char *str, int radix);
#endif
double atof( const std::string& str );
int atoi( const std::string& str );
long atol( const std::string& str );
//************* INTEGER PARAMETERS *************//
extern int ParamFromString(char* par_name, int& par_val, char* str);
extern int IndexParamFromString(char* par_name, int par_idx, int& par_val, char* str);
extern int SubParamFromString(char* par_name, int par_idx, char* sub_par, int& par_val,
char* str);
extern int ParamFromStrArr(char* par_name, int& par_val, char** str_arr, int str_cnt);
extern int IndexParamFromStrArr(char* par_name, int par_idx, int& par_val, char** str_arr,
int str_cnt);
extern int SubParamFromStrArr(char* par_name, int par_idx, char* sub_par, int& par_val,
char** str_arr, int str_cnt);
bool ParamFromString(const std::string& par_name, std::string::size_type& par_val,
const std::string& str);
//************** LONG PARAMETERS *************//
extern int ParamFromString(char* par_name, long& par_val, char* str, int base);
extern int IndexParamFromString(char* par_name, int par_idx, long& par_val, char* str, int base);
extern int SubParamFromString(char* par_name, int par_idx, char* sub_par, long& par_val, char* str, int base);
extern int ParamFromStrArr(char* par_name, long& par_val, char** str_arr, int str_cnt, int base);
extern int IndexParamFromStrArr(char* par_name, int par_idx, long& par_val, char** str_arr, int str_cnt, int base);
extern int SubParamFromStrArr(char* par_name, int par_idx, char* sub_par, long& par_val, char** str_arr, int str_cnt, int base);
//************ FLOAT PARAMETERS *************//
extern int ParamFromString(char* par_name, double& par_val, char* str);
extern int IndexParamFromString(char* par_name, int par_idx, double& par_val, char* str);
extern int SubParamFromString(char* par_name, int par_idx, char* sub_par, double& par_val, char* str);
extern int ParamFromStrArr(char* par_name, double& par_val, char** str_arr, int str_cnt);
extern int IndexParamFromStrArr(char* par_name, int par_idx, double& par_val, char** str_arr, int str_cnt);
extern int SubParamFromStrArr(char* par_name, int par_idx, char* sub_par, double& par_val, char** str_arr, int str_cnt);
//************ STRING PARAMETERS *************//
// Each function copies the value text into par_val and returns non-zero when
// the parameter was found. The caller provides the destination buffer.
extern int ParamFromString(char* par_name, char* par_val, char* str);
// par_idx is an int, matching the definition in ParseString.cpp (it was
// declared as char* here, which matched no definition).
extern int IndexParamFromString(char* par_name, int par_idx, char*& par_val, char* str);
extern int SubParamFromString(char* par_name, int par_idx, char* sub_par, char*& par_val, char* str);
extern int ParamFromStrArr(char* par_name, char*& par_val, char** str_arr, int str_cnt);
extern int IndexParamFromStrArr(char* par_name, int par_idx, char*& par_val, char** str_arr, int str_cnt);
extern int SubParamFromStrArr(char* par_name, int par_idx, char* sub_par, char*& par_val, char** str_arr, int str_cnt);
//************ SERVICE ROUTINES *************//
extern int StrToInt(char* str);
// NOTE(review): no definition of the one-argument StrToLong is visible in
// ParseString.cpp - confirm it exists elsewhere or remove the declaration.
extern long StrToLong(char* str);
extern long StrToLong(char* str, int base);
extern double StrToDouble(char* str);
extern void IntToStr(char*& str, int val);
// NOTE(review): ParseString.cpp defines LongToStr with a base parameter
// only; the two-argument form is kept for existing includes.
extern void LongToStr(char*& str, long val);
// Declaration matching the definition in ParseString.cpp.
extern void LongToStr(char*& str, long val, int base);
extern void DoubleToStr(char*& str, double val);
//-----------------------------------------------------------------------------------
bool ParamFromString(const std::string& par_name, std::string& par_val, const std::string& str);
bool ParamFromString(const std::string& par_name, int& par_val, const std::string& str);
bool ParamFromString(const std::string& par_name, long& par_val, const std::string& str);
bool ParamFromString(const std::string& par_name, double& par_val, const std::string& str);
int ModifierFromStrArr(char** str_arr, int str_cnt);
#endif

View File

@@ -0,0 +1,73 @@
#include "Interval.h"
#include "Vm.h"
double *procElapsedTime; // processor's elapsed times vector
int rootProcCount; // number of processors in root VM
// Creates a processor record with every time characteristic set to zero.
// The initialisers follow the declaration order in Processor.h (Remote_vary
// is declared before Remote_synch); the previous list had those two swapped,
// which triggers -Wreorder.
Processor::Processor() :
    Lost_time(0.0),
    Insuff_parallelism(0.0),
    Insuff_parallelism_usr(0.0),
    Insuff_parallelism_sys(0.0),
    Idle(0.0),
    Communication(0.0),
    Synchronization(0.0),
    Real_synchronization(0.0),
    Variation(0.0),
    Overlap(0.0),
    Load_imbalance(0.0),
    Execution_time(0.0),
    CPU_time(0.0),
    CPU_time_usr(0.0),
    CPU_time_sys(0.0),
    IO_time(0.0),
    IO_comm(0.0),
    IO_real_synch(0.0),
    IO_synch(0.0),
    IO_vary(0.0),
    IO_overlap(0.0),
    Wait_reduction(0.0),
    Reduction_real_synch(0.0),
    Reduction_synch(0.0),
    Reduction_vary(0.0),
    Reduction_overlap(0.0),
    Wait_shadow(0.0),
    Shadow_real_synch(0.0),
    Shadow_synch(0.0),
    Shadow_vary(0.0),
    Shadow_overlap(0.0),
    Remote_access(0.0),
    Remote_real_synch(0.0),
    Remote_vary(0.0),
    Remote_synch(0.0),
    Remote_overlap(0.0),
    Redistribution(0.0),
    Redistribution_real_synch(0.0),
    Redistribution_synch(0.0),
    Redistribution_vary(0.0),
    Redistribution_overlap(0.0)
{
}
// Synchronises the processors of the current VM: finds the largest elapsed
// time among them and, for every processor, reports the gap up to that
// maximum through AddTimeSynchronize and AddTime under category InfoType.
void MPSSynchronize(TimeType InfoType)
{
double max_time=0;
int i;
// Pass 1: the maximum elapsed time (never below the initial 0).
for (i = 0; i < MPSProcCount(); i++)
if (procElapsedTime[currentVM->map(i)] > max_time)
max_time = procElapsedTime[currentVM->map(i)];
// Pass 2: account the waiting time of every processor.
// NOTE(review): the gap is re-read from procElapsedTime for the AddTime
// call, after AddTimeSynchronize has run. If AddTimeSynchronize changes
// procElapsedTime the second gap differs from the first - do not hoist the
// expression without checking those helpers.
for (i = 0; i < MPSProcCount(); i++) {
AddTimeSynchronize(InfoType, currentVM->map(i),
(max_time - procElapsedTime[currentVM->map(i)]));
AddTime(InfoType, currentVM->map(i),
(max_time - procElapsedTime[currentVM->map(i)]));
}
}

View File

@@ -0,0 +1,91 @@
#ifndef _PROCESSOR_H
#define _PROCESSOR_H
// Categories of time that can be charged to a processor (passed as InfoType
// to MPSSynchronize). Numbering starts at 1 and continues sequentially, so
// the order of the enumerators is significant.
enum TimeType {
__IO_time = 1,
__CPU_time,
__CPU_time_sys,
__CPU_time_usr,
__Wait_reduct,
__Wait_shadow,
__Remote_access,
__Remote_overlap,
__Redistribute,
__IO_comm,
__Other_comm,
__Insuff_parall_sys,
__Insuff_parall_usr,
__Synchronize,
__Variation,
__Reduct_overlap,
__Shadow_overlap,
};
// Per-processor set of time characteristics. All members are public doubles
// and are zeroed by the constructor. The declaration order below is also the
// initialisation order.
class Processor {
friend class Interval;
public:
double
Lost_time,
Insuff_parallelism, // Insuff_parallelism_usr + Insuff_parallelism_sys
Insuff_parallelism_usr, // User insufficient parallelism
Insuff_parallelism_sys, // System insufficient parallelism
Idle, // Idle time
Communication, // Communications
Synchronization, // Synchronization
Real_synchronization, // Real synchronization
Variation, // Time variation
Overlap, // Overlap
Load_imbalance, // Load imbalance
Execution_time, // Execution time
CPU_time, // CPU_time_usr + CPU_time_sys
CPU_time_usr, // Useful processor time
CPU_time_sys, // Useful system time
IO_time,
IO_comm, // IO: Communications
IO_real_synch, // IO: Real synch
IO_synch, // IO: Synchronization
IO_vary, // IO: Time variation
IO_overlap, // IO: Overlap
Wait_reduction, // Reduction: Communications
Reduction_real_synch, // Reduction: Real synch
Reduction_synch, // Reduction: Synchronization
Reduction_vary, // Reduction: Time variation
Reduction_overlap, // Reduction: Overlap
Wait_shadow, // Shadow: Communications
Shadow_real_synch, // Shadow: Real synch
Shadow_synch, // Shadow: Synchronization
Shadow_vary, // Shadow: Time variation
Shadow_overlap, // Shadow: Overlap
Remote_access, // Remote access: Communications
Remote_real_synch, // Remote access: Real synch
Remote_vary, // Remote access: Time variation
Remote_synch, // Remote access: Synchronization
Remote_overlap, // Remote access: Overlap
Redistribution, // Redistribution: Communications
Redistribution_real_synch, // Redistribution: Real synch
Redistribution_synch, // Redistribution: Synchronization
Redistribution_vary, // Redistribution: Time variation
Redistribution_overlap; // Redistribution: Overlap
public:
Processor();
~Processor() {}
};
extern void MPSSynchronize(TimeType InfoType);
extern double * procElapsedTime; // processors elapsed times vector
extern int rootProcCount; // number of processors in root VM
// Returns the elapsed time recorded for processor proc_no.
// No bounds check: proc_no must be a valid index into procElapsedTime.
inline double CurrProcTime(int proc_no)
{
    return procElapsedTime[proc_no];
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,123 @@
#ifndef __PS_H
#define __PS_H
// #pragma warning(disable: 4786)
#include <iostream>
#include <vector>
#include <queue>
#include <string>
#include "Vm.h"
//grigory add-on
using namespace std;
void ClustError (int num_error);
// Description of a single processor inside a cluster.
struct ProcInfo
{
// Number of the cluster the processor belongs to.
int numClust;
// Number of the processor within that cluster.
int numInClust;
// Processor performance value (units are not visible here).
double ProcPower;
};
typedef struct ProcInfo strProcInfo;
// Description of one cluster: its number and the processors it contains.
class ClustInfo
{
public :
// Number of this cluster.
int numClust;
// Processors registered in this cluster.
vector <strProcInfo> Procs;
ClustInfo();
ClustInfo(int num);
// Registers a processor with the given number and performance value.
void AddProc(int num,double power);
// Sets the cluster number.
void setNum(int num) {this->numClust=num;}
// NOTE(review): the two setters below were bracketed by marker comments that
// were lost to an encoding error. The class declares no members for these
// values - check the implementation for what they store.
void setTStart(double tstart);
void setTByte(double tbyte);
// (end of the bracketed section)
};
typedef class ClustInfo classClustInfo;
// The whole processor system as a list of clusters.
class CompletePS
{
public :
// All known clusters.
vector<classClustInfo> Clusters;
CompletePS();
// Adds a cluster with the given number.
void MakeNewCluster(int numClust);
// Adds a processor description to cluster numClust.
void AddProcToClust(int numClust,int numProc,strProcInfo procInfo);
// The two methods below exchange data through the vector argument;
// the exact contents are defined by the implementation (not visible here).
void MakeFullMapping(std::vector<double>& result);
void SortProcessors(std::vector<double> &array_of_productivity);
};
// List of processors selected (mapped) from the clusters.
class MappedProcs
{
public:
// The selected processors.
vector<strProcInfo> Processors;
// Appends one processor description.
void AddProc(strProcInfo& procInfo);
MappedProcs() ;
// Adds processors of cluster cPS chosen by start / end / step
// (presumably an index range with a stride - confirm in the implementation).
void AddProccessors(int start,int end,int step , ClustInfo &cPS);
};
//\grigory add-on
typedef std::vector<long> LongVector;
typedef std::vector<double> DoubleVector;
// Processor-system description read from an options file or built from
// explicit parameters. Configurations are kept in queues and handed out by nextPS().
class PS {
// Reads the next option line of `opt_file` into `buffer`.
static bool NextOptionLine(std::istream& opt_file, std::string& buffer);
std::queue<LongVector> ps_lb_list; // low boundaries on each dim
std::queue<LongVector> SizeArray_list; // extents on each dim
std::queue<DoubleVector> weight_list; // list of vectors - PS weights
mach_Type Type; // machine (interconnect) type
int numChanels; // number of parallel channels in Myrinet
double TStart,
TByte,
ProcPower,
scale; // scaling factor (its use is not visible in this header)
//grigory add-on
public :
CompletePS completePS; // cluster description (original comment unreadable)
MappedProcs mappedProcs; // selected processors (original comment unreadable)
//std::vector<LongVector> vWeights;
vector<double> vProcPower;// per-processor power values (original comment unreadable)
public :
int getProcCount();
void CorrectMappedProcs();
// NOTE(review): "perstanovki" is transliterated Russian, presumably "permutations".
void PrepareForAutoSearch(std::vector<long>& perstanovki);
void reset();
//\grigory add-on
public:
// read PS's configurations from the file
PS(const char* file_name);
PS(mach_Type AType, int AnumChanels, double TStart, double TByte, int proc_num);
// returns next processor system's characteristics through the out-parameters
void nextPS(std::vector<long>& lb, std::vector<long>& ASizeArray,
mach_Type& AMType, int& AnumChanels, double& Ascale,
double& ATStart, double& ATByte, double& AProcPower, vector<double>& AvProcPower);
void setTopology(std::vector<long>& ASizeArray);
// Like nextPS() it reports through out-parameters; note AMType is an int here.
void hardwarePS(int& AMType, double& ATStart, double& ATByte, double& AProcPower,vector<double> & AvProcPower);
#ifdef P_DEBUG
friend std::ostream& operator << (std::ostream& os, const PS& ps);
#endif
};
extern PS * ps; // prosessor system object
extern long currentPS_ID; // current PS ID
#endif

View File

@@ -0,0 +1,181 @@
//////////////////////////////////////////////////////////////////////
//
// RedGroup.cpp: implementation of the RedGroup class.
//
//////////////////////////////////////////////////////////////////////
#include "RedGroup.h"
using namespace std;
extern ofstream prot;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Creates an empty reduction group bound to the virtual machine AvmPtr.
// The variable list starts empty, the accumulated size is zero and the
// central processor is whatever the VM reports through GetCenterLI().
// AvmPtr is dereferenced here, so it must not be NULL.
RedGroup::RedGroup(VM *AvmPtr):
	vmPtr(AvmPtr),
	redVars(),
	TotalSize(0),
	CentralProc(AvmPtr->GetCenterLI())
{
}
// Nothing to release: the destructor does not delete the RedVar objects
// stored in redVars, nor the VM behind vmPtr.
RedGroup::~RedGroup()
{
}
//////////////////////////////////////////////////////////////////////
// Add reduction variable to reduction groupe
//////////////////////////////////////////////////////////////////////
// Appends ARedVar to the group and adds its GetSize() to TotalSize.
// ARedVar must not be NULL; the pointer is stored, not copied.
void RedGroup::AddRV(RedVar * ARedVar)
{
	redVars.push_back(ARedVar);

	const long addedSize = ARedVar->GetSize();
	TotalSize = TotalSize + addedSize;
}
//////////////////////////////////////////////////////////////////////
// Calculate reduction time
//////////////////////////////////////////////////////////////////////
// Estimates the time of the group's reduction for a loop aligned with the
// distributed array APattern.
//   APattern   - pattern array; when its Repl flag is set the result is 0
//   ALoopRank  - number of loop dimensions; AAxisArray must hold at least that many entries
//   AAxisArray - per loop dimension, the array axis handed to GetMapDim()
// Returns the estimate multiplied by the VM scale factor; 0 when no loop
// dimension maps onto a VM axis or the machine type has no branch below.
double RedGroup::StartR(DArray *APattern, long ALoopRank, const vector<long>& AAxisArray)
{
	double			estimate = 0;
	long			d;
	long			blockProcs = 1;			// product of VM extents over the mapped loop dims
	long			halfExtents = 0;		// sum of (VM extent / 2) over the mapped loop dims
	vector<long>	vmAxis(ALoopRank);		// GetMapDim() result per loop dim; zero entries are skipped
	int				dir;					// filled by GetMapDim(), not used afterwards
	const long		vmProcs = vmPtr->GetLSize();
	const double	startCost = vmPtr->getTStart();
	const double	byteCost = vmPtr->getTByte();
	bool			nothingMapped = true;

	if (APattern->Repl)
		return 0;

	for (d = 0; d < ALoopRank; ++d)
		vmAxis[d] = APattern->GetMapDim(AAxisArray[d], dir);

	// cost of one message that carries every reduction variable of the group
	const double messageCost = startCost + byteCost * TotalSize;

	switch (vmPtr->getMType()) {
		case mach_ETHERNET :
		case mach_MYRINET :
			// both machine types use the same formula
			for (d = 0; d < ALoopRank; ++d) {
				if (vmAxis[d] == 0)
					continue;
				blockProcs *= vmPtr->GetSize(vmAxis[d]);
				nothingMapped = false;
			}
			if (!nothingMapped)
				estimate = messageCost * (blockProcs + vmProcs - 2);
			break;

		case mach_TRANSPUTER :
			for (d = 0; d < ALoopRank; ++d) {
				if (vmAxis[d] == 0)
					continue;
				halfExtents += vmPtr->GetSize(vmAxis[d]) / 2;
				nothingMapped = false;
			}
			if (!nothingMapped)
				estimate = messageCost * (vmPtr->GetDistance(0, CentralProc)
											+ halfExtents);
			break;
	}

	return estimate * vmPtr->getScale();
}
//////////////////////////////////////////////////////////////////////
// Calculate reduction time
//////////////////////////////////////////////////////////////////////
// Estimates the time of the group's reduction for a loop aligned with the
// abstract-machine view APattern.
//   APattern   - pattern view; when its Repl flag is set the result is 0
//   ALoopRank  - number of loop dimensions; AAxisArray must hold at least that many entries
//   AAxisArray - per loop dimension, the value used directly as the VM axis (0 = skipped)
// Returns the estimate; 0 when nothing is mapped or the machine type has no branch below.
// NOTE(review): unlike the DArray overload, this one has no mach_MYRINET
// branch and does not multiply by vmPtr->getScale() -- confirm this is intended.
double RedGroup::StartR(AMView *APattern, long ALoopRank, const vector<long>& AAxisArray)
{
	double			estimate = 0;
	long			d;
	long			blockProcs = 1;			// product of VM extents over the used loop dims
	long			halfExtents = 0;		// sum of (VM extent / 2) over the used loop dims
	vector<long>	vmAxis(ALoopRank);		// copy of the first ALoopRank entries of AAxisArray
	const long		vmProcs = vmPtr->GetLSize();
	const double	startCost = vmPtr->getTStart();
	const double	byteCost = vmPtr->getTByte();
	bool			nothingMapped = true;

	if (APattern->Repl)
		return 0;

	for (d = 0; d < ALoopRank; ++d)
		vmAxis[d] = AAxisArray[d];

	// cost of one message that carries every reduction variable of the group
	const double messageCost = startCost + byteCost * TotalSize;

	switch (vmPtr->getMType()) {
		case mach_ETHERNET :
			for (d = 0; d < ALoopRank; ++d) {
				if (vmAxis[d] == 0)
					continue;
				blockProcs *= vmPtr->GetSize(vmAxis[d]);
				nothingMapped = false;
			}
			if (!nothingMapped)
				estimate = messageCost * (blockProcs + vmProcs - 2);
			break;

		case mach_TRANSPUTER :
			for (d = 0; d < ALoopRank; ++d) {
				if (vmAxis[d] == 0)
					continue;
				halfExtents += vmPtr->GetSize(vmAxis[d]) / 2;
				nothingMapped = false;
			}
			if (!nothingMapped)
				estimate = messageCost * (vmPtr->GetDistance(0, CentralProc)
											+ halfExtents);
			break;
	}

	return estimate;
}

View File

@@ -0,0 +1,30 @@
#ifndef RedGroupH
#define RedGroupH
//////////////////////////////////////////////////////////////////////
//
// RedGroup.h: interface for the RedGroup class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include "RedVar.h"
#include "DArray.h"
// Group of reduction variables that are reduced together on one virtual machine.
class RedGroup {
public:
// (an older, commented-out declaration of CentralProc stood here; its comment was unreadable)
VM *vmPtr; // virtual machine the group belongs to (not owned)
std::vector<RedVar *> redVars; // reduction variables added through AddRV() (not owned)
long TotalSize; // sum of RedVar::GetSize() over all added variables
long CentralProc; // value of vmPtr->GetCenterLI() taken at construction
RedGroup(VM *AvmPtr);
virtual ~RedGroup();
// Reduction time estimate for a loop aligned with a distributed array.
double StartR(DArray * APattern, long ALoopRank, const std::vector<long>& AAxisArray);
// Reduction time estimate for a loop aligned with an abstract-machine view.
double StartR(AMView * APattern, long ALoopRank, const std::vector<long>& AAxisArray);
// Adds a reduction variable and accounts for its size in TotalSize.
void AddRV(RedVar *ARedVar);
};
#endif

View File

@@ -0,0 +1,32 @@
// RedVar.cpp: implementation of the RedVar class.
//
//////////////////////////////////////////////////////////////////////
#include "RedVar.h"
using namespace std;
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
// Default constructor: creates an empty reduction variable.
// All sizes are zeroed so that GetSize() returns 0 instead of reading
// uninitialized members.
RedVar::RedVar() :
	RedElmSize(0),
	RedArrLength(0),
	LocElmSize(0)
{
}
// Nothing to release: RedVar holds only plain long members.
RedVar::~RedVar()
{
}
// Stores the three size parameters unchanged; no validation is done.
// Note the parameter order: element size, array length, then the size of
// the additional per-element ("Loc") data.
RedVar::RedVar(long ARedElmSize, long ARedArrLength, long ALocElmSize) :
RedElmSize(ARedElmSize),
RedArrLength(ARedArrLength),
LocElmSize(ALocElmSize)
{
}
// Total size of the variable: (LocElmSize + RedElmSize) per element,
// multiplied by the number of elements RedArrLength.
long RedVar::GetSize()
{
	const long perElement = LocElmSize + RedElmSize;
	return perElement * RedArrLength;
}

View File

@@ -0,0 +1,24 @@
#ifndef RedVarH
#define RedVarH
//////////////////////////////////////////////////////////////////////
//
// RedVar.h: interface for the RedVar class.
//
//////////////////////////////////////////////////////////////////////
#include "Vm.h"
// One reduction variable, possibly an array, described only by its sizes.
class RedVar {
public:
long RedElmSize; // size of one element of the reduction variable (presumably bytes -- TODO confirm)
long RedArrLength; // number of elements in the reduction variable
long LocElmSize; // size of the additional per-element data (presumably bytes -- TODO confirm)
RedVar(long ARedElmSize, long ARedArrLength, long ALocElmSize);
RedVar();
virtual ~RedVar();
long GetSize(); // (LocElmSize + RedElmSize) * RedArrLength
};
#endif

View File

@@ -0,0 +1,17 @@
//////////////////////////////////////////////////////////////////////
//
// RemAccessBuf.cpp: implementation of RemAccessBuf class.
//
//////////////////////////////////////////////////////////////////////
#include "RemAccessBuf.h"
#include "Vm.h"
// Placeholder constructor: the `vm` argument is accepted but not used or stored.
RemAccessBuf::RemAccessBuf(VM* vm)
{
}
// Nothing to release: the class has no data members.
RemAccessBuf::~RemAccessBuf()
{
}

View File

@@ -0,0 +1,19 @@
#ifndef __REMACCESSBUF
#define __REMACCESSBUF
//////////////////////////////////////////////////////////////////////
//
// RemAccessBuf.h: interface for the RemAccessBuf class.
//
//////////////////////////////////////////////////////////////////////
#include "Vm.h"
// Remote-access buffer. Currently an empty shell: it declares no data
// members, only a constructor taking a VM pointer and a destructor.
class RemAccessBuf {
public:
RemAccessBuf(VM* vm);
~RemAccessBuf();
};
#endif

View File

@@ -0,0 +1,200 @@
//---------------------------------------------------------------------------
#include "Space.h"
using namespace std;
extern ofstream prot;
// Builds a space from its per-dimension sizes and derives the multipliers
// used to convert between linear and multi-dimensional indices:
//   MultArray[rank-1] = 1,  MultArray[i] = MultArray[i+1] * SizeArray[i+1]
// (the last dimension varies fastest).
// An empty ASizeArray yields a rank-0 space with an empty MultArray; the
// previous code wrote to element [rank-1] in that case, which is out of bounds.
Space::Space(const vector<long>& ASizeArray) :
	SizeArray(ASizeArray),
	MultArray(ASizeArray.size())
{
	const vector<long>::size_type rank = SizeArray.size();

	if (rank == 0)
		return;

	MultArray[rank - 1] = 1;
	for (vector<long>::size_type i = rank - 1; i > 0; --i)
		MultArray[i - 1] = MultArray[i] * SizeArray[i];
}
#ifdef P_DEBUG
// Debug dump (P_DEBUG builds only): prints the rank, then every size and
// every multiplier of `s`, each preceded by a single space.
ostream& operator << (ostream& os, const Space& s)
{
	const unsigned int rank = s.SizeArray.size();
	unsigned int k;

	os << "Space: rank = " << rank << " SizeArray =";
	for (k = 0; k != rank; ++k)
		os << ' ' << s.SizeArray[k];

	os << "; MultArray =";
	for (k = 0; k != rank; ++k)
		os << ' ' << s.MultArray[k];

	return os << ';';
}
#endif
// Nothing to do explicitly: both members are std::vector and clean up themselves.
Space::~Space()
{
}
/*long Space::GetRank()
{
return Rank;
}
*/
// Size of dimension AAxis. Axes are numbered from 1.
// Returns -1 when AAxis is outside [1, Rank()].
long Space::GetSize(long AAxis)
{
	const bool axisValid = (AAxis >= 1) && !(AAxis > Rank());
	return axisValid ? SizeArray[AAxis - 1] : -1;
}
// Multiplier (linear-index stride) of dimension AAxis. Axes are numbered from 1.
// Returns -1 when AAxis is outside [1, Rank()].
long Space::GetMult(long AAxis)
{
	if (AAxis >= 1 && !(AAxis > Rank())) {
		const long slot = AAxis - 1;	// stored 0-based
		return MultArray[slot];
	}
	return -1;
}
long Space::GetLSize()
{
long i, lsize = 1;
for(i = 0; i < Rank(); i++)
lsize *= SizeArray[i];
return lsize;
}
void Space::GetSI(long LI, vector<long> & SI)
{
int i;
SI = vector<long>(Rank());
for (i=0; i < Rank(); i++)
{
SI[i] = LI / MultArray[i];
LI -= SI[i] * MultArray[i];
}
}
// Copy constructor: copies both the sizes and the multipliers of `x`.
Space::Space(const Space &x) :
SizeArray(x.SizeArray),
MultArray(x.MultArray)
{
// everything is done in the initializer list
}
// Copy assignment: takes over the sizes and multipliers of `x`.
// Self-assignment is a no-op.
Space & Space::operator= (const Space & x)
{
	if (this == &x)
		return *this;

	SizeArray = x.SizeArray;
	MultArray = x.MultArray;
	return *this;
}
// Builds a space from explicit sizes AND explicit multipliers.
// Both vectors are stored as given; nothing checks that AMultArray has the
// same length as ASizeArray or is consistent with it.
Space::Space(const vector<long>& ASizeArray, const vector<long> AMultArray) :
SizeArray(ASizeArray),
MultArray(AMultArray)
{
}
// Default constructor: a rank-0 space with no sizes and no multipliers.
Space::Space() :
	SizeArray(),
	MultArray()
{
}
long Space::GetDistance(long LI1, long LI2)
{
vector<long> SI1, SI2;
int i;
long distance = 0;
GetSI(LI1, SI1);
GetSI(LI2, SI2);
for (i = 0; i < Rank(); i++)
distance += abs(SI1[i] - SI2[i]);
return distance;
}
/*inline long abs(long x)
{
return (x < 0) ? (-x) : (x);
}
*/
long Space::GetLI(const vector<long> & SI)
{
int i;
long LI = 0;
// printf("GET LI rank=%d size=%d\n",Rank(),SI.size());
if (Rank() != SI.size())
{
prot << "Wrong call GetLI" << endl;
// printf("Wrong call GetLI\n");
exit(1);
}
for (i=0; i < Rank(); i++)
{
LI += SI[i] * MultArray[i];
}
return LI;
}
long Space::GetSpecLI(long LI, long dim, int shift)
{
vector<long> SI;
GetSI(LI, SI);
SI[dim-1] += shift;
return GetLI(SI);
}
long Space::GetCenterLI()
{
vector<long> CenterSI(Rank());
int i;
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> (<28><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> 0 <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>)
for (i = 0; i < Rank(); i++)
CenterSI[i] = SizeArray[i] / 2;
return GetLI(CenterSI);
}
long Space::GetNumInDim(long LI, long dimNum)
{
vector<long> SI;
GetSI(LI, SI);
return SI[dimNum - 1];
}

View File

@@ -0,0 +1,66 @@
#ifndef SpaceH
#define SpaceH
//////////////////////////////////////////////////////////////////////
//
// Space.h: interface for the Space base class.
//
//////////////////////////////////////////////////////////////////////
#include <vector>
#include <algorithm>
#include <fstream>
// Rectangular index space: a size per dimension plus the multipliers that
// convert between a linear index (LI) and a space index (SI, one coordinate
// per dimension). Dimension arguments named "axis"/"dim" are numbered from 1.
class Space {
protected:
std::vector<long> SizeArray; // Size of every dimension
std::vector<long> MultArray; // Multiplier for each dimension
public:
Space();
Space(const std::vector<long>& ASizeArray, std::vector<long> AMultArray);
Space(const Space &);
Space(const std::vector<long>& ASizeArray);
// NOTE(review): the destructor is not virtual although other classes derive from Space.
~Space();
// Coordinate of linear index LI along dimension dimNum (1-based).
long GetNumInDim(long LI, long dimNum);
// Linear index of the element whose every coordinate is size / 2.
long GetCenterLI();
// Linear index of the element reached from LI by moving `shift`
// positions along dimension `dim` (1-based).
long GetSpecLI(long LI, long dim, int shift);
// Space index -> linear index; exits the program if SI.size() != Rank().
long GetLI(const std::vector<long> & SI);
// Sum of absolute coordinate differences between two linear indices.
long GetDistance(long LI1, long LI2);
Space& operator= (const Space &x);
// Linear index -> space index; SI is overwritten.
// (Space Index - SI)
void GetSI(long LI, std::vector<long> & SI);
// Product of all dimension sizes.
long GetLSize();
// inline long GetRank() { return Rank; }
long GetSize(long AAxis); // axis numbered from 1; -1 if out of range
long GetMult(long AAxis); // axis numbered from 1; -1 if out of range
// Number of dimensions.
unsigned int Rank() { return SizeArray.size(); }
#ifdef P_DEBUG
friend std::ostream& operator << (std::ostream& os, const Space& s);
#endif
};
// Compatibility shim: a long overload of abs(), compiled only with g++
// older than version 3.
#if defined (__GNUG__) && (__GNUC__ < 3)
inline long abs(long x)
{
return (x < 0) ? (-x) : (x);
};
#endif
#endif

View File

@@ -0,0 +1,22 @@
// stdafx.h : include file for standard system include files,
// or project specific include files that are used frequently, but
// are changed infrequently
//
#if !defined(AFX_STDAFX_H__14513F16_55D6_4E8C_84CB_8B57D48669B3__INCLUDED_)
#define AFX_STDAFX_H__14513F16_55D6_4E8C_84CB_8B57D48669B3__INCLUDED_
#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#include <stdio.h>
// TODO: reference additional headers your program requires here
//{{AFX_INSERT_LOCATION}}
// Microsoft Visual C++ will insert additional declarations immediately before the previous line.
#endif // !defined(AFX_STDAFX_H__14513F16_55D6_4E8C_84CB_8B57D48669B3__INCLUDED_)

View File

@@ -0,0 +1,272 @@
#include <string.h>
#include <assert.h>
#include <memory>
#include <stdlib.h>
#include <fstream>
#include "TraceLine.h"
#include "ParseString.h"
#include "Ver.h"
#ifdef __GNUC__
#define _strdup strdup
#endif
using namespace std;
extern ofstream prot;
VectorTraceLine * traceLines; // lines from file, partially decoded
// File name and line number of the first successfully decoded trace line;
// both are filled in once by the TraceLine(char*) constructor.
char * TraceLine::first_file_name = NULL;
int TraceLine::first_line_number = -1;
//bool VectorTraceLine::startStoreLinesRet = false;
//bool VectorTraceLine::startStoreLines = false;
// --------------------------------- TraceLine ---------------------------------------
// -------------------------------- CONSTRUCTORS -------------------------------------
// Decodes one text line of the trace file.
//
// A line is recognized when it carries the TIME, LINE and FILE parameters
// and one of the markers "call_", "ret_" or "Event_". The word that follows
// the marker, up to the next space, is the function/event name and is
// translated with EventNameToID(). Every other line becomes an Unknown_
// line with zeroed fields.
//
// In both cases info_line receives a strdup() copy of the whole buffer.
// The first recognized line also records first_file_name / first_line_number.
//
// buffer - NUL-terminated line text; it is only read.
TraceLine::TraceLine(char * buffer)
{
	const int LINE_BUFFER_SIZE = 256;
	char	f_file[LINE_BUFFER_SIZE];
	char	f_name[LINE_BUFFER_SIZE];
	char *	fn_pos = NULL;		// start of the name after the marker; NULL = not recognized

	if (ParamFromString("TIME", func_time, buffer) &&
		ParamFromString("LINE", source_line, buffer) &&
		ParamFromString("FILE", f_file, buffer)) {

		if ((fn_pos = strstr(buffer, "call_")) != NULL) {
			line_type = Call_;
			fn_pos += strlen("call_");
		} else if ((fn_pos = strstr(buffer, "ret_")) != NULL) {
			line_type = Ret_;
			fn_pos += strlen("ret_");
		} else if ((fn_pos = strstr(buffer, "Event_")) != NULL) {
			line_type = Event_;
			fn_pos += strlen("Event_");
		}
	}

	if (fn_pos == NULL) {
		// parameters missing or no marker found: keep the raw text only
		func_time = 0.0;
		source_line = 0;
		source_file = NULL;
		func_id = Unknown_Func;
		line_type = Unknown_;
		info_line = _strdup(buffer);
		return;
	}

	// copy the name; clamp it so an over-long token cannot overflow f_name
	size_t name_len = strcspn(fn_pos, " ");
	if (name_len >= sizeof(f_name))
		name_len = sizeof(f_name) - 1;
	strncpy(f_name, fn_pos, name_len);
	f_name[name_len] = '\0';

	func_id = EventNameToID(string(f_name));
	source_file = _strdup(f_file);

	if (first_file_name == NULL) {
		first_file_name = _strdup(f_file);
		first_line_number = source_line;
	}

	info_line = _strdup(buffer);

#ifdef P_DEBUG
	if (func_id == Unknown_Func)
		prot << " Warning : unknown function: " << f_name
			 << " file = " << source_file
			 << " line = " << source_line << endl;
#endif
}
// ---------------------------- COPY CONSTRACTOR -----------------------------------
// "Copy" constructor with transfer semantics: the scalar fields are copied,
// while the two string pointers are taken over from `tr`, which is left
// with NULL pointers. After the call `tr` no longer owns any string.
TraceLine::TraceLine(TraceLine& tr) :
	line_type(tr.line_type),
	func_id(tr.func_id),
	func_time(tr.func_time),
	source_line(tr.source_line),
	source_file(tr.source_file),
	info_line(tr.info_line)
{
	tr.info_line = NULL;
	tr.source_file = NULL;
}
// -------------------------------- DESTRUCTOR -------------------------------------
// Releases the two strings owned by the line.
// Both are allocated with strdup() in the constructor, so they must be
// released with free(), not delete. free(NULL) is a no-op, which covers
// lines whose pointers were taken over by another TraceLine.
TraceLine::~TraceLine()
{
	free(source_file);
	free(info_line);
}
// ------------------------------ VectorTraceLine ------------------------------------
// -------------------------------- CONSTRUCTOR --------------------------------------
// Opens the (possibly gzip-compressed) trace file `file_name` and reads its
// beginning: first the preamble lines, until line storing starts, then the
// first frame. If the file cannot be opened, a message is printed to cerr
// and the program exits with status 1.
VectorTraceLine::VectorTraceLine(char* file_name) :
	startStoreLinesRet(false),
	startStoreLines(false),
	endOfVector(false),
	count(0),
	size(0),
	lines(NULL),
	p_count(0),
	p_size(0),
	p_lines(NULL)
{
	trace_file = gzopen(file_name, "rb");
	if (trace_file == NULL) {
		cerr << "Can't open trace file '" << file_name << '\'' << endl;
		exit(1);
	}

	prot << " Reading/initial decoding of trace file..." << endl;

	// read preamble lines
	while (!startStoreLines && getLine())
		;

	getFrame();

	prot << " Completed." << endl;
}
// -------------------------------- DESTRUCTOR -------------------------------------
// Destroys every stored line and the array that holds them.
// The array may be absent (lines == NULL) even when size is non-zero, for
// example after VectorTraceLine(sz); that case is now skipped instead of
// dereferencing NULL.
// NOTE(review): trace_file is not closed and p_lines is not released here;
// confirm who owns them before changing that.
VectorTraceLine::~VectorTraceLine()
{
	if (lines != NULL) {
		for (unsigned i = 0; i < size; i++)
			delete lines[i];
		free(lines);
	}
	//gzclose(trace_file);
}
bool VectorTraceLine::getLine()
{
const int LINE_BUFFER_SIZE = 256;
char buffer[LINE_BUFFER_SIZE];
int len;
int spn;
TraceLine * tmp;
// if (!trace_file)
if (trace_file == NULL)
return false;
else {
do {
// if (!trace_file.getline(buffer, LINE_BUFFER_SIZE)) {
if (gzgets(trace_file, buffer, LINE_BUFFER_SIZE) == Z_NULL) {
cerr << "Bad trace file" << endl;
prot << "Bad trace file" << endl;
exit(0);
}
len=strlen(buffer);
if ((buffer[len - 1] == 0x0a) && (buffer[len - 2] == 0x0d))
len -= 2;
else if (buffer[len - 1] == 0x0a)
len -= 1;
buffer[len] = 0;
spn=strspn(buffer," ");
} while ( len==0 ||
len==spn ||
(len==1 && *buffer==0x0d) ||
(strncmp(buffer+spn, "----------", 10) == 0));
tmp = new TraceLine(buffer + spn);
// control event dvm_exit
if (tmp->line_type == Event_) {
endOfVector = true;
return false;
}
if ((!startStoreLinesRet) && (tmp->line_type == Ret_) && (tmp->func_id == dvm_Init))
startStoreLinesRet = true;
if ((!startStoreLines) && (tmp->line_type == Call_) && startStoreLinesRet)
startStoreLines = true;
if (startStoreLines) {
// store trace line
lines = (TraceLine **) realloc(lines, sizeof(TraceLine *) * ++size);
lines[size - 1] = tmp;
} else {
// store preamble trace line
p_lines = (char **) realloc(p_lines, sizeof(char *) * ++p_size);
p_lines[p_size - 1] = tmp->info_line;
}
return true;
}
}
// Replaces the stored frame by the next one from the file.
//
// A new frame exists only when the last stored line is a Call_ line. That
// line becomes element 0 of the new frame, and lines are then read until
// the next Call_ line (kept as the last element) or the end of the trace.
//
// Returns false, leaving the current frame untouched, when nothing is
// stored yet or the last stored line is not a Call_ line.
bool VectorTraceLine::getFrame()
{
	// guard against an empty vector before looking at lines[size - 1]
	if (lines == NULL || size == 0)
		return false;
	if (lines[size - 1]->line_type != Call_)
		return false;

	// save last Call_ line from frame (takes over its strings)
	TraceLine * tmp = new TraceLine(*lines[size - 1]);

	// delete previous frame
	for (unsigned i = 0; i < size; i++)
		delete lines[i];
	free(lines);
	size = count = 0;
	lines = NULL;

	// store trace line Call_ in the object
	lines = (TraceLine **) realloc(lines, sizeof(TraceLine *) * ++size);
	assert(lines != NULL);
	lines[size - 1] = tmp;

	// store the rest of the frame
	while (getLine() && lines[size - 1]->line_type != Call_)
		;

	return true;
}
// Advances to the next trace line and returns it, loading the next frame
// from the file when the current one is exhausted.
// Returns NULL when there are no more lines. The result is now also NULL
// when the freshly loaded frame holds nothing beyond its leading Call_
// line, and when next() is called again after the end; both cases
// previously indexed past the end of the array.
TraceLine* VectorTraceLine::next()
{
	count++;
	if (count < size)
		return lines[count];

	// current frame exhausted: get next trace frame
	if (!getFrame())
		return NULL;

	// element 0 of the new frame is the Call_ line that was already
	// returned as the last line of the previous frame, so skip it
	++count;
	return (count < size) ? lines[count] : NULL;
}
// ------------------------------ GetUnknownLines ------------------------------------
void VectorTraceLine::GetUnknownLines(int& il_count, char**& info_lines)
{
while ((count < size) && (lines[count]->line_type == Unknown_)) {
info_lines = (char**) realloc(info_lines, sizeof(char*) * (++il_count));
assert(info_lines != NULL);
info_lines[il_count-1] = _strdup(lines[count]->info_line);
count++;
}
}

View File

@@ -0,0 +1,85 @@
#ifndef TRACELINE_H
#define TRACELINE_H
#include <fstream>
#include "Event.h"
#include "CallInfoStructs.h"
//#include "../../Zlib/Include/zlib.h"
#include "zlib.h"
// Structure for the first stage of file parsing -- file reading
// One line of the trace file after the first decoding stage.
// The two char* members are owned by the object (allocated with strdup in
// the constructor).
class TraceLine {
public:
LineType line_type; // Call_, Ret_, Event_ or Unknown_
Event func_id; // function/event id; Unknown_Func for unrecognized lines
double func_time; // value of the TIME parameter (0.0 for unrecognized lines)
int source_line; // value of the LINE parameter (0 for unrecognized lines)
char* source_file; // value of the FILE parameter; NULL for unrecognized lines
char* info_line; // copy of the raw line text; set for every line type
static char* first_file_name; // FILE of the first recognized line
static int first_line_number; // LINE of the first recognized line
// Decodes the text in `buffer`.
TraceLine(char * buffer);
// Takes over the strings of `tr`, leaving its pointers NULL (not a true copy).
TraceLine(TraceLine& tr);
~TraceLine();
TraceLine& operator = (TraceLine& tr);
};
class VectorTraceLine {
/*static*/ bool startStoreLinesRet;
/*static*/ bool startStoreLines;
bool endOfVector;
unsigned count; // current line
unsigned size; // vector size
TraceLine** lines; // array of pointers to lines
unsigned p_count; // current preamble line
public:
unsigned p_size; // preamble vector size
char** p_lines; // array of pointers to preamble lines
// std::ifstream trace_file;
//gzFile trace_file;
private:
bool getLine();
bool getFrame();
public:
gzFile trace_file;
VectorTraceLine(unsigned sz = 0) : lines(NULL), count(0), size(sz) {}
VectorTraceLine(char* file_name);
~VectorTraceLine();
TraceLine* current() { return lines[count]; }
void GetUnknownLines(int& il_count, char**& info_lines);
TraceLine* next();
bool end() const { return endOfVector; }
friend root_Info* Get_Root_Info();
//grig
void restore() {
VectorTraceLine::startStoreLinesRet = false;
VectorTraceLine::startStoreLines = false;
}
//\grig
//====
unsigned Get_p_size();
char * Get_p_lines(int i);
//=***
};
extern VectorTraceLine * traceLines; // lines from file, partially decoded
#endif

View File

@@ -0,0 +1,8 @@
#ifndef VER_H
#define VER_H
// Version banner strings.
// VER_PRED: name, version and release date of the Predictor tool.
#define VER_PRED "Predictor 2.53, 1.09.2005..."
// RTS_VERSION: NOTE(review): presumably the run-time system version whose
// traces this Predictor build expects -- confirm against where it is used.
#define RTS_VERSION "RTS VERSION = 2884"
#endif

View File

@@ -0,0 +1,398 @@
#include <assert.h>
#include "Vm.h"
using namespace std;
extern ofstream prot;			// output stream used for diagnostics; defined in another file

// Definitions of the globals declared `extern` in Vm.h. An initializer already
// makes these definitions, so the `extern` keyword is dropped here: it was
// redundant and triggers an "initialized and declared 'extern'" warning.
VM * rootVM = NULL;				// pointer to root VM
VM * currentVM = NULL;			// pointer to current VM

//grig
// The original comment on this line is mis-encoded and unreadable.
// NOTE(review): it presumably read "for automatic search" -- confirm.
extern long_vect MinSizesOfAM;
//\grig
//---------------------------------------------------------------------------------
//
// Constructor for root VM
//
//---------------------------------------------------------------------------------
// Root VM with an explicit relative power for the whole machine.
//   ASizeArray  - extent of the processor grid along each axis (rank 1..4)
//   AMType      - interconnect type
//   AnumChanels - number of channels
//   Ascale, ATStart, ATByte - stored as given (see the getters in Vm.h)
//   AProcPower  - relative power of the VM
//   AvProcPower - per-processor power; must hold at least one entry per
//                 processor of the grid
// Any other rank is reported to `prot` and stdout and terminates the program.
VM::VM(const std::vector<long>& ASizeArray, mach_Type AMType, int AnumChanels,
	   double Ascale, double ATStart, double ATByte, double AProcPower,std::vector<double> & AvProcPower) :
	Space(ASizeArray),
	parent(NULL),
	MType(AMType),
	numChanels(AnumChanels),
	scale(Ascale),
	TStart(ATStart),
	TByte(ATByte),
	ProcPower(AProcPower),
	ProcCount(procCount())
{
	int i, j, l;
	int rank = SizeArray.size();

	// weight: one entry per index value of every axis (sum of the extents),
	// all initialised to 1.0
	int k = 0;
	for (i = 0; i < rank; i++)
		k += SizeArray[i];
	weight = vector<double>(k, 1.0);

	//grig
	vWeights = vector<double>(k, 1.0); // init weights array

	// Copy the power of each processor. The caller must supply one value per
	// processor; reading past the end of AvProcPower would be undefined.
	assert(ProcCount <= 0 ||
		   AvProcPower.size() >= static_cast<vector<double>::size_type>(ProcCount));
	vProcPower.clear();
	for (i = 0; i < ProcCount; i++)
		vProcPower.push_back(AvProcPower[i]);
	//\grig

	// initialize mapping
	mapping = vector<int>(ProcCount);

	// root VM : mapping 1:1 (every linear index maps to itself)
	switch (rank) {
		case 1:
			for (i = 0; i < SizeArray[0]; i++)
				mapping[i] = i;
			break;
		case 2:
			for (j = 0; j < SizeArray[0]; j++)
				for (i = 0; i < SizeArray[1]; i++)
					mapping[i + MultArray[0] * j] = i + MultArray[0] * j;
			break;
		case 3:
			for (k = 0; k < SizeArray[0]; k++)
				for (j = 0; j < SizeArray[1]; j++)
					for (i = 0; i < SizeArray[2]; i++)
						mapping[i + MultArray[1] * j + MultArray[0] * k] =
							i + MultArray[1] * j + MultArray[0] * k;
			break;
		case 4:
			for (l = 0; l < SizeArray[0]; l++)
				for (k = 0; k < SizeArray[1]; k++)
					for (j = 0; j < SizeArray[2]; j++)
						for (i = 0; i < SizeArray[3]; i++)
							mapping[i + MultArray[2] * j + MultArray[1] * k + MultArray[0] * l] =
								i + MultArray[2] * j + MultArray[1] * k + MultArray[0] * l;
			break;
		default:
			// Reached for rank 0 as well as for rank > 4.
			prot << "VM rank more then 4 (rank = " << rank << ')' << endl;
			printf("VM rank more then 4 (rank = %d)\n", rank);
			exit(EXIT_FAILURE);
	}
}
// Root VM without an explicit overall power: identical to the other root
// constructor except that ProcPower is fixed to 1.0.
//   ASizeArray  - extent of the processor grid along each axis (rank 1..4)
//   AMType      - interconnect type
//   AnumChanels - number of channels
//   Ascale, ATStart, ATByte - stored as given (see the getters in Vm.h)
//   AvProcPower - per-processor power; must hold at least one entry per
//                 processor of the grid
// Any other rank is reported to `prot` and stdout and terminates the program.
VM::VM(const std::vector<long>& ASizeArray, mach_Type AMType, int AnumChanels,
	   double Ascale, double ATStart, double ATByte, std::vector<double>& AvProcPower) :
	Space(ASizeArray),
	parent(NULL),
	MType(AMType),
	numChanels(AnumChanels),
	scale(Ascale),
	TStart(ATStart),
	TByte(ATByte),
	ProcPower(1.0),
	ProcCount(procCount())
{
	int i, j, l;
	int rank = SizeArray.size();

	// weight: one entry per index value of every axis (sum of the extents),
	// all initialised to 1.0
	int k = 0;
	for (i = 0; i < rank; i++)
		k += SizeArray[i];
	weight = vector<double>(k, 1.0);

	//grig
	vWeights = vector<double>(k, 1.0); // init weights array

	// Copy the power of each processor. The caller must supply one value per
	// processor; reading past the end of AvProcPower would be undefined.
	assert(ProcCount <= 0 ||
		   AvProcPower.size() >= static_cast<vector<double>::size_type>(ProcCount));
	vProcPower.clear();
	for (i = 0; i < ProcCount; i++)
		vProcPower.push_back(AvProcPower[i]);
	//\grig

	// initialize mapping
	mapping = vector<int>(ProcCount);

	// root VM : mapping 1:1 (every linear index maps to itself)
	switch (rank) {
		case 1:
			for (i = 0; i < SizeArray[0]; i++)
				mapping[i] = i;
			break;
		case 2:
			for (j = 0; j < SizeArray[0]; j++)
				for (i = 0; i < SizeArray[1]; i++)
					mapping[i + MultArray[0] * j] = i + MultArray[0] * j;
			break;
		case 3:
			for (k = 0; k < SizeArray[0]; k++)
				for (j = 0; j < SizeArray[1]; j++)
					for (i = 0; i < SizeArray[2]; i++)
						mapping[i + MultArray[1] * j + MultArray[0] * k] =
							i + MultArray[1] * j + MultArray[0] * k;
			break;
		case 4:
			for (l = 0; l < SizeArray[0]; l++)
				for (k = 0; k < SizeArray[1]; k++)
					for (j = 0; j < SizeArray[2]; j++)
						for (i = 0; i < SizeArray[3]; i++)
							mapping[i + MultArray[2] * j + MultArray[1] * k + MultArray[0] * l] =
								i + MultArray[2] * j + MultArray[1] * k + MultArray[0] * l;
			break;
		default:
			// Reached for rank 0 as well as for rank > 4.
			prot << "VM rank more then 4 (rank = " << rank << ')' << endl;
			printf("VM rank more then 4 (rank = %d)\n", rank);
			exit(EXIT_FAILURE);
	}
}
//---------------------------------------------------------------------------------
//
// Constructor for child VM (crtps_)
//
//---------------------------------------------------------------------------------
// Child VM that is a rectangular sub-grid of its parent.
//   lb         - lower bound (offset) of the sub-grid inside the parent, one
//                entry per axis
//   ASizeArray - extent of the sub-grid along each axis (rank 1..4)
//   Aparent    - parent VM; machine parameters are inherited from it
// Each entry of `mapping` is taken from the parent's mapping at the offset
// position, so it keeps the parent's processor numbering.
// Any other rank is reported to `prot` and stdout and terminates the program.
VM::VM(const std::vector<long>& lb, const std::vector<long>& ASizeArray, const VM* Aparent) :
	Space(ASizeArray),
	parent(Aparent),
	MType(parent->MType),
	numChanels(parent->numChanels),
	scale(parent->scale),
	TStart(parent->TStart),
	TByte(parent->TByte),
	ProcPower(parent->ProcPower),
	ProcCount(procCount())
{
	int i, j, l;
	int rank = SizeArray.size();

	// weight: one entry per index value of every axis, all 1.0
	int k = 0;
	for (i = 0; i < rank; i++)
		k += SizeArray[i];
	weight = vector<double>(k, 1.0);
	//grig
	vWeights = vector<double>(k, 1.0);
	//\grig

	// initialize mapping
	mapping = vector<int>(ProcCount);

	// child VM: look every processor up in the parent, shifted by lb
	switch (rank) {
		case 1:
			for (i = 0; i < SizeArray[0]; i++)
				mapping[i] = parent->mapping[i + lb[0]];
			break;
		case 2:
			for (j = 0; j < SizeArray[0]; j++)
				for (i = 0; i < SizeArray[1]; i++)
					mapping[i + MultArray[0] * j] =
						parent->mapping[ i + lb[1] +
										(j + lb[0]) * parent->MultArray[0]];
			break;
		case 3:
			for (k = 0; k < SizeArray[0]; k++)
				for (j = 0; j < SizeArray[1]; j++)
					for (i = 0; i < SizeArray[2]; i++)
						mapping[i + MultArray[1] * j + MultArray[0] * k] =
							parent->mapping[ i + lb[2] +
											(j + lb[1]) * parent->MultArray[1] +
											(k + lb[0]) * parent->MultArray[0]];
			break;
		case 4:
			for (l = 0; l < SizeArray[0]; l++)
				for (k = 0; k < SizeArray[1]; k++)
					for (j = 0; j < SizeArray[2]; j++)
						for (i = 0; i < SizeArray[3]; i++)
							mapping[i + MultArray[2] * j + MultArray[1] * k + MultArray[0] * l] =
								parent->mapping[ i + lb[3] +
												(j + lb[2]) * parent->MultArray[2] +
												(k + lb[1]) * parent->MultArray[1] +
												(l + lb[0]) * parent->MultArray[0]];
			break;
		default:
			// Reached for rank 0 as well as for rank > 4.
			prot << "VM rank more then 4 (rank = " << rank << ')' << endl;
			printf("VM rank more then 4 (rank = %d)\n", rank);
			exit(EXIT_FAILURE);
	}

	//grig
	// getProcPower(k) reads vProcPower[map(k)], i.e. it indexes the table with
	// the processor number stored in `mapping`, not with the local index k.
	// The table therefore has to cover the same numbering as the parent's, so
	// it is copied whole. Copying only the first ProcCount entries made an
	// offset sub-grid read past the end of the table.
	vProcPower = Aparent->vProcPower;
	//\grig
}
//---------------------------------------------------------------------------------
//
// Constructor for child VM (psview_)
//
//---------------------------------------------------------------------------------
// Child VM that is a different-shaped view of its parent's processors.
//   ASizeArray - extent of the view along each axis
//   Aparent    - parent VM; machine parameters and the whole processor
//                mapping are copied from it
VM::VM(const std::vector<long>& ASizeArray, const VM* Aparent) :
	Space(ASizeArray),
	parent(Aparent),
	MType(parent->MType),
	numChanels(parent->numChanels),
	scale(parent->scale),
	TStart(parent->TStart),
	TByte(parent->TByte),
	ProcPower(parent->ProcPower),
	ProcCount(procCount()),
	mapping(parent->mapping)
{
	//grig
	// getProcPower(k) reads vProcPower[map(k)] and `mapping` is the parent's,
	// so the power table must be indexed exactly like the parent's: copy it
	// whole. The previous element-wise copy wrote past the end of the table
	// whenever this view had more processors than the parent.
	vProcPower = Aparent->vProcPower;
	//\grig

	// weight: one entry per index value of every axis, all 1.0
	int k = 0;
	for (std::vector<long>::size_type i = 0; i < SizeArray.size(); i++)
		k += SizeArray[i];
	weight = vector<double>(k, 1.0);
	//grig
	vWeights = vector<double>(k, 1.0);
	//\grig
}
//---------------------------------------------------------------------------------
//
// Set weights for VM
//
//---------------------------------------------------------------------------------
// Replaces the weights of this VM. Aweight must have exactly as many entries
// as the current weight vector (checked with assert); both `weight` and
// `vWeights` receive a copy.
void VM::setWeight(const std::vector<double>& Aweight)
{
	assert(weight.size() == Aweight.size());

	//grig
	vWeights = Aweight;
	//\grig
	weight = Aweight;
}
//---------------------------------------------------------------------------------
//
// calculates number of processors in VM
//
//---------------------------------------------------------------------------------
// Recomputes ProcCount as the product of all extents in SizeArray, stores it
// in the member and returns it (1 when SizeArray is empty).
int VM::procCount()
{
	ProcCount = 1;

	const std::vector<long>::size_type axes = SizeArray.size();
	std::vector<long>::size_type axis = 0;
	while (axis < axes) {
		ProcCount *= SizeArray[axis];
		++axis;
	}

	return ProcCount;
}
// Destructor: drops the (non-owning) parent link and empties the vectors.
// The parent VM itself is not deleted here.
VM::~VM()
{
	parent = NULL;

	vWeights.clear();
	vProcPower.clear();
	weight.clear();
	mapping.clear();
}
#ifdef P_DEBUG
// Debug output: prints the Space part of the VM followed by " Map =" and
// every entry of the processor mapping, separated by blanks.
ostream& operator << (ostream& os, const VM& vm)
{
	os << (Space) vm;
	os << " Map =";

	const unsigned int entries = vm.mapping.size();
	for (unsigned int idx = 0; idx != entries; ++idx)
		os << ' ' << vm.mapping[idx];

	os << ' ';
	return os;
}
#endif

View File

@@ -0,0 +1,105 @@
#ifndef VM_H
#define VM_H
//////////////////////////////////////////////////////////////////////
//
// Vm.h: interface for the Virtual machine (VM) class.
//
//////////////////////////////////////////////////////////////////////
#include "Space.h"
#include <vector>
//#include "ps.h"
//using namespace std;
// Interconnect type of the modelled machine. The numeric values are the
// ones listed in the trailing comments (VM::getMType() returns them as int).
enum mach_Type {
mach_ETHERNET, // 0
mach_TRANSPUTER, // 1
mach_MYRINET // 2
};
//grig
// Shorthand for a vector of long values (used for MinSizesOfAM below).
typedef std::vector<long> long_vect;
//\grig
// Virtual machine: a grid of processors (the Space base class holds its
// shape) together with machine parameters, a map from local processor
// index to processor number, and per-processor weights/powers.
// NOTE: the data members are initialised in declaration order by the
// constructors in Vm.cpp -- do not reorder them.
class VM : public Space {

	const VM*		parent;			// pointer to parent VM (NULL for the root VM)
	mach_Type		MType;			// system type: 0 - ethernet, 1 - transputers, 2 - myrinet
	int				numChanels;		// number of channels for myrinet
	double			scale;
	double			TStart;			// start time (original comment partly mis-encoded)
	double			TByte;			// byte transfer time (original comment partly mis-encoded)
	double			ProcPower;		// relative VM power
	int				ProcCount;		// number of processors in VM
	std::vector<int>	mapping;	// map to absolute processors numbers
	std::vector<double>	weight;		// vector - PS weights

	// calculates number of processors in VM (also stores it in ProcCount)
	int procCount();

	//grig
	// Per-processor power; read through getProcPower(int), which indexes it
	// with map(k). (Original comment mis-encoded.)
	std::vector<double> vProcPower;
	// Second copy of the weights, kept in step with `weight` by setWeight().
	// (Original comment mis-encoded.)
	std::vector<double> vWeights;

public:
	// Replaces vWeights with a copy of varray (any length).
	void SetvWeights(std::vector<double> & varray) {
		vWeights = varray;
	}

	// Power of the processor with local index k.
	// No bounds checking is performed.
	double getProcPower(int k) const { return vProcPower[this->map(k)]; }
	//\grig

public:

	// constructor for root VM
	VM(const std::vector<long>& ASizeArray, mach_Type AMType, int AnumChanels,
		double Ascale, double ATStart, double ATByte, double AProcPower, std::vector<double>& AvProcPower);

	// constructor for child VM (crtps_)
	VM(const std::vector<long>& lb, const std::vector<long>& ASizeArray, const VM* Aparent);

	// constructor for child VM (psview_)
	VM(const std::vector<long>& ASizeArray, const VM* Aparent);

	//grig!!!!
	// constructor for root VM without an overall power (ProcPower = 1.0)
	VM(const std::vector<long>& ASizeArray, mach_Type AMType, int AnumChanels,
		double Ascale, double ATStart, double ATByte, std::vector<double>& AvProcPower);
	//\grig

	// Destructor
	~VM();

	double	getTByte() const { return TByte; }
	double	getTStart() const { return TStart; }
	int		getMType() const { return MType; }
	int		getProcCount() const { return ProcCount; }
	//comment by grig 	double	getProcPower() const { return ProcPower; }
	// Deliberately returns 0 (changed by grig); use getProcPower(int) for
	// the power of an individual processor.
	double	getProcPower() const { return 0; }
	int		getNumChanels() const { return numChanels; }
	double	getScale() const { return scale; }
	const std::vector<long>& getSizeArray() const { return SizeArray; }
	// Processor number for local index i (no bounds checking).
	int		map(int i) const { return mapping[i]; }

	// Set weights for VM
	void setWeight(const std::vector<double>& Aweight);

#ifdef P_DEBUG
	friend std::ostream& operator << (std::ostream& os, const VM& vm);
#endif

};
// Global VM pointers; both are defined (and initialised to NULL) in Vm.cpp.
extern VM * rootVM; // pointer to root VM
extern VM * currentVM; // pointer to current VM
//grig
// The original comment on this line is mis-encoded and unreadable.
// NOTE(review): it presumably read "for automatic search" -- confirm.
extern long_vect MinSizesOfAM;
//\grig
inline int MPSProcCount() { return currentVM->getProcCount(); }
#endif

View File

@@ -0,0 +1,48 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zlib.h"
#define BASE 65521L /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
#define DO1(buf,i) {s1 += buf[i]; s2 += s1;}
#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
#define DO16(buf) DO8(buf,0); DO8(buf,8);
/* ========================================================================= */
/* Updates a running Adler-32 checksum with len bytes from buf.
 *   adler - checksum so far (low 16 bits: s1, high 16 bits: s2)
 *   buf   - data to add; if Z_NULL the function returns 1L
 *   len   - number of bytes in buf
 * Input is consumed in runs of at most NMAX bytes so that the sums can be
 * reduced modulo BASE once per run instead of once per byte.
 */
uLong ZEXPORT adler32(adler, buf, len)
    uLong adler;
    const Bytef *buf;
    uInt len;
{
    unsigned long s1 = adler & 0xffff;
    unsigned long s2 = (adler >> 16) & 0xffff;
    int k;

    if (buf == Z_NULL) return 1L;

    while (len > 0) {
        /* size of this run */
        if (len < NMAX)
            k = len;
        else
            k = NMAX;
        len -= k;

        /* unrolled part: 16 bytes per step */
        for (; k >= 16; k -= 16) {
            DO16(buf);
            buf += 16;
        }

        /* remaining 0..15 bytes */
        while (k > 0) {
            s1 += *buf++;
            s2 += s1;
            k--;
        }

        s1 %= BASE;
        s2 %= BASE;
    }
    return (s2 << 16) | s1;
}

View File

@@ -0,0 +1,68 @@
/* compress.c -- compress a memory buffer
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zlib.h"
/* ===========================================================================
Compresses the source buffer into the destination buffer. The level
parameter has the same meaning as in deflateInit. sourceLen is the byte
length of the source buffer. Upon entry, destLen is the total size of the
destination buffer, which must be at least 0.1% larger than sourceLen plus
12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_BUF_ERROR if there was not enough room in the output buffer,
Z_STREAM_ERROR if the level parameter is invalid.
*/
int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
    Bytef *dest;
    uLongf *destLen;
    const Bytef *source;
    uLong sourceLen;
    int level;
{
    z_stream zs;
    int status;

    /* default allocator, no user data */
    zs.zalloc = (alloc_func)0;
    zs.zfree = (free_func)0;
    zs.opaque = (voidpf)0;

    zs.next_in = (Bytef*)source;
    zs.avail_in = (uInt)sourceLen;
#ifdef MAXSEG_64K
    /* Check for source > 64K on 16-bit machine: */
    if ((uLong)zs.avail_in != sourceLen) return Z_BUF_ERROR;
#endif

    /* reject a destination length that does not fit in uInt */
    zs.next_out = dest;
    zs.avail_out = (uInt)*destLen;
    if ((uLong)zs.avail_out != *destLen) return Z_BUF_ERROR;

    status = deflateInit(&zs, level);
    if (status != Z_OK) return status;

    /* everything must be compressed in this single call */
    status = deflate(&zs, Z_FINISH);
    if (status != Z_STREAM_END) {
        deflateEnd(&zs);
        if (status == Z_OK)
            return Z_BUF_ERROR; /* output buffer was too small */
        return status;
    }

    *destLen = zs.total_out;
    return deflateEnd(&zs);
}
/* ===========================================================================
Compresses the source buffer into the destination buffer using the default
compression level: this is compress2() called with Z_DEFAULT_COMPRESSION.
The parameters and the return value are those of compress2().
*/
int ZEXPORT compress (dest, destLen, source, sourceLen)
Bytef *dest;
uLongf *destLen;
const Bytef *source;
uLong sourceLen;
{
return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
}

View File

@@ -0,0 +1,162 @@
/* crc32.c -- compute the CRC-32 of a data stream
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zlib.h"
#define local static
#ifdef DYNAMIC_CRC_TABLE
local int crc_table_empty = 1;
local uLongf crc_table[256];
local void make_crc_table OF((void));
/*
Generate a table for a byte-wise 32-bit CRC calculation on the polynomial:
x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
Polynomials over GF(2) are represented in binary, one bit per coefficient,
with the lowest powers in the most significant bit. Then adding polynomials
is just exclusive-or, and multiplying a polynomial by x is a right shift by
one. If we call the above polynomial p, and represent a byte as the
polynomial q, also with the lowest power in the most significant bit (so the
byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
where a mod b means the remainder after dividing a by b.
This calculation is done using the shift-register method of multiplying and
taking the remainder. The register is initialized to zero, and for each
incoming bit, x^32 is added mod p to the register if the bit is a one (where
x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
x (which is shifting right by one and adding x^32 mod p if the bit shifted
out is a one). We start with the highest power (least significant bit) of
q and repeat for all eight bits of q.
The table is simply the CRC of all possible eight bit values. This is all
the information needed to generate CRC's on data a byte at a time for all
combinations of CRC register values and incoming bytes.
*/
local void make_crc_table()
{
uLong c;
int n, k;
uLong poly; /* polynomial exclusive-or pattern */
/* terms of polynomial defining this crc (except x^32): */
static const Byte p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
/* make exclusive-or pattern from polynomial (0xedb88320L) */
poly = 0L;
for (n = 0; n < sizeof(p)/sizeof(Byte); n++)
poly |= 1L << (31 - p[n]);
for (n = 0; n < 256; n++)
{
c = (uLong)n;
for (k = 0; k < 8; k++)
c = c & 1 ? poly ^ (c >> 1) : c >> 1;
crc_table[n] = c;
}
crc_table_empty = 0;
}
#else
/* ========================================================================
* Table of CRC-32's of all single-byte values (made by make_crc_table)
*/
local const uLongf crc_table[256] = {
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
0x2d02ef8dL
};
#endif
/* =========================================================================
* This function can be used by asm versions of crc32()
*
* Returns a pointer to the 256-entry CRC-32 table. With DYNAMIC_CRC_TABLE
* defined, the table is generated on the first call if it is still empty.
*/
const uLongf * ZEXPORT get_crc_table()
{
#ifdef DYNAMIC_CRC_TABLE
if (crc_table_empty) make_crc_table();
#endif
return (const uLongf *)crc_table;
}
/* ========================================================================= */
#define DO1(buf) crc = crc_table[((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8);
#define DO2(buf) DO1(buf); DO1(buf);
#define DO4(buf) DO2(buf); DO2(buf);
#define DO8(buf) DO4(buf); DO4(buf);
/* ========================================================================= */
/* Updates a running CRC-32 with len bytes from buf and returns the new
 * value. If buf is Z_NULL the function returns 0L. The value is inverted
 * on entry and again on exit.
 */
uLong ZEXPORT crc32(crc, buf, len)
    uLong crc;
    const Bytef *buf;
    uInt len;
{
    if (buf == Z_NULL) return 0L;
#ifdef DYNAMIC_CRC_TABLE
    if (crc_table_empty)
      make_crc_table();
#endif
    crc ^= 0xffffffffL;

    /* unrolled part: eight bytes per step */
    for (; len >= 8; len -= 8)
    {
      DO8(buf);
    }

    /* remaining 0..7 bytes */
    while (len != 0)
    {
      DO1(buf);
      len--;
    }
    return crc ^ 0xffffffffL;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,318 @@
/* deflate.h -- internal compression state
* Copyright (C) 1995-1998 Jean-loup Gailly
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* @(#) $Id$ */
#ifndef _DEFLATE_H
#define _DEFLATE_H
#include "zutil.h"
/* ===========================================================================
* Internal compression state.
*/
#define LENGTH_CODES 29
/* number of length codes, not counting the special END_BLOCK code */
#define LITERALS 256
/* number of literal bytes 0..255 */
#define L_CODES (LITERALS+1+LENGTH_CODES)
/* number of Literal or Length codes, including the END_BLOCK code */
#define D_CODES 30
/* number of distance codes */
#define BL_CODES 19
/* number of codes used to transfer the bit lengths */
#define HEAP_SIZE (2*L_CODES+1)
/* maximum heap size */
#define MAX_BITS 15
/* All codes must not exceed MAX_BITS bits */
#define INIT_STATE 42
#define BUSY_STATE 113
#define FINISH_STATE 666
/* Stream status */
/* Data structure describing a single value and its code string. */
/* Each union overlays two fields that share one ush of storage; the
* Freq/Code and Dad/Len macros defined after this struct are shorthand for them.
*/
typedef struct ct_data_s {
union {
ush freq; /* frequency count */
ush code; /* bit string */
} fc;
union {
ush dad; /* father node in Huffman tree */
ush len; /* length of bit string */
} dl;
} FAR ct_data;
#define Freq fc.freq
#define Code fc.code
#define Dad dl.dad
#define Len dl.len
/* static_tree_desc is only forward-declared in this header. */
typedef struct static_tree_desc_s static_tree_desc;
/* Descriptor of one Huffman tree: the dynamic tree plus the static tree it
* corresponds to.
*/
typedef struct tree_desc_s {
ct_data *dyn_tree; /* the dynamic tree */
int max_code; /* largest code with non zero frequency */
static_tree_desc *stat_desc; /* the corresponding static tree */
} FAR tree_desc;
typedef ush Pos;
typedef Pos FAR Posf;
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
*/
/* Complete state of one deflate (compression) stream. The fields are grouped
* below into general stream state, fields used by deflate.c, and fields used
* by trees.c.
*/
typedef struct internal_state {
z_streamp strm; /* pointer back to this zlib stream */
int status; /* as the name implies */
Bytef *pending_buf; /* output still pending */
ulg pending_buf_size; /* size of pending_buf */
Bytef *pending_out; /* next pending byte to output to the stream */
int pending; /* nb of bytes in the pending buffer */
int noheader; /* suppress zlib header and adler32 */
Byte data_type; /* UNKNOWN, BINARY or ASCII */
Byte method; /* STORED (for zip only) or DEFLATED */
int last_flush; /* value of flush param for previous deflate call */
/* used by deflate.c: */
uInt w_size; /* LZ77 window size (32K by default) */
uInt w_bits; /* log2(w_size) (8..16) */
uInt w_mask; /* w_size - 1 */
Bytef *window;
/* Sliding window. Input bytes are read into the second half of the window,
* and move to the first half later to keep a dictionary of at least wSize
* bytes. With this organization, matches are limited to a distance of
* wSize-MAX_MATCH bytes, but this ensures that IO is always
* performed with a length multiple of the block size. Also, it limits
* the window size to 64K, which is quite useful on MSDOS.
* To do: use the user input buffer as sliding window.
*/
ulg window_size;
/* Actual size of window: 2*wSize, except when the user input buffer
* is directly used as sliding window.
*/
Posf *prev;
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Posf *head; /* Heads of the hash chains or NIL. */
uInt ins_h; /* hash index of string to be inserted */
uInt hash_size; /* number of elements in hash table */
uInt hash_bits; /* log2(hash_size) */
uInt hash_mask; /* hash_size-1 */
uInt hash_shift;
/* Number of bits by which ins_h must be shifted at each input
* step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* hash_shift * MIN_MATCH >= hash_bits
*/
long block_start;
/* Window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
uInt match_length; /* length of best match */
IPos prev_match; /* previous match */
int match_available; /* set if previous match exists */
uInt strstart; /* start of string to insert */
uInt match_start; /* start of matching string */
uInt lookahead; /* number of valid bytes ahead in window */
uInt prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation.
*/
uInt max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this
* length. A higher limit improves compression ratio but degrades the
* speed.
*/
uInt max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is used only for compression
* levels >= 4.
*/
# define max_insert_length max_lazy_match
/* Insert new strings in the hash table only if the match length is not
* greater than this length. This saves time but degrades compression.
* max_insert_length is used only for compression levels <= 3.
* Note that it is an alias (macro) for the max_lazy_match field above.
*/
int level; /* compression level (1..9) */
int strategy; /* favor or force Huffman coding*/
uInt good_match;
/* Use a faster search when the previous match is longer than this */
int nice_match; /* Stop searching when current match exceeds this */
/* used by trees.c: */
/* Didn't use ct_data typedef below to suppress compiler warning */
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
struct tree_desc_s l_desc; /* desc. for literal tree */
struct tree_desc_s d_desc; /* desc. for distance tree */
struct tree_desc_s bl_desc; /* desc. for bit length tree */
ush bl_count[MAX_BITS+1];
/* number of codes at each bit length for an optimal tree */
int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
int heap_len; /* number of elements in the heap */
int heap_max; /* element of largest frequency */
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
* The same heap array is used to build all trees.
*/
uch depth[2*L_CODES+1];
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
uchf *l_buf; /* buffer for literals or lengths */
uInt lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
* limiting lit_bufsize to 64K:
* - frequencies can be kept in 16 bit counters
* - if compression is not successful for the first block, all input
* data is still in the window so we can still emit a stored block even
* when input comes from standard input. (This can also be done for
* all blocks if lit_bufsize is not greater than 32K.)
* - if compression is not successful for a file smaller than 64K, we can
* even emit a stored file instead of a stored block (saving 5 bytes).
* This is applicable only for zip (not gzip or zlib).
* - creating new Huffman trees less frequently may not provide fast
* adaptation to changes in the input data statistics. (Take for
* example a binary file with poorly compressible code followed by
* a highly compressible string table.) Smaller buffer sizes give
* fast adaptation but have of course the overhead of transmitting
* trees more frequently.
* - I can't count above 4
*/
uInt last_lit; /* running index in l_buf */
ushf *d_buf;
/* Buffer for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
uInt matches; /* number of string matches in current block */
int last_eob_len; /* bit length of EOB code for last block */
#ifdef DEBUG
ulg compressed_len; /* total bit length of compressed file mod 2^32 */
ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
#endif
ush bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least
* significant bits).
*/
int bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
} FAR deflate_state;
/* Output a byte on the stream.
* IN assertion: there is enough room in pending_buf.
* NOTE: expands to a braced statement and evaluates `s` twice, so `s` must
* be free of side effects.
*/
#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
/* Minimum amount of lookahead, except at the end of the input file.
* See deflate.c for comments about the MIN_MATCH+1.
*/
#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
/* In order to simplify the code, particularly on 16 bit machines, match
* distances are limited to MAX_DIST instead of WSIZE.
*/
/* Tree-building entry points implemented in trees.c */
void _tr_init OF((deflate_state *s));
int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
int eof));
void _tr_align OF((deflate_state *s));
void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
int eof));
#define d_code(dist) \
((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
/* Mapping from a distance to a distance code. dist is the distance - 1 and
* must not have side effects. _dist_code[256] and _dist_code[257] are never
* used. Values below 256 index the table directly; larger values index the
* second half of the table with dist >> 7.
*/
#ifndef DEBUG
/* Inline versions of _tr_tally for speed: */
/* The code tables are declared non-const when the trees header is being
* generated (GEN_TREES_H) or when the compiler is not ANSI (no STDC).
*/
#if defined(GEN_TREES_H) || !defined(STDC)
extern uch _length_code[];
extern uch _dist_code[];
#else
extern const uch _length_code[];
extern const uch _dist_code[];
#endif
/* Record literal byte c: a zero distance is stored in d_buf, the byte in
* l_buf, and its literal frequency is incremented. flush is set to true
* when last_lit reaches lit_bufsize-1.
*/
# define _tr_tally_lit(s, c, flush) \
{ uch cc = (c); \
s->d_buf[s->last_lit] = 0; \
s->l_buf[s->last_lit++] = cc; \
s->dyn_ltree[cc].Freq++; \
flush = (s->last_lit == s->lit_bufsize-1); \
}
/* Record a match: the distance goes to d_buf and the length to l_buf, then
* the length-code and distance-code frequencies are incremented (the
* distance is decremented first because d_code expects distance - 1).
* flush is set to true when last_lit reaches lit_bufsize-1.
*/
# define _tr_tally_dist(s, distance, length, flush) \
{ uch len = (length); \
ush dist = (distance); \
s->d_buf[s->last_lit] = dist; \
s->l_buf[s->last_lit++] = len; \
dist--; \
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
s->dyn_dtree[d_code(dist)].Freq++; \
flush = (s->last_lit == s->lit_bufsize-1); \
}
#else
/* DEBUG builds route both macros through the out-of-line _tr_tally(). */
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
# define _tr_tally_dist(s, distance, length, flush) \
flush = _tr_tally(s, distance, length)
#endif
#endif

View File

@@ -0,0 +1,875 @@
/* gzio.c -- IO on .gz files
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*
* Compile this file with -DNO_DEFLATE to avoid the compression code.
*/
/* @(#) $Id$ */
#include <stdio.h>
#include "zutil.h"
struct internal_state {int dummy;}; /* for buggy compilers */
/* Z_BUFSIZE is the size in bytes of the input and output buffers that
* gz_open allocates for a gz_stream. It may be overridden at compile time.
*/
#ifndef Z_BUFSIZE
# ifdef MAXSEG_64K
# define Z_BUFSIZE 4096 /* minimize memory usage for 16-bit DOS */
# else
# define Z_BUFSIZE 16384
# endif
#endif
/* Size of the on-stack formatting buffer used by gzprintf. */
#ifndef Z_PRINTF_BUFSIZE
# define Z_PRINTF_BUFSIZE 4096
#endif
/* Allocation helpers: plain malloc, and a free that tolerates NULL. */
#define ALLOC(size) malloc(size)
#define TRYFREE(p) {if (p) free(p);}
static int gz_magic[2] = {0x1f, 0x8b}; /* gzip magic header */
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
#define COMMENT 0x10 /* bit 4 set: file comment present */
#define RESERVED 0xE0 /* bits 5..7: reserved */
/* Per-file state behind an opaque gzFile handle: the zlib stream plus the
* stdio FILE it reads from or writes to, the I/O buffers and the running
* CRC of the uncompressed data.
*/
typedef struct gz_stream {
z_stream stream;
int z_err; /* error code for last stream operation */
int z_eof; /* set if end of input file */
FILE *file; /* .gz file */
Byte *inbuf; /* input buffer */
Byte *outbuf; /* output buffer */
uLong crc; /* crc32 of uncompressed data */
char *msg; /* error message */
char *path; /* path name for debugging only */
int transparent; /* 1 if input file is not a .gz file */
char mode; /* 'w' or 'r' */
long startpos; /* start of compressed data in file (header skipped) */
} gz_stream;
local gzFile gz_open OF((const char *path, const char *mode, int fd));
local int do_flush OF((gzFile file, int flush));
local int get_byte OF((gz_stream *s));
local void check_header OF((gz_stream *s));
local int destroy OF((gz_stream *s));
local void putLong OF((FILE *file, uLong x));
local uLong getLong OF((gz_stream *s));
/* ===========================================================================
     Opens a gzip (.gz) file for reading or writing. The mode parameter
   is as in fopen ("rb" or "wb"); it may additionally contain a compression
   level digit ('0'..'9') and a strategy letter ('f' for Z_FILTERED, 'h' for
   Z_HUFFMAN_ONLY). Those extra characters are stripped before the mode is
   passed on to the C library. The file is given either by file descriptor
   or path name (if fd == -1).
     gz_open returns NULL if the file could not be opened or if there was
   insufficient memory to allocate the (de)compression state; errno
   can be checked to distinguish the two cases (if errno is zero, the
   zlib error is Z_MEM_ERROR).
*/
local gzFile gz_open (path, mode, fd)
    const char *path;
    const char *mode;
    int fd;
{
    int err;
    int level = Z_DEFAULT_COMPRESSION; /* compression level */
    int strategy = Z_DEFAULT_STRATEGY; /* compression strategy */
    char *p = (char*)mode;
    gz_stream *s;
    char fmode[80]; /* copy of mode, without the compression level */
    char *m = fmode;

    if (!path || !mode) return Z_NULL;

    s = (gz_stream *)ALLOC(sizeof(gz_stream));
    if (!s) return Z_NULL;

    s->stream.zalloc = (alloc_func)0;
    s->stream.zfree = (free_func)0;
    s->stream.opaque = (voidpf)0;
    s->stream.next_in = s->inbuf = Z_NULL;
    s->stream.next_out = s->outbuf = Z_NULL;
    s->stream.avail_in = s->stream.avail_out = 0;
    /* destroy() inspects stream.state and mode, so both must hold defined
     * values before the first error path can reach it.
     */
    s->stream.state = Z_NULL;
    s->mode = '\0';
    s->file = NULL;
    s->z_err = Z_OK;
    s->z_eof = 0;
    s->crc = crc32(0L, Z_NULL, 0);
    s->msg = NULL;
    s->transparent = 0;

    s->path = (char*)ALLOC(strlen(path)+1);
    if (s->path == NULL) {
        return destroy(s), (gzFile)Z_NULL;
    }
    strcpy(s->path, path); /* do this early for debugging */

    /* Split the mode string: remember the direction, pick up the optional
     * level/strategy characters, and copy everything else to fmode.
     */
    do {
        if (*p == 'r') s->mode = 'r';
        if (*p == 'w' || *p == 'a') s->mode = 'w';
        if (*p >= '0' && *p <= '9') {
            level = *p - '0';
        } else if (*p == 'f') {
            strategy = Z_FILTERED;
        } else if (*p == 'h') {
            strategy = Z_HUFFMAN_ONLY;
        } else {
            *m++ = *p; /* copy the mode */
        }
    } while (*p++ && m != fmode + sizeof(fmode));
    /* The loop stops without copying the terminator when the mode string
     * fills fmode completely; make sure the copy is always a valid string.
     */
    fmode[sizeof(fmode) - 1] = '\0';
    if (s->mode == '\0') return destroy(s), (gzFile)Z_NULL;

    if (s->mode == 'w') {
#ifdef NO_DEFLATE
        err = Z_STREAM_ERROR;
#else
        err = deflateInit2(&(s->stream), level,
                           Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, strategy);
        /* windowBits is passed < 0 to suppress zlib header */

        s->stream.next_out = s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
#endif
        if (err != Z_OK || s->outbuf == Z_NULL) {
            return destroy(s), (gzFile)Z_NULL;
        }
    } else {
        s->stream.next_in = s->inbuf = (Byte*)ALLOC(Z_BUFSIZE);

        err = inflateInit2(&(s->stream), -MAX_WBITS);
        /* windowBits is passed < 0 to tell that there is no zlib header.
         * Note that in this case inflate *requires* an extra "dummy" byte
         * after the compressed stream in order to complete decompression and
         * return Z_STREAM_END. Here the gzip CRC32 ensures that 4 bytes are
         * present after the compressed stream.
         */
        if (err != Z_OK || s->inbuf == Z_NULL) {
            return destroy(s), (gzFile)Z_NULL;
        }
    }
    s->stream.avail_out = Z_BUFSIZE;

    errno = 0;
    s->file = fd < 0 ? F_OPEN(path, fmode) : (FILE*)fdopen(fd, fmode);

    if (s->file == NULL) {
        return destroy(s), (gzFile)Z_NULL;
    }
    if (s->mode == 'w') {
        /* Write a very simple .gz header:
         */
        fprintf(s->file, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1],
                Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/, OS_CODE);
        s->startpos = 10L;
        /* We use 10L instead of ftell(s->file) because ftell causes an
         * fflush on some systems. This version of the library doesn't use
         * startpos anyway in write mode, so this initialization is not
         * necessary.
         */
    } else {
        check_header(s); /* skip the .gz header */
        s->startpos = (ftell(s->file) - s->stream.avail_in);
    }

    return (gzFile)s;
}
/* ===========================================================================
     Public entry point: open the .gz file named by path with the given
   mode. The work is delegated to gz_open with fd == -1, meaning "open by
   path name".
*/
gzFile ZEXPORT gzopen (path, mode)
    const char *path;
    const char *mode;
{
    gzFile handle = gz_open(path, mode, -1);

    return handle;
}
/* ===========================================================================
     Associate a gzFile with the file descriptor fd. fd is not dup'ed here
   to mimic the behavio(u)r of fdopen.
     Returns Z_NULL if fd is negative or if gz_open fails.
*/
gzFile ZEXPORT gzdopen (fd, mode)
    int fd;
    const char *mode;
{
    /* "<fd:" + decimal digits + ">" + NUL. 46 bytes leaves room for an int
     * of up to 128 bits, so the sprintf below cannot overrun the buffer
     * whatever the platform's int width is.
     */
    char name[46];

    if (fd < 0) return (gzFile)Z_NULL;
    sprintf(name, "<fd:%d>", fd); /* for debugging */

    return gz_open (name, mode, fd);
}
/* ===========================================================================
 * Change the compression level and strategy of a file opened for writing.
 * Returns Z_STREAM_ERROR when file is NULL or was not opened for writing,
 * otherwise the result of deflateParams.
 */
int ZEXPORT gzsetparams (file, level, strategy)
    gzFile file;
    int level;
    int strategy;
{
    gz_stream *gz = (gz_stream*)file;

    if (gz == NULL) return Z_STREAM_ERROR;
    if (gz->mode != 'w') return Z_STREAM_ERROR;

    /* deflateParams may need to emit data: drain a full output buffer. */
    if (gz->stream.avail_out == 0) {
        size_t written;

        gz->stream.next_out = gz->outbuf;
        written = fwrite(gz->outbuf, 1, Z_BUFSIZE, gz->file);
        if (written != Z_BUFSIZE) {
            gz->z_err = Z_ERRNO;
        }
        gz->stream.avail_out = Z_BUFSIZE;
    }

    return deflateParams(&(gz->stream), level, strategy);
}
/* ===========================================================================
     Fetch the next raw byte of the underlying file, refilling the input
   buffer with fread when it is empty; next_in and avail_in are updated.
   Returns EOF at end of file (z_eof is then set, and z_err becomes Z_ERRNO
   if the stdio stream reports an error).
   IN assertion: the stream s has been successfully opened for reading.
*/
local int get_byte(s)
    gz_stream *s;
{
    int value;

    if (s->z_eof) return EOF;

    if (s->stream.avail_in == 0) {
        /* Buffer exhausted: try to read another chunk. */
        errno = 0;
        s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file);
        if (s->stream.avail_in == 0) {
            s->z_eof = 1;
            if (ferror(s->file)) s->z_err = Z_ERRNO;
            return EOF;
        }
        s->stream.next_in = s->inbuf;
    }

    s->stream.avail_in--;
    value = *(s->stream.next_in);
    s->stream.next_in++;
    return value;
}
/* ===========================================================================
Check the gzip header of a gz_stream opened for reading. Set the stream
mode to transparent if the gzip magic header is not present; set s->z_err
to Z_DATA_ERROR if the magic header is present but the rest of the header
is incorrect.
On return s->z_err is Z_OK when data follows, Z_STREAM_END when no magic
header was found and no input is left, or Z_DATA_ERROR as described above.
IN assertion: the stream s has already been created successfully;
s->stream.avail_in is zero for the first time, but may be non-zero
for concatenated .gz files.
*/
local void check_header(s)
gz_stream *s;
{
int method; /* method byte */
int flags; /* flags byte */
uInt len;
int c;
/* Check the gzip magic header */
for (len = 0; len < 2; len++) {
c = get_byte(s);
if (c != gz_magic[len]) {
/* Not a gzip member: push back the bytes consumed so far so that they
* can be returned as plain data in transparent mode.
*/
if (len != 0) s->stream.avail_in++, s->stream.next_in--;
if (c != EOF) {
s->stream.avail_in++, s->stream.next_in--;
s->transparent = 1;
}
s->z_err = s->stream.avail_in != 0 ? Z_OK : Z_STREAM_END;
return;
}
}
method = get_byte(s);
flags = get_byte(s);
if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
s->z_err = Z_DATA_ERROR;
return;
}
/* Discard time, xflags and OS code: */
for (len = 0; len < 6; len++) (void)get_byte(s);
if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
/* two-byte little-endian length, followed by that many bytes */
len = (uInt)get_byte(s);
len += ((uInt)get_byte(s))<<8;
/* len is garbage if EOF but the loop below will quit anyway */
while (len-- != 0 && get_byte(s) != EOF) ;
}
if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
/* zero-terminated string */
while ((c = get_byte(s)) != 0 && c != EOF) ;
}
if ((flags & COMMENT) != 0) { /* skip the .gz file comment */
/* zero-terminated string */
while ((c = get_byte(s)) != 0 && c != EOF) ;
}
if ((flags & HEAD_CRC) != 0) { /* skip the header crc */
for (len = 0; len < 2; len++) (void)get_byte(s);
}
/* Hitting end of file anywhere inside the header is a data error. */
s->z_err = s->z_eof ? Z_DATA_ERROR : Z_OK;
}
/* ===========================================================================
 * Release everything owned by a gz_stream: the zlib state, the stdio file,
 * the buffers and finally the structure itself. Resources are released in
 * the reverse order of their allocation where possible.
 * Returns Z_STREAM_ERROR for a NULL stream; otherwise a zlib error code,
 * where a negative s->z_err takes precedence over errors met while
 * cleaning up.
 */
local int destroy (s)
    gz_stream *s;
{
    int status = Z_OK;

    if (s == NULL) return Z_STREAM_ERROR;

    TRYFREE(s->msg);

    /* Tear down the compression state, if one was ever set up. */
    if (s->stream.state != NULL) {
        switch (s->mode) {
        case 'w':
#ifdef NO_DEFLATE
            status = Z_STREAM_ERROR;
#else
            status = deflateEnd(&(s->stream));
#endif
            break;
        case 'r':
            status = inflateEnd(&(s->stream));
            break;
        default:
            break;
        }
    }

    if (s->file != NULL) {
        if (fclose(s->file)) {
#ifdef ESPIPE
            if (errno != ESPIPE) /* fclose is broken for pipes in HP/UX */
#endif
                status = Z_ERRNO;
        }
    }

    /* An error recorded earlier on the stream wins. */
    if (s->z_err < 0) status = s->z_err;

    TRYFREE(s->inbuf);
    TRYFREE(s->outbuf);
    TRYFREE(s->path);
    TRYFREE(s);
    return status;
}
/* ===========================================================================
Reads the given number of uncompressed bytes from the compressed file.
gzread returns the number of bytes actually read (0 for end of file).
It returns Z_STREAM_ERROR if file is NULL or was not opened for reading,
and -1 if a previous operation left the stream in the Z_DATA_ERROR or
Z_ERRNO state.
*/
int ZEXPORT gzread (file, buf, len)
gzFile file;
voidp buf;
unsigned len;
{
gz_stream *s = (gz_stream*)file;
Bytef *start = (Bytef*)buf; /* starting point for crc computation */
Byte *next_out; /* == stream.next_out but not forced far (for MSDOS) */
if (s == NULL || s->mode != 'r') return Z_STREAM_ERROR;
if (s->z_err == Z_DATA_ERROR || s->z_err == Z_ERRNO) return -1;
if (s->z_err == Z_STREAM_END) return 0; /* EOF */
next_out = (Byte*)buf;
s->stream.next_out = (Bytef*)buf;
s->stream.avail_out = len;
while (s->stream.avail_out != 0) {
if (s->transparent) {
/* Not a gzip file: hand the raw bytes straight to the caller. */
/* Copy first the lookahead bytes: */
uInt n = s->stream.avail_in;
if (n > s->stream.avail_out) n = s->stream.avail_out;
if (n > 0) {
zmemcpy(s->stream.next_out, s->stream.next_in, n);
next_out += n;
s->stream.next_out = next_out;
s->stream.next_in += n;
s->stream.avail_out -= n;
s->stream.avail_in -= n;
}
/* then read the remainder directly from the file */
if (s->stream.avail_out > 0) {
s->stream.avail_out -= fread(next_out, 1, s->stream.avail_out,
s->file);
}
/* len becomes the number of bytes actually delivered */
len -= s->stream.avail_out;
s->stream.total_in += (uLong)len;
s->stream.total_out += (uLong)len;
if (len == 0) s->z_eof = 1;
return (int)len;
}
/* Refill the input buffer when it is empty and the file is not at EOF. */
if (s->stream.avail_in == 0 && !s->z_eof) {
errno = 0;
s->stream.avail_in = fread(s->inbuf, 1, Z_BUFSIZE, s->file);
if (s->stream.avail_in == 0) {
s->z_eof = 1;
if (ferror(s->file)) {
s->z_err = Z_ERRNO;
break;
}
}
s->stream.next_in = s->inbuf;
}
s->z_err = inflate(&(s->stream), Z_NO_FLUSH);
if (s->z_err == Z_STREAM_END) {
/* Check CRC and original size */
s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
start = s->stream.next_out;
if (getLong(s) != s->crc) {
s->z_err = Z_DATA_ERROR;
} else {
(void)getLong(s);
/* The uncompressed length returned by above getlong() may
* be different from s->stream.total_out) in case of
* concatenated .gz files. Check for such files:
*/
check_header(s);
if (s->z_err == Z_OK) {
/* Another member follows: restart inflate but keep the running
* totals, and start a fresh CRC for the new member.
*/
uLong total_in = s->stream.total_in;
uLong total_out = s->stream.total_out;
inflateReset(&(s->stream));
s->stream.total_in = total_in;
s->stream.total_out = total_out;
s->crc = crc32(0L, Z_NULL, 0);
}
}
}
if (s->z_err != Z_OK || s->z_eof) break;
}
/* Fold the bytes produced since `start` into the running CRC. */
s->crc = crc32(s->crc, start, (uInt)(s->stream.next_out - start));
return (int)(len - s->stream.avail_out);
}
/* ===========================================================================
     Read a single uncompressed byte through gzread. Returns the byte as a
   non-negative int, or -1 if gzread did not deliver exactly one byte (end
   of file or error).
*/
int ZEXPORT gzgetc(file)
    gzFile file;
{
    unsigned char byte;

    if (gzread(file, &byte, 1) != 1) {
        return -1;
    }
    return byte;
}
/* ===========================================================================
     Read a line from the compressed file into buf. Reading stops once
   len-1 characters have been stored, once a newline has been stored, or
   when gzread cannot deliver another byte. The result is always terminated
   with a null character.
     Returns buf, or Z_NULL if buf is Z_NULL, len is not positive, or
   nothing could be read although there was room for at least one character.
     Bytes are fetched one at a time; no attempt is made at speed.
*/
char * ZEXPORT gzgets(file, buf, len)
    gzFile file;
    char *buf;
    int len;
{
    char *line = buf;

    if (buf == Z_NULL || len <= 0) return Z_NULL;

    for (;;) {
        if (--len <= 0) break;                  /* no room left */
        if (gzread(file, buf, 1) != 1) break;   /* end of file or error */
        if (*buf++ == '\n') break;              /* newline stored */
    }
    *buf = '\0';

    if (line == buf && len > 0) {
        return Z_NULL;
    }
    return line;
}
#ifndef NO_DEFLATE
/* ===========================================================================
     Compress len bytes from buf and append them to the file. The output
   buffer is written to the file each time it fills up. Returns the number
   of input bytes consumed, or Z_STREAM_ERROR if file is NULL or was not
   opened for writing. The running CRC is updated over all len bytes.
*/
int ZEXPORT gzwrite (file, buf, len)
    gzFile file;
    const voidp buf;
    unsigned len;
{
    gz_stream *gz = (gz_stream*)file;

    if (gz == NULL || gz->mode != 'w') return Z_STREAM_ERROR;

    gz->stream.next_in = (Bytef*)buf;
    gz->stream.avail_in = len;

    while (gz->stream.avail_in != 0) {
        if (gz->stream.avail_out == 0) {
            /* Output buffer full: flush it to the file and start over. */
            gz->stream.next_out = gz->outbuf;
            if (fwrite(gz->outbuf, 1, Z_BUFSIZE, gz->file) != Z_BUFSIZE) {
                gz->z_err = Z_ERRNO;
                break;
            }
            gz->stream.avail_out = Z_BUFSIZE;
        }
        gz->z_err = deflate(&(gz->stream), Z_NO_FLUSH);
        if (gz->z_err != Z_OK) {
            break;
        }
    }
    gz->crc = crc32(gz->crc, (const Bytef *)buf, len);

    return (int)(len - gz->stream.avail_in);
}
/* ===========================================================================
     Converts, formats, and writes the args to the compressed file under
   control of the format string, as in fprintf. gzprintf returns the number of
   uncompressed bytes actually written (0 in case of error).
     The formatted text is built in a fixed buffer of Z_PRINTF_BUFSIZE bytes.
   With (v)snprintf the output is truncated to fit and the buffer is always
   NUL-terminated here, because some old implementations do not terminate on
   truncation. Without it, a sentinel in the last byte of the buffer is used
   to detect that (v)sprintf ran past the end; 0 is then returned instead of
   handing an unterminated buffer to strlen. This is only a best-effort
   check: callers must still keep the output below Z_PRINTF_BUFSIZE bytes.
*/
#ifdef STDC
#include <stdarg.h>

int ZEXPORTVA gzprintf (gzFile file, const char *format, /* args */ ...)
{
    char buf[Z_PRINTF_BUFSIZE];
    va_list va;
    int len;

    buf[sizeof(buf) - 1] = '\0'; /* overflow sentinel */
    va_start(va, format);
#ifdef HAS_vsnprintf
    (void)vsnprintf(buf, sizeof(buf), format, va);
#else
    (void)vsprintf(buf, format, va);
#endif
    va_end(va);
#ifndef HAS_vsnprintf
    if (buf[sizeof(buf) - 1] != '\0') return 0; /* buffer was overrun */
#endif
    buf[sizeof(buf) - 1] = '\0'; /* guarantee termination after truncation */
    len = strlen(buf); /* some *sprintf don't return the nb of bytes written */
    if (len <= 0) return 0;

    return gzwrite(file, buf, (unsigned)len);
}
#else /* not ANSI C */

int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
                        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
    gzFile file;
    const char *format;
    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
{
    char buf[Z_PRINTF_BUFSIZE];
    int len;

    buf[sizeof(buf) - 1] = '\0'; /* overflow sentinel */
#ifdef HAS_snprintf
    snprintf(buf, sizeof(buf), format, a1, a2, a3, a4, a5, a6, a7, a8,
             a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
#else
    sprintf(buf, format, a1, a2, a3, a4, a5, a6, a7, a8,
            a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
    if (buf[sizeof(buf) - 1] != '\0') return 0; /* buffer was overrun */
#endif
    buf[sizeof(buf) - 1] = '\0'; /* guarantee termination after truncation */
    len = strlen(buf); /* old sprintf doesn't return the nb of bytes written */
    if (len <= 0) return 0;

    return gzwrite(file, buf, len);
}
#endif
/* ===========================================================================
     Write the single byte c (converted to unsigned char) to the compressed
   file. Returns the byte written, or -1 if gzwrite did not accept exactly
   one byte.
*/
int ZEXPORT gzputc(file, c)
    gzFile file;
    int c;
{
    /* Go through a real unsigned char: passing &c would pick the wrong
     * byte of the int on big endian systems.
     */
    unsigned char byte = (unsigned char) c;

    if (gzwrite(file, &byte, 1) != 1) {
        return -1;
    }
    return (int)byte;
}
/* ===========================================================================
     Write the null-terminated string s to the compressed file; the
   terminating null character itself is not written.
     Returns whatever gzwrite returns for strlen(s) bytes.
*/
int ZEXPORT gzputs(file, s)
    gzFile file;
    const char *s;
{
    unsigned length = (unsigned)strlen(s);

    return gzwrite(file, (char*)s, length);
}
/* ===========================================================================
     Push all pending compressed output to the file. The flush parameter
   has the same meaning as for deflate(). Returns Z_STREAM_ERROR if file is
   NULL or not opened for writing, Z_ERRNO on a short write, and otherwise
   the stream's z_err with Z_STREAM_END reported as Z_OK.
*/
local int do_flush (file, flush)
    gzFile file;
    int flush;
{
    gz_stream *gz = (gz_stream*)file;
    uInt pending;          /* bytes waiting in the output buffer */
    int finished = 0;      /* set once deflate has nothing more to emit */

    if (gz == NULL || gz->mode != 'w') return Z_STREAM_ERROR;

    gz->stream.avail_in = 0; /* should be zero already anyway */

    while (1) {
        pending = Z_BUFSIZE - gz->stream.avail_out;

        if (pending != 0) {
            if ((uInt)fwrite(gz->outbuf, 1, pending, gz->file) != pending) {
                gz->z_err = Z_ERRNO;
                return Z_ERRNO;
            }
            gz->stream.next_out = gz->outbuf;
            gz->stream.avail_out = Z_BUFSIZE;
        }
        if (finished) break;

        gz->z_err = deflate(&(gz->stream), flush);

        /* Ignore the second of two consecutive flushes: */
        if (pending == 0 && gz->z_err == Z_BUF_ERROR) gz->z_err = Z_OK;

        /* deflate has finished flushing only when it hasn't used up
         * all the available space in the output buffer:
         */
        finished = (gz->stream.avail_out != 0 || gz->z_err == Z_STREAM_END);

        if (gz->z_err != Z_OK && gz->z_err != Z_STREAM_END) break;
    }

    if (gz->z_err == Z_STREAM_END) {
        return Z_OK;
    }
    return gz->z_err;
}
/* Flush pending compressed output to the file (via do_flush) and then flush
 * the stdio stream as well. Returns the do_flush error if there is one;
 * otherwise the stream's z_err, with Z_STREAM_END reported as Z_OK.
 */
int ZEXPORT gzflush (file, flush)
    gzFile file;
    int flush;
{
    gz_stream *gz = (gz_stream*)file;
    int status = do_flush(file, flush);

    if (status != 0) {
        return status;
    }
    fflush(gz->file);

    if (gz->z_err == Z_STREAM_END) {
        return Z_OK;
    }
    return gz->z_err;
}
#endif /* NO_DEFLATE */
/* ===========================================================================
     Sets the starting position for the next gzread or gzwrite on the given
   compressed file. The offset represents a number of bytes in the
   uncompressed data stream; whence is SEEK_SET or SEEK_CUR.
     gzseek returns the resulting offset location as measured in bytes from
   the beginning of the uncompressed stream, or -1 in case of error
   (including failure to allocate the scratch buffer used for seeking).
     SEEK_END is not implemented, returns error.
     When writing, only forward seeks are possible; they are carried out by
   compressing a run of zero bytes. When reading, a backward seek rewinds
   the file and decompresses again from the start.
     In this version of the library, gzseek can be extremely slow.
*/
z_off_t ZEXPORT gzseek (file, offset, whence)
    gzFile file;
    z_off_t offset;
    int whence;
{
    gz_stream *s = (gz_stream*)file;

    if (s == NULL || whence == SEEK_END ||
        s->z_err == Z_ERRNO || s->z_err == Z_DATA_ERROR) {
        return -1L;
    }

    if (s->mode == 'w') {
#ifdef NO_DEFLATE
        return -1L;
#else
        if (whence == SEEK_SET) {
            offset -= s->stream.total_in;
        }
        if (offset < 0) return -1L;

        /* At this point, offset is the number of zero bytes to write. */
        if (s->inbuf == Z_NULL) {
            s->inbuf = (Byte*)ALLOC(Z_BUFSIZE); /* for seeking */
            if (s->inbuf == Z_NULL) return -1L; /* out of memory */
            zmemzero(s->inbuf, Z_BUFSIZE);
        }
        while (offset > 0) {
            uInt size = Z_BUFSIZE;
            if (offset < Z_BUFSIZE) size = (uInt)offset;

            size = gzwrite(file, s->inbuf, size);
            if (size == 0) return -1L;

            offset -= size;
        }
        return (z_off_t)s->stream.total_in;
#endif
    }
    /* Rest of function is for reading only */

    /* compute absolute position */
    if (whence == SEEK_CUR) {
        offset += s->stream.total_out;
    }
    if (offset < 0) return -1L;

    if (s->transparent) {
        /* map to fseek */
        s->stream.avail_in = 0;
        s->stream.next_in = s->inbuf;
        if (fseek(s->file, offset, SEEK_SET) < 0) return -1L;

        s->stream.total_in = s->stream.total_out = (uLong)offset;
        return offset;
    }

    /* For a negative seek, rewind and use positive seek */
    if ((uLong)offset >= s->stream.total_out) {
        offset -= s->stream.total_out;
    } else if (gzrewind(file) < 0) {
        return -1L;
    }
    /* offset is now the number of bytes to skip. */

    if (offset != 0 && s->outbuf == Z_NULL) {
        s->outbuf = (Byte*)ALLOC(Z_BUFSIZE);
        if (s->outbuf == Z_NULL) return -1L; /* out of memory */
    }
    while (offset > 0) {
        int size = Z_BUFSIZE;
        if (offset < Z_BUFSIZE) size = (int)offset;

        size = gzread(file, s->outbuf, (uInt)size);
        if (size <= 0) return -1L;
        offset -= size;
    }
    return (z_off_t)s->stream.total_out;
}
/* ===========================================================================
     Rewind a file opened for reading: the error and end-of-file state, the
   buffered input and the running CRC are all reset, and the file is
   repositioned at the start of the data.
     Returns -1 if file is NULL or not opened for reading, 0 after a plain
   rewind, and otherwise the result of fseek.
*/
int ZEXPORT gzrewind (file)
    gzFile file;
{
    gz_stream *gz = (gz_stream*)file;

    if (gz == NULL || gz->mode != 'r') return -1;

    gz->z_err = Z_OK;
    gz->z_eof = 0;
    gz->stream.avail_in = 0;
    gz->stream.next_in = gz->inbuf;
    gz->crc = crc32(0L, Z_NULL, 0);

    if (gz->startpos != 0) {
        /* Compressed data: restart inflate just past the gzip header. */
        (void) inflateReset(&gz->stream);
        return fseek(gz->file, gz->startpos, SEEK_SET);
    }

    /* not a compressed file */
    rewind(gz->file);
    return 0;
}
/* ===========================================================================
     Report the position of the next gzread or gzwrite on the given
   compressed file, counted in bytes of the uncompressed data stream.
   Implemented as a seek of zero bytes relative to the current position.
*/
z_off_t ZEXPORT gztell (file)
    gzFile file;
{
    z_off_t position = gzseek(file, 0L, SEEK_CUR);

    return position;
}
/* ===========================================================================
     Returns the stream's end-of-file flag (set once EOF has been detected
   while reading). Returns 0 if file is NULL or was not opened for reading.
*/
int ZEXPORT gzeof (file)
    gzFile file;
{
    gz_stream *gz = (gz_stream*)file;

    if (gz == NULL || gz->mode != 'r') {
        return 0;
    }
    return gz->z_eof;
}
/* ===========================================================================
     Write the low 32 bits of x to the given file as four bytes, least
   significant byte first.
*/
local void putLong (file, x)
    FILE *file;
    uLong x;
{
    int shift;

    for (shift = 0; shift < 32; shift += 8) {
        fputc((int)((x >> shift) & 0xff), file);
    }
}
/* ===========================================================================
     Assemble a value from the next four bytes of the gz_stream, least
   significant byte first. If the fourth byte cannot be read (EOF), z_err is
   set to Z_DATA_ERROR; the returned value is then meaningless.
*/
local uLong getLong (s)
    gz_stream *s;
{
    uLong value = 0;
    int shift;
    int last;

    /* the three low-order bytes */
    for (shift = 0; shift < 24; shift += 8) {
        value += ((uLong)get_byte(s)) << shift;
    }

    /* the high-order byte doubles as the EOF probe */
    last = get_byte(s);
    if (last == EOF) s->z_err = Z_DATA_ERROR;
    value += ((uLong)last) << 24;

    return value;
}
/* ===========================================================================
     Close a gzFile. For a file opened for writing, the pending output is
   flushed with Z_FINISH and the gzip trailer (CRC, then uncompressed
   length) is appended. In every case the stream is then torn down by
   destroy(), whose result is returned.
     Returns Z_STREAM_ERROR if file is NULL (or, in builds without deflate
   support, if the file was opened for writing).
*/
int ZEXPORT gzclose (file)
    gzFile file;
{
    gz_stream *gz = (gz_stream*)file;

    if (gz == NULL) return Z_STREAM_ERROR;

    if (gz->mode == 'w') {
#ifdef NO_DEFLATE
        return Z_STREAM_ERROR;
#else
        int flush_status = do_flush(file, Z_FINISH);

        /* No trailer is written if the final flush failed. */
        if (flush_status != Z_OK) return destroy(gz);

        putLong(gz->file, gz->crc);
        putLong(gz->file, gz->stream.total_in);
#endif
    }
    return destroy(gz);
}
/* ===========================================================================
     Returns the error message for the last error which occurred on the
   given compressed file. errnum is set to zlib error number. If an
   error occurred in the file system and not in the compression library,
   errnum is set to Z_ERRNO and the application may consult errno
   to get the exact error code.
     The returned message has the form "<path>: <message>" and is owned by
   the stream; it is replaced by the next call to gzerror. If memory for
   the message cannot be allocated, the generic Z_MEM_ERROR message is
   returned instead.
*/
const char* ZEXPORT gzerror (file, errnum)
    gzFile file;
    int *errnum;
{
    char *m;
    gz_stream *s = (gz_stream*)file;

    if (s == NULL) {
        *errnum = Z_STREAM_ERROR;
        return (const char*)ERR_MSG(Z_STREAM_ERROR);
    }
    *errnum = s->z_err;
    if (*errnum == Z_OK) return (const char*)"";

    m = (char*)(*errnum == Z_ERRNO ? zstrerror(errno) : s->stream.msg);

    if (m == NULL || *m == '\0') m = (char*)ERR_MSG(s->z_err);

    TRYFREE(s->msg);
    /* +3: two bytes for ": " and one for the terminating NUL */
    s->msg = (char*)ALLOC(strlen(s->path) + strlen(m) + 3);
    if (s->msg == Z_NULL) return (const char*)ERR_MSG(Z_MEM_ERROR);
    strcpy(s->msg, s->path);
    strcat(s->msg, ": ");
    strcat(s->msg, m);
    return (const char*)s->msg;
}

View File

@@ -0,0 +1,398 @@
/* infblock.c -- interpret and process block types to last block
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "infblock.h"
#include "inftrees.h"
#include "infcodes.h"
#include "infutil.h"
struct inflate_codes_state {int dummy;}; /* for buggy compilers */
/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits
/* Table for deflate from PKZIP's appnote.txt. */
local const uInt border[] = { /* Order of the bit length code lengths */
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
/*
Notes beyond the 1.93a appnote.txt:
1. Distance pointers never point before the beginning of the output
stream.
2. Distance pointers can point back across blocks, up to 32k away.
3. There is an implied maximum of 7 bits for the bit length table and
15 bits for the actual data.
4. If only one code exists, then it is encoded using one bit. (Zero
would be more efficient, but perhaps a little confusing.) If two
codes exist, they are coded using one bit each (0 and 1).
5. There is no way of sending zero distance codes--a dummy must be
sent if there are none. (History: a pre 2.0 version of PKZIP would
store blocks with no distance codes, but this was discovered to be
too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
zero distance codes, which is sent as one code of zero bits in
length.
6. There are up to 286 literal/length codes. Code 256 represents the
end-of-block. Note however that the static length tree defines
288 codes just to fill out the Huffman codes. Codes 286 and 287
cannot be used though, since there is no length base or extra bits
defined for them. Similarly, there are up to 30 distance codes.
However, static trees define 32 codes (all 5 bits) to fill out the
Huffman codes, but the last two had better not show up in the data.
7. Unzip can check dynamic Huffman blocks for complete code sets.
The exception is that a single code would not be complete (see #4).
8. The five bits following the block type is really the number of
literal codes sent minus 257.
9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
(1+6+6). Therefore, to output three times the length, you output
three codes (1+1+1), whereas to output four times the same length,
you only need two codes (1+3). Hmm.
10. In the tree reconstruction algorithm, Code = Code + Increment
only if BitLength(i) is not zero. (Pretty obvious.)
11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
12. Note: length code 284 can represent 227-258, but length code 285
really is 258. The last length deserves its own, short code
since it gets used a lot in very redundant files. The length
258 is special since 258 - 3 (the min match length) is 255.
13. The literal/length and distance code bit lengths are read as a
single stream of lengths. It is possible (and advantageous) for
a repeat code (16, 17, or 18) to go across the boundary between
the two sets of lengths.
*/
/* Return the blocks state s to its initial condition (mode TYPE, empty bit
 * buffer, empty window). If c is not Z_NULL, the current check value is
 * stored through it first. Whatever the interrupted mode had allocated (the
 * bit-length array, or the codes state) is released, and the check value is
 * re-initialised through checkfn when one is installed.
 */
void inflate_blocks_reset(s, z, c)
    inflate_blocks_statef *s;
    z_streamp z;
    uLongf *c;
{
    if (c != Z_NULL) {
        *c = s->check;
    }

    /* free whatever the current mode still owns */
    switch (s->mode) {
    case BTREE:
    case DTREE:
        ZFREE(z, s->sub.trees.blens);
        break;
    case CODES:
        inflate_codes_free(s->sub.decode.codes, z);
        break;
    default:
        break;
    }

    s->mode = TYPE;
    s->bitk = 0;
    s->bitb = 0;
    s->read = s->write = s->window;

    if (s->checkfn != Z_NULL) {
        z->adler = s->check = (*s->checkfn)(0L, (const Bytef *)Z_NULL, 0);
    }
    Tracev((stderr, "inflate: blocks reset\n"));
}
/* Allocate and initialise a blocks state with a sliding window of w bytes
 * and the check function c. Three allocations are made through the
 * stream's allocator: the state itself, the table of MANY inflate_huft
 * entries, and the window. Returns Z_NULL if any of them fails; anything
 * already allocated is freed first.
 */
inflate_blocks_statef *inflate_blocks_new(z, c, w)
    z_streamp z;
    check_func c;
    uInt w;
{
    inflate_blocks_statef *state;

    state = (inflate_blocks_statef *)ZALLOC
            (z,1,sizeof(struct inflate_blocks_state));
    if (state == Z_NULL) {
        return Z_NULL;
    }

    state->hufts = (inflate_huft *)ZALLOC(z, sizeof(inflate_huft), MANY);
    if (state->hufts == Z_NULL) {
        ZFREE(z, state);
        return Z_NULL;
    }

    state->window = (Bytef *)ZALLOC(z, 1, w);
    if (state->window == Z_NULL) {
        ZFREE(z, state->hufts);
        ZFREE(z, state);
        return Z_NULL;
    }

    state->end = state->window + w;
    state->checkfn = c;
    state->mode = TYPE;
    Tracev((stderr, "inflate: blocks allocated\n"));

    /* the remaining fields are set up by the reset routine */
    inflate_blocks_reset(state, z, Z_NULL);
    return state;
}
int inflate_blocks(s, z, r)
inflate_blocks_statef *s;
z_streamp z;
int r;
{
uInt t; /* temporary storage */
uLong b; /* bit buffer */
uInt k; /* bits in bit buffer */
Bytef *p; /* input data pointer */
uInt n; /* bytes available there */
Bytef *q; /* output window write pointer */
uInt m; /* bytes to end of window or read pointer */
/* copy input/output information to locals (UPDATE macro restores) */
LOAD
/* process input based on current state */
while (1) switch (s->mode)
{
case TYPE:
NEEDBITS(3)
t = (uInt)b & 7;
s->last = t & 1;
switch (t >> 1)
{
case 0: /* stored */
Tracev((stderr, "inflate: stored block%s\n",
s->last ? " (last)" : ""));
DUMPBITS(3)
t = k & 7; /* go to byte boundary */
DUMPBITS(t)
s->mode = LENS; /* get length of stored block */
break;
case 1: /* fixed */
Tracev((stderr, "inflate: fixed codes block%s\n",
s->last ? " (last)" : ""));
{
uInt bl, bd;
inflate_huft *tl, *td;
inflate_trees_fixed(&bl, &bd, &tl, &td, z);
s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
if (s->sub.decode.codes == Z_NULL)
{
r = Z_MEM_ERROR;
LEAVE
}
}
DUMPBITS(3)
s->mode = CODES;
break;
case 2: /* dynamic */
Tracev((stderr, "inflate: dynamic codes block%s\n",
s->last ? " (last)" : ""));
DUMPBITS(3)
s->mode = TABLE;
break;
case 3: /* illegal */
DUMPBITS(3)
s->mode = BAD;
z->msg = (char*)"invalid block type";
r = Z_DATA_ERROR;
LEAVE
}
break;
case LENS:
NEEDBITS(32)
if ((((~b) >> 16) & 0xffff) != (b & 0xffff))
{
s->mode = BAD;
z->msg = (char*)"invalid stored block lengths";
r = Z_DATA_ERROR;
LEAVE
}
s->sub.left = (uInt)b & 0xffff;
b = k = 0; /* dump bits */
Tracev((stderr, "inflate: stored length %u\n", s->sub.left));
s->mode = s->sub.left ? STORED : (s->last ? DRY : TYPE);
break;
case STORED:
if (n == 0)
LEAVE
NEEDOUT
t = s->sub.left;
if (t > n) t = n;
if (t > m) t = m;
zmemcpy(q, p, t);
p += t; n -= t;
q += t; m -= t;
if ((s->sub.left -= t) != 0)
break;
Tracev((stderr, "inflate: stored end, %lu total out\n",
z->total_out + (q >= s->read ? q - s->read :
(s->end - s->read) + (q - s->window))));
s->mode = s->last ? DRY : TYPE;
break;
case TABLE:
NEEDBITS(14)
s->sub.trees.table = t = (uInt)b & 0x3fff;
#ifndef PKZIP_BUG_WORKAROUND
if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
{
s->mode = BAD;
z->msg = (char*)"too many length or distance symbols";
r = Z_DATA_ERROR;
LEAVE
}
#endif
t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f);
if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
{
r = Z_MEM_ERROR;
LEAVE
}
DUMPBITS(14)
s->sub.trees.index = 0;
Tracev((stderr, "inflate: table sizes ok\n"));
s->mode = BTREE;
case BTREE:
while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
{
NEEDBITS(3)
s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
DUMPBITS(3)
}
while (s->sub.trees.index < 19)
s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
s->sub.trees.bb = 7;
t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
&s->sub.trees.tb, s->hufts, z);
if (t != Z_OK)
{
ZFREE(z, s->sub.trees.blens);
r = t;
if (r == Z_DATA_ERROR)
s->mode = BAD;
LEAVE
}
s->sub.trees.index = 0;
Tracev((stderr, "inflate: bits tree ok\n"));
s->mode = DTREE;
case DTREE:
while (t = s->sub.trees.table,
s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
{
inflate_huft *h;
uInt i, j, c;
t = s->sub.trees.bb;
NEEDBITS(t)
h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
t = h->bits;
c = h->base;
if (c < 16)
{
DUMPBITS(t)
s->sub.trees.blens[s->sub.trees.index++] = c;
}
else /* c == 16..18 */
{
i = c == 18 ? 7 : c - 14;
j = c == 18 ? 11 : 3;
NEEDBITS(t + i)
DUMPBITS(t)
j += (uInt)b & inflate_mask[i];
DUMPBITS(i)
i = s->sub.trees.index;
t = s->sub.trees.table;
if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
(c == 16 && i < 1))
{
ZFREE(z, s->sub.trees.blens);
s->mode = BAD;
z->msg = (char*)"invalid bit length repeat";
r = Z_DATA_ERROR;
LEAVE
}
c = c == 16 ? s->sub.trees.blens[i - 1] : 0;
do {
s->sub.trees.blens[i++] = c;
} while (--j);
s->sub.trees.index = i;
}
}
s->sub.trees.tb = Z_NULL;
{
uInt bl, bd;
inflate_huft *tl, *td;
inflate_codes_statef *c;
bl = 9; /* must be <= 9 for lookahead assumptions */
bd = 6; /* must be <= 9 for lookahead assumptions */
t = s->sub.trees.table;
t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
s->sub.trees.blens, &bl, &bd, &tl, &td,
s->hufts, z);
ZFREE(z, s->sub.trees.blens);
if (t != Z_OK)
{
if (t == (uInt)Z_DATA_ERROR)
s->mode = BAD;
r = t;
LEAVE
}
Tracev((stderr, "inflate: trees ok\n"));
if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
{
r = Z_MEM_ERROR;
LEAVE
}
s->sub.decode.codes = c;
}
s->mode = CODES;
case CODES:
UPDATE
if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
return inflate_flush(s, z, r);
r = Z_OK;
inflate_codes_free(s->sub.decode.codes, z);
LOAD
Tracev((stderr, "inflate: codes end, %lu total out\n",
z->total_out + (q >= s->read ? q - s->read :
(s->end - s->read) + (q - s->window))));
if (!s->last)
{
s->mode = TYPE;
break;
}
s->mode = DRY;
case DRY:
FLUSH
if (s->read != s->write)
LEAVE
s->mode = DONE;
case DONE:
r = Z_STREAM_END;
LEAVE
case BAD:
r = Z_DATA_ERROR;
LEAVE
default:
r = Z_STREAM_ERROR;
LEAVE
}
}
/* Tear down a blocks state: reset it, then release its buffers and the
 * state structure itself.
 *   s : blocks state to destroy; it is freed here and must not be used again
 *   z : stream handed to inflate_blocks_reset() and to every ZFREE
 * Always returns Z_OK.
 */
int inflate_blocks_free(s, z)
inflate_blocks_statef *s;
z_streamp z;
{
/* reset first; passing Z_NULL means no check value is wanted back */
inflate_blocks_reset(s, z, Z_NULL);
ZFREE(z, s->window);
ZFREE(z, s->hufts);
/* the state goes last, after the buffers it points to */
ZFREE(z, s);
Tracev((stderr, "inflate: blocks freed\n"));
return Z_OK;
}
/* Preload the sliding window with a preset dictionary.
 *   s : blocks state whose window is filled
 *   d : dictionary bytes
 *   n : number of bytes to copy from d
 * The n bytes are copied to the start of the window and both the read and
 * the write pointer are placed just behind them.  No bounds check is made
 * here; the caller is expected to pass an n that fits in the window.
 */
void inflate_set_dictionary(s, d, n)
inflate_blocks_statef *s;
const Bytef *d;
uInt n;
{
  Bytef *past = s->window + n;  /* first byte behind the dictionary */

  zmemcpy(s->window, d, n);
  s->write = past;
  s->read = past;
}
/* Returns true if inflate is currently at the end of a block generated
 * by Z_SYNC_FLUSH or Z_FULL_FLUSH.
 * The test is simply whether the block decoder is in the LENS state, i.e.
 * waiting for the length bytes of a stored block.
 * IN assertion: s != Z_NULL
 */
int inflate_blocks_sync_point(s)
inflate_blocks_statef *s;
{
return s->mode == LENS;
}

View File

@@ -0,0 +1,39 @@
/* infblock.h -- header to use infblock.c
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* Opaque block-decoder state: only the tag is declared in this header, so
   users of the header handle the state through pointers only. */
struct inflate_blocks_state;
typedef struct inflate_blocks_state FAR inflate_blocks_statef;
/* Create a blocks state for stream z. */
extern inflate_blocks_statef * inflate_blocks_new OF((
z_streamp z,
check_func c, /* check function */
uInt w)); /* window size */
/* Run the block decoder; the int argument is the return code to start from. */
extern int inflate_blocks OF((
inflate_blocks_statef *,
z_streamp ,
int)); /* initial return code */
/* Reset a blocks state; the last argument receives the check value and may
   be Z_NULL when the caller does not want it. */
extern void inflate_blocks_reset OF((
inflate_blocks_statef *,
z_streamp ,
uLongf *)); /* check value on output */
/* Free a blocks state together with the buffers it owns. */
extern int inflate_blocks_free OF((
inflate_blocks_statef *,
z_streamp));
/* Copy a preset dictionary into the window of the blocks state. */
extern void inflate_set_dictionary OF((
inflate_blocks_statef *s,
const Bytef *d, /* dictionary */
uInt n)); /* dictionary length */
/* Non-zero when the decoder is waiting for the lengths of a stored block. */
extern int inflate_blocks_sync_point OF((
inflate_blocks_statef *s));

View File

@@ -0,0 +1,257 @@
/* infcodes.c -- process literals and length/distance pairs
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "inftrees.h"
#include "infblock.h"
#include "infcodes.h"
#include "infutil.h"
#include "inffast.h"
/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits
/* States of the inflate_codes() state machine.  The letter in each comment
   tells what the state is waiting for when the function has to leave. */
typedef enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
START, /* x: set up for LEN */
LEN, /* i: get length/literal/eob next */
LENEXT, /* i: getting length extra (have base) */
DIST, /* i: get distance next */
DISTEXT, /* i: getting distance extra */
COPY, /* o: copying bytes in window, waiting for space */
LIT, /* o: got literal, waiting for output space */
WASH, /* o: got eob, possibly still output waiting */
END, /* x: got eob and all data flushed */
BADCODE} /* x: got error */
inflate_codes_mode;
/* inflate codes private state: everything inflate_codes() needs to resume
   decoding where it stopped when it ran out of input or output space */
struct inflate_codes_state {
/* mode */
inflate_codes_mode mode; /* current inflate_codes mode */
/* mode dependent information */
uInt len; /* match length: base set in LEN, extra added in LENEXT,
             counted down while copying in COPY */
union {
struct {
inflate_huft *tree; /* pointer into tree */
uInt need; /* bits needed */
} code; /* if LEN or DIST, where in tree */
uInt lit; /* if LIT, literal */
struct {
uInt get; /* bits to get for extra */
uInt dist; /* distance back to copy from */
} copy; /* if EXT or COPY, where and how much */
} sub; /* submode */
/* mode independent information */
Byte lbits; /* ltree bits decoded per branch */
Byte dbits; /* dtree bits decoded per branch */
inflate_huft *ltree; /* literal/length/eob tree */
inflate_huft *dtree; /* distance tree */
};
/* Allocate and initialise the state used by inflate_codes().
 *   bl, bd : bits looked up per step in the literal/length and distance
 *            tables (each is stored truncated to a Byte)
 *   tl, td : literal/length table and distance table; the pointers are
 *            stored, the tables are not copied
 *   z      : stream passed to ZALLOC
 * Returns the new state in mode START, or Z_NULL if the allocation failed.
 */
inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z)
uInt bl, bd;
inflate_huft *tl;
inflate_huft *td; /* need separate declaration for Borland C++ */
z_streamp z;
{
  inflate_codes_statef *state;

  state = (inflate_codes_statef *)
          ZALLOC(z,1,sizeof(struct inflate_codes_state));
  if (state == Z_NULL)
    return state;               /* out of memory: nothing to set up */

  state->mode = START;
  state->lbits = (Byte)bl;
  state->dbits = (Byte)bd;
  state->ltree = tl;
  state->dtree = td;
  Tracev((stderr, "inflate: codes new\n"));
  return state;
}
/* Decode the literal and length/distance codes of the current block.
 *
 *   s : blocks state; s->sub.decode.codes is the codes state that drives
 *       this function and s->window .. s->end is the sliding window
 *   z : stream that supplies the input
 *   r : return code to carry along until a state changes it
 *
 * The function is a resumable state machine on c->mode.  A state that runs
 * out of input or output room leaves through the bit/output macros
 * (NEEDBITS, NEEDOUT, LEAVE -- defined outside this section) and is entered
 * again on the next call.  r becomes Z_STREAM_END once the end-of-block
 * code was seen and all output was flushed, Z_DATA_ERROR on an invalid
 * code (z->msg is set) and Z_STREAM_ERROR for an unknown mode.
 *
 * COPY locates its source so that it always lies inside the window: a
 * distance that reaches back further than the window is large (corrupt
 * input with a window smaller than 32K) is folded modulo the window size
 * instead of producing a pointer in front of s->window.
 */
int inflate_codes(s, z, r)
inflate_blocks_statef *s;
z_streamp z;
int r;
{
  uInt j;               /* temporary storage */
  inflate_huft *t;      /* temporary pointer */
  uInt e;               /* extra bits or operation */
  uLong b;              /* bit buffer */
  uInt k;               /* bits in bit buffer */
  Bytef *p;             /* input data pointer */
  uInt n;               /* bytes available there */
  Bytef *q;             /* output window write pointer */
  uInt m;               /* bytes to end of window or read pointer */
  Bytef *f;             /* pointer to copy strings from */
  inflate_codes_statef *c = s->sub.decode.codes;  /* codes state */

  /* copy input/output information to locals (UPDATE macro restores) */
  LOAD

  /* process input and output based on current state */
  while (1) switch (c->mode)
  {             /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
    case START:         /* x: set up for LEN */
#ifndef SLOW
      /* enough room on both sides: let the fast decoder do the bulk */
      if (m >= 258 && n >= 10)
      {
        UPDATE
        r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z);
        LOAD
        if (r != Z_OK)
        {
          c->mode = r == Z_STREAM_END ? WASH : BADCODE;
          break;
        }
      }
#endif /* !SLOW */
      c->sub.code.need = c->lbits;
      c->sub.code.tree = c->ltree;
      c->mode = LEN;
      /* fall through */
    case LEN:           /* i: get length/literal/eob next */
      j = c->sub.code.need;
      NEEDBITS(j)
      t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
      DUMPBITS(t->bits)
      e = (uInt)(t->exop);
      if (e == 0)               /* literal */
      {
        c->sub.lit = t->base;
        Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
                 "inflate:         literal '%c'\n" :
                 "inflate:         literal 0x%02x\n", t->base));
        c->mode = LIT;
        break;
      }
      if (e & 16)               /* length */
      {
        c->sub.copy.get = e & 15;
        c->len = t->base;
        c->mode = LENEXT;
        break;
      }
      if ((e & 64) == 0)        /* next table */
      {
        c->sub.code.need = e;
        c->sub.code.tree = t + t->base;
        break;
      }
      if (e & 32)               /* end of block */
      {
        Tracevv((stderr, "inflate:         end of block\n"));
        c->mode = WASH;
        break;
      }
      c->mode = BADCODE;        /* invalid code */
      z->msg = (char*)"invalid literal/length code";
      r = Z_DATA_ERROR;
      LEAVE
    case LENEXT:        /* i: getting length extra (have base) */
      j = c->sub.copy.get;
      NEEDBITS(j)
      c->len += (uInt)b & inflate_mask[j];
      DUMPBITS(j)
      c->sub.code.need = c->dbits;
      c->sub.code.tree = c->dtree;
      Tracevv((stderr, "inflate:         length %u\n", c->len));
      c->mode = DIST;
      /* fall through */
    case DIST:          /* i: get distance next */
      j = c->sub.code.need;
      NEEDBITS(j)
      t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
      DUMPBITS(t->bits)
      e = (uInt)(t->exop);
      if (e & 16)               /* distance */
      {
        c->sub.copy.get = e & 15;
        c->sub.copy.dist = t->base;
        c->mode = DISTEXT;
        break;
      }
      if ((e & 64) == 0)        /* next table */
      {
        c->sub.code.need = e;
        c->sub.code.tree = t + t->base;
        break;
      }
      c->mode = BADCODE;        /* invalid code */
      z->msg = (char*)"invalid distance code";
      r = Z_DATA_ERROR;
      LEAVE
    case DISTEXT:       /* i: getting distance extra */
      j = c->sub.copy.get;
      NEEDBITS(j)
      c->sub.copy.dist += (uInt)b & inflate_mask[j];
      DUMPBITS(j)
      Tracevv((stderr, "inflate:         distance %u\n", c->sub.copy.dist));
      c->mode = COPY;
      /* fall through */
    case COPY:          /* o: copying bytes in window, waiting for space */
      /* Find the source c->sub.copy.dist bytes behind q, wrapping around
         the window.  j and e are free temporaries in this state.  Plain
         if/else is used (no conditional expression), so the old Turbo C
         workaround is no longer needed. */
      j = (uInt)(q - s->window);        /* write offset inside the window */
      if (j >= c->sub.copy.dist)
        f = q - c->sub.copy.dist;       /* source is in front of q */
      else
      {
        e = c->sub.copy.dist - j;       /* bytes back from the window end */
        j = (uInt)(s->end - s->window); /* window size */
        if (e > j)                      /* invalid: further than the window */
          e = (e - 1) % j + 1;          /* fold into 1..j, stay in window */
        f = s->end - e;
      }
      while (c->len)
      {
        NEEDOUT
        OUTBYTE(*f++)
        if (f == s->end)
          f = s->window;
        c->len--;
      }
      c->mode = START;
      break;
    case LIT:           /* o: got literal, waiting for output space */
      NEEDOUT
      OUTBYTE(c->sub.lit)
      c->mode = START;
      break;
    case WASH:          /* o: got eob, possibly more output */
      if (k > 7)        /* return unused byte, if any */
      {
        Assert(k < 16, "inflate_codes grabbed too many bytes")
        k -= 8;
        n++;
        p--;            /* can always return one */
      }
      FLUSH
      if (s->read != s->write)
        LEAVE
      c->mode = END;
      /* fall through */
    case END:
      r = Z_STREAM_END;
      LEAVE
    case BADCODE:       /* x: got error */
      r = Z_DATA_ERROR;
      LEAVE
    default:
      r = Z_STREAM_ERROR;
      LEAVE
  }
#ifdef NEED_DUMMY_RETURN
  return Z_STREAM_ERROR;  /* Some dumb compilers complain without this */
#endif
}
/* Release a codes state obtained from inflate_codes_new().
 *   c : state to free; only the structure itself is freed, the tables it
 *       points to are left alone
 *   z : stream passed to ZFREE
 */
void inflate_codes_free(c, z)
inflate_codes_statef *c;
z_streamp z;
{
ZFREE(z, c);
Tracev((stderr, "inflate: codes free\n"));
}

View File

@@ -0,0 +1,27 @@
/* infcodes.h -- header to use infcodes.c
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* Opaque codes-decoder state: only the tag is declared in this header. */
struct inflate_codes_state;
typedef struct inflate_codes_state FAR inflate_codes_statef;
/* Create a codes state from the lookup bit counts (literal/length first,
   then distance) and the two matching tables; Z_NULL if out of memory. */
extern inflate_codes_statef *inflate_codes_new OF((
uInt, uInt,
inflate_huft *, inflate_huft *,
z_streamp ));
/* Decode literals and length/distance pairs; the int argument is the
   return code to start from. */
extern int inflate_codes OF((
inflate_blocks_statef *,
z_streamp ,
int));
/* Free a codes state. */
extern void inflate_codes_free OF((
inflate_codes_statef *,
z_streamp ));

View File

@@ -0,0 +1,170 @@
/* inffast.c -- process literals and length/distance pairs fast
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "inftrees.h"
#include "infblock.h"
#include "infcodes.h"
#include "infutil.h"
#include "inffast.h"
struct inflate_codes_state {int dummy;}; /* for buggy compilers */
/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits
/* macros for bit input with no checking and for returning unused bytes */
/* GRABBITS(j): append input bytes to the bit buffer b until it holds at
   least j bits (k counts the bits held); no check for available input. */
#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
/* UNGRAB: give whole unused bytes of the bit buffer back to the input, but
   never more than the z->avail_in - n bytes taken so far; adjusts n, p and
   k accordingly.  Uses c as scratch. */
#define UNGRAB {c=z->avail_in-n;c=(k>>3)<c?k>>3:c;n+=c;p-=c;k-=c<<3;}
/* Fast decoder for literals and length/distance pairs.
 *
 * Called with number of bytes left to write in window at least 258
 * (the maximum string length) and number of input bytes available
 * at least ten.  The ten bytes are six bytes for the longest length/
 * distance pair plus four bytes for overloading the bit buffer.
 *
 *   bl, bd : bits used for the first lookup in tl and td
 *   tl, td : literal/length table and distance table
 *   s      : blocks state owning the window (s->window .. s->end)
 *   z      : stream supplying the input
 *
 * Returns Z_OK when input or output room drops below the limits above,
 * Z_STREAM_END when the end-of-block code was decoded and Z_DATA_ERROR
 * (with z->msg set) on an invalid code.  Unused whole bytes of the bit
 * buffer are handed back (UNGRAB) before every return.
 *
 * When the copy source wraps around the window, a distance that reaches
 * back further than the window is large (corrupt input with a window
 * smaller than 32K) is folded modulo the window size, so the source
 * pointer never lies in front of s->window.
 */
int inflate_fast(bl, bd, tl, td, s, z)
uInt bl, bd;
inflate_huft *tl;
inflate_huft *td; /* need separate declaration for Borland C++ */
inflate_blocks_statef *s;
z_streamp z;
{
  inflate_huft *t;      /* temporary pointer */
  uInt e;               /* extra bits or operation */
  uLong b;              /* bit buffer */
  uInt k;               /* bits in bit buffer */
  Bytef *p;             /* input data pointer */
  uInt n;               /* bytes available there */
  Bytef *q;             /* output window write pointer */
  uInt m;               /* bytes to end of window or read pointer */
  uInt ml;              /* mask for literal/length tree */
  uInt md;              /* mask for distance tree */
  uInt c;               /* bytes to copy */
  uInt d;               /* distance back to copy from */
  Bytef *r;             /* copy source pointer */

  /* load input, output, bit values */
  LOAD

  /* initialize masks */
  ml = inflate_mask[bl];
  md = inflate_mask[bd];

  /* do until not enough input or output space for fast loop */
  do {                          /* assume called with m >= 258 && n >= 10 */
    /* get literal/length code */
    GRABBITS(20)                /* max bits for literal/length code */
    if ((e = (t = tl + ((uInt)b & ml))->exop) == 0)
    {
      DUMPBITS(t->bits)
      Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
                "inflate:         * literal '%c'\n" :
                "inflate:         * literal 0x%02x\n", t->base));
      *q++ = (Byte)t->base;
      m--;
      continue;
    }
    do {
      DUMPBITS(t->bits)
      if (e & 16)
      {
        /* get extra bits for length */
        e &= 15;
        c = t->base + ((uInt)b & inflate_mask[e]);
        DUMPBITS(e)
        Tracevv((stderr, "inflate:         * length %u\n", c));

        /* decode distance base of block to copy */
        GRABBITS(15);           /* max bits for distance code */
        e = (t = td + ((uInt)b & md))->exop;
        do {
          DUMPBITS(t->bits)
          if (e & 16)
          {
            /* get extra bits to add to distance base */
            e &= 15;
            GRABBITS(e)         /* get extra bits (up to 13) */
            d = t->base + ((uInt)b & inflate_mask[e]);
            DUMPBITS(e)
            Tracevv((stderr, "inflate:         * distance %u\n", d));

            /* do the copy */
            m -= c;
            if ((uInt)(q - s->window) >= d)     /* offset before dest */
            {                                   /*  just copy */
              r = q - d;
              *q++ = *r++;  c--;        /* minimum count is three, */
              *q++ = *r++;  c--;        /*  so unroll loop a little */
            }
            else                        /* else offset after destination */
            {
              e = d - (uInt)(q - s->window); /* bytes from offset to end */
              /* an invalid distance may exceed the window: fold it into
                 1..window size so that r stays inside the window */
              if (e > (uInt)(s->end - s->window))
                e = (e - 1) % (uInt)(s->end - s->window) + 1;
              r = s->end - e;           /* pointer to offset */
              if (c > e)                /* if source crosses, */
              {
                c -= e;                 /* copy to end of window */
                do {
                  *q++ = *r++;
                } while (--e);
                r = s->window;          /* copy rest from start of window */
              }
            }
            do {                        /* copy all or what's left */
              *q++ = *r++;
            } while (--c);
            break;
          }
          else if ((e & 64) == 0)
          {
            /* second-level distance table */
            t += t->base;
            e = (t += ((uInt)b & inflate_mask[e]))->exop;
          }
          else
          {
            z->msg = (char*)"invalid distance code";
            UNGRAB
            UPDATE
            return Z_DATA_ERROR;
          }
        } while (1);
        break;
      }
      if ((e & 64) == 0)
      {
        /* second-level literal/length table */
        t += t->base;
        if ((e = (t += ((uInt)b & inflate_mask[e]))->exop) == 0)
        {
          DUMPBITS(t->bits)
          Tracevv((stderr, t->base >= 0x20 && t->base < 0x7f ?
                    "inflate:         * literal '%c'\n" :
                    "inflate:         * literal 0x%02x\n", t->base));
          *q++ = (Byte)t->base;
          m--;
          break;
        }
      }
      else if (e & 32)
      {
        Tracevv((stderr, "inflate:         * end of block\n"));
        UNGRAB
        UPDATE
        return Z_STREAM_END;
      }
      else
      {
        z->msg = (char*)"invalid literal/length code";
        UNGRAB
        UPDATE
        return Z_DATA_ERROR;
      }
    } while (1);
  } while (m >= 258 && n >= 10);

  /* not enough input or output--restore pointers and return */
  UNGRAB
  UPDATE
  return Z_OK;
}

View File

@@ -0,0 +1,17 @@
/* inffast.h -- header to use inffast.c
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* Fast decoding loop.  Arguments in order: lookup bits for the
   literal/length table, lookup bits for the distance table, the
   literal/length table, the distance table, the blocks state and the
   stream. */
extern int inflate_fast OF((
uInt,
uInt,
inflate_huft *,
inflate_huft *,
inflate_blocks_statef *,
z_streamp ));

View File

@@ -0,0 +1,151 @@
/* inffixed.h -- table for decoding fixed codes
* Generated automatically by the maketree.c program
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* Bit counts that go with the fixed tables below: 9 for fixed_tl (which has
   512 = 2^9 entries) and 5 for fixed_td (32 = 2^5 entries). */
local uInt fixed_bl = 9;
local uInt fixed_bd = 5;
/* Generated table for the fixed literal/length code (512 entries).
   Do not edit by hand; see the file header.
   NOTE(review): each entry looks like {{{Exop, Bits}}, base} -- confirm the
   member order against the inflate_huft declaration in inftrees.h. */
local inflate_huft fixed_tl[] = {
{{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
{{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},192},
{{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},160},
{{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},224},
{{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},144},
{{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},208},
{{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},176},
{{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},240},
{{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
{{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},200},
{{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},168},
{{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},232},
{{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},152},
{{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},216},
{{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},184},
{{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},248},
{{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
{{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},196},
{{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},164},
{{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},228},
{{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},148},
{{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},212},
{{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},180},
{{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},244},
{{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
{{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},204},
{{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},172},
{{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},236},
{{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},156},
{{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},220},
{{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},188},
{{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},252},
{{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
{{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},194},
{{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},162},
{{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},226},
{{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},146},
{{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},210},
{{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},178},
{{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},242},
{{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
{{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},202},
{{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},170},
{{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},234},
{{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},154},
{{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},218},
{{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},186},
{{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},250},
{{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
{{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},198},
{{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},166},
{{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},230},
{{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},150},
{{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},214},
{{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},182},
{{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},246},
{{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
{{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},206},
{{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},174},
{{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},238},
{{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},158},
{{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},222},
{{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},190},
{{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},254},
{{{96,7}},256}, {{{0,8}},80}, {{{0,8}},16}, {{{84,8}},115},
{{{82,7}},31}, {{{0,8}},112}, {{{0,8}},48}, {{{0,9}},193},
{{{80,7}},10}, {{{0,8}},96}, {{{0,8}},32}, {{{0,9}},161},
{{{0,8}},0}, {{{0,8}},128}, {{{0,8}},64}, {{{0,9}},225},
{{{80,7}},6}, {{{0,8}},88}, {{{0,8}},24}, {{{0,9}},145},
{{{83,7}},59}, {{{0,8}},120}, {{{0,8}},56}, {{{0,9}},209},
{{{81,7}},17}, {{{0,8}},104}, {{{0,8}},40}, {{{0,9}},177},
{{{0,8}},8}, {{{0,8}},136}, {{{0,8}},72}, {{{0,9}},241},
{{{80,7}},4}, {{{0,8}},84}, {{{0,8}},20}, {{{85,8}},227},
{{{83,7}},43}, {{{0,8}},116}, {{{0,8}},52}, {{{0,9}},201},
{{{81,7}},13}, {{{0,8}},100}, {{{0,8}},36}, {{{0,9}},169},
{{{0,8}},4}, {{{0,8}},132}, {{{0,8}},68}, {{{0,9}},233},
{{{80,7}},8}, {{{0,8}},92}, {{{0,8}},28}, {{{0,9}},153},
{{{84,7}},83}, {{{0,8}},124}, {{{0,8}},60}, {{{0,9}},217},
{{{82,7}},23}, {{{0,8}},108}, {{{0,8}},44}, {{{0,9}},185},
{{{0,8}},12}, {{{0,8}},140}, {{{0,8}},76}, {{{0,9}},249},
{{{80,7}},3}, {{{0,8}},82}, {{{0,8}},18}, {{{85,8}},163},
{{{83,7}},35}, {{{0,8}},114}, {{{0,8}},50}, {{{0,9}},197},
{{{81,7}},11}, {{{0,8}},98}, {{{0,8}},34}, {{{0,9}},165},
{{{0,8}},2}, {{{0,8}},130}, {{{0,8}},66}, {{{0,9}},229},
{{{80,7}},7}, {{{0,8}},90}, {{{0,8}},26}, {{{0,9}},149},
{{{84,7}},67}, {{{0,8}},122}, {{{0,8}},58}, {{{0,9}},213},
{{{82,7}},19}, {{{0,8}},106}, {{{0,8}},42}, {{{0,9}},181},
{{{0,8}},10}, {{{0,8}},138}, {{{0,8}},74}, {{{0,9}},245},
{{{80,7}},5}, {{{0,8}},86}, {{{0,8}},22}, {{{192,8}},0},
{{{83,7}},51}, {{{0,8}},118}, {{{0,8}},54}, {{{0,9}},205},
{{{81,7}},15}, {{{0,8}},102}, {{{0,8}},38}, {{{0,9}},173},
{{{0,8}},6}, {{{0,8}},134}, {{{0,8}},70}, {{{0,9}},237},
{{{80,7}},9}, {{{0,8}},94}, {{{0,8}},30}, {{{0,9}},157},
{{{84,7}},99}, {{{0,8}},126}, {{{0,8}},62}, {{{0,9}},221},
{{{82,7}},27}, {{{0,8}},110}, {{{0,8}},46}, {{{0,9}},189},
{{{0,8}},14}, {{{0,8}},142}, {{{0,8}},78}, {{{0,9}},253},
{{{96,7}},256}, {{{0,8}},81}, {{{0,8}},17}, {{{85,8}},131},
{{{82,7}},31}, {{{0,8}},113}, {{{0,8}},49}, {{{0,9}},195},
{{{80,7}},10}, {{{0,8}},97}, {{{0,8}},33}, {{{0,9}},163},
{{{0,8}},1}, {{{0,8}},129}, {{{0,8}},65}, {{{0,9}},227},
{{{80,7}},6}, {{{0,8}},89}, {{{0,8}},25}, {{{0,9}},147},
{{{83,7}},59}, {{{0,8}},121}, {{{0,8}},57}, {{{0,9}},211},
{{{81,7}},17}, {{{0,8}},105}, {{{0,8}},41}, {{{0,9}},179},
{{{0,8}},9}, {{{0,8}},137}, {{{0,8}},73}, {{{0,9}},243},
{{{80,7}},4}, {{{0,8}},85}, {{{0,8}},21}, {{{80,8}},258},
{{{83,7}},43}, {{{0,8}},117}, {{{0,8}},53}, {{{0,9}},203},
{{{81,7}},13}, {{{0,8}},101}, {{{0,8}},37}, {{{0,9}},171},
{{{0,8}},5}, {{{0,8}},133}, {{{0,8}},69}, {{{0,9}},235},
{{{80,7}},8}, {{{0,8}},93}, {{{0,8}},29}, {{{0,9}},155},
{{{84,7}},83}, {{{0,8}},125}, {{{0,8}},61}, {{{0,9}},219},
{{{82,7}},23}, {{{0,8}},109}, {{{0,8}},45}, {{{0,9}},187},
{{{0,8}},13}, {{{0,8}},141}, {{{0,8}},77}, {{{0,9}},251},
{{{80,7}},3}, {{{0,8}},83}, {{{0,8}},19}, {{{85,8}},195},
{{{83,7}},35}, {{{0,8}},115}, {{{0,8}},51}, {{{0,9}},199},
{{{81,7}},11}, {{{0,8}},99}, {{{0,8}},35}, {{{0,9}},167},
{{{0,8}},3}, {{{0,8}},131}, {{{0,8}},67}, {{{0,9}},231},
{{{80,7}},7}, {{{0,8}},91}, {{{0,8}},27}, {{{0,9}},151},
{{{84,7}},67}, {{{0,8}},123}, {{{0,8}},59}, {{{0,9}},215},
{{{82,7}},19}, {{{0,8}},107}, {{{0,8}},43}, {{{0,9}},183},
{{{0,8}},11}, {{{0,8}},139}, {{{0,8}},75}, {{{0,9}},247},
{{{80,7}},5}, {{{0,8}},87}, {{{0,8}},23}, {{{192,8}},0},
{{{83,7}},51}, {{{0,8}},119}, {{{0,8}},55}, {{{0,9}},207},
{{{81,7}},15}, {{{0,8}},103}, {{{0,8}},39}, {{{0,9}},175},
{{{0,8}},7}, {{{0,8}},135}, {{{0,8}},71}, {{{0,9}},239},
{{{80,7}},9}, {{{0,8}},95}, {{{0,8}},31}, {{{0,9}},159},
{{{84,7}},99}, {{{0,8}},127}, {{{0,8}},63}, {{{0,9}},223},
{{{82,7}},27}, {{{0,8}},111}, {{{0,8}},47}, {{{0,9}},191},
{{{0,8}},15}, {{{0,8}},143}, {{{0,8}},79}, {{{0,9}},255}
};
/* Generated table for the fixed distance code (32 entries).
   Do not edit by hand; see the file header.
   NOTE(review): each entry looks like {{{Exop, Bits}}, base} -- confirm the
   member order against the inflate_huft declaration in inftrees.h. */
local inflate_huft fixed_td[] = {
{{{80,5}},1}, {{{87,5}},257}, {{{83,5}},17}, {{{91,5}},4097},
{{{81,5}},5}, {{{89,5}},1025}, {{{85,5}},65}, {{{93,5}},16385},
{{{80,5}},3}, {{{88,5}},513}, {{{84,5}},33}, {{{92,5}},8193},
{{{82,5}},9}, {{{90,5}},2049}, {{{86,5}},129}, {{{192,5}},24577},
{{{80,5}},2}, {{{87,5}},385}, {{{83,5}},25}, {{{91,5}},6145},
{{{81,5}},7}, {{{89,5}},1537}, {{{85,5}},97}, {{{93,5}},24577},
{{{80,5}},4}, {{{88,5}},769}, {{{84,5}},49}, {{{92,5}},12289},
{{{82,5}},13}, {{{90,5}},3073}, {{{86,5}},193}, {{{192,5}},24577}
};

View File

@@ -0,0 +1,366 @@
/* inflate.c -- zlib interface to inflate modules
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "infblock.h"
struct inflate_blocks_state {int dummy;}; /* for buggy compilers */
/* States of the inflate() state machine, in the order a wrapped stream
   passes through them: header bytes, optional dictionary id, compressed
   blocks, trailing check value. */
typedef enum {
METHOD, /* waiting for method byte */
FLAG, /* waiting for flag byte */
DICT4, /* four dictionary check bytes to go */
DICT3, /* three dictionary check bytes to go */
DICT2, /* two dictionary check bytes to go */
DICT1, /* one dictionary check byte to go */
DICT0, /* waiting for inflateSetDictionary */
BLOCKS, /* decompressing blocks */
CHECK4, /* four check bytes to go */
CHECK3, /* three check bytes to go */
CHECK2, /* two check bytes to go */
CHECK1, /* one check byte to go */
DONE, /* finished check, done */
BAD} /* got an error--stay here */
inflate_mode;
/* inflate private state, reached through z->state */
struct internal_state {
/* mode */
inflate_mode mode; /* current inflate mode */
/* mode dependent information */
union {
uInt method; /* if FLAG, method byte */
struct {
uLong was; /* computed check value */
uLong need; /* stream check value */
} check; /* if CHECK, check values to compare */
uInt marker; /* if BAD, inflateSync's marker bytes count */
} sub; /* submode */
/* mode independent information */
int nowrap; /* flag for no wrapper */
uInt wbits; /* log2(window size) (8..15, defaults to 15) */
inflate_blocks_statef
*blocks; /* current inflate_blocks state */
};
/* Put an initialised stream back into its starting state without freeing
 * or reallocating anything.
 *   z : stream to reset
 * Clears the byte totals and the message, selects the first state (BLOCKS
 * for a stream without wrapper, METHOD otherwise) and resets the block
 * decoder.  Returns Z_STREAM_ERROR if z or z->state is Z_NULL, else Z_OK.
 */
int ZEXPORT inflateReset(z)
z_streamp z;
{
  if (z == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->state == Z_NULL)
    return Z_STREAM_ERROR;

  z->total_out = 0;
  z->total_in = 0;
  z->msg = Z_NULL;

  /* without a zlib header there is nothing to parse before the blocks */
  if (z->state->nowrap)
    z->state->mode = BLOCKS;
  else
    z->state->mode = METHOD;

  inflate_blocks_reset(z->state->blocks, z, Z_NULL);
  Tracev((stderr, "inflate: reset\n"));
  return Z_OK;
}
/* Free all memory held by an inflate stream.
 *   z : stream to shut down
 * Frees the block decoder (if one was created) and the private state, and
 * clears z->state.  Returns Z_STREAM_ERROR if z, z->state or z->zfree is
 * Z_NULL, else Z_OK.
 */
int ZEXPORT inflateEnd(z)
z_streamp z;
{
  if (z == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->state == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->zfree == Z_NULL)
    return Z_STREAM_ERROR;      /* no way to give the memory back */

  /* blocks may still be Z_NULL when initialisation failed half way */
  if (z->state->blocks != Z_NULL)
  {
    inflate_blocks_free(z->state->blocks, z);
  }
  ZFREE(z, z->state);
  z->state = Z_NULL;
  Tracev((stderr, "inflate: end\n"));
  return Z_OK;
}
/* Initialise a stream for decompression.
 *   z           : stream to set up; zalloc/zfree left at Z_NULL are replaced
 *                 by zcalloc/zcfree
 *   w           : log2 of the window size, 8..15; a negative value selects
 *                 the same size without zlib header and check ("nowrap")
 *   version     : version string of the caller; only its first character is
 *                 compared with ZLIB_VERSION
 *   stream_size : sizeof(z_stream) as seen by the caller
 * Returns Z_VERSION_ERROR on a version or size mismatch, Z_STREAM_ERROR for
 * a Z_NULL stream or a window size out of range, Z_MEM_ERROR when an
 * allocation fails, else Z_OK.
 */
int ZEXPORT inflateInit2_(z, w, version, stream_size)
z_streamp z;
int w;
const char *version;
int stream_size;
{
  int raw = 0;                  /* set for a stream without wrapper */

  /* the caller must have been built against a compatible zlib.h */
  if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
      stream_size != sizeof(z_stream))
    return Z_VERSION_ERROR;

  if (z == Z_NULL)
    return Z_STREAM_ERROR;

  /* fill in defaults and allocate the private state */
  z->msg = Z_NULL;
  if (z->zalloc == Z_NULL)
  {
    z->zalloc = zcalloc;
    z->opaque = (voidpf)0;
  }
  if (z->zfree == Z_NULL)
    z->zfree = zcfree;
  z->state = (struct internal_state FAR *)
             ZALLOC(z,1,sizeof(struct internal_state));
  if (z->state == Z_NULL)
    return Z_MEM_ERROR;
  z->state->blocks = Z_NULL;

  /* handle undocumented nowrap option (no zlib header or check) */
  if (w < 0)
  {
    w = - w;
    raw = 1;
  }
  z->state->nowrap = raw;

  /* validate and store the window size */
  if (w < 8 || w > 15)
  {
    inflateEnd(z);
    return Z_STREAM_ERROR;
  }
  z->state->wbits = (uInt)w;

  /* create the block decoder; no check function without wrapper */
  z->state->blocks =
    inflate_blocks_new(z, raw ? Z_NULL : adler32, (uInt)1 << w);
  if (z->state->blocks == Z_NULL)
  {
    inflateEnd(z);
    return Z_MEM_ERROR;
  }
  Tracev((stderr, "inflate: allocated\n"));

  /* start from a clean state */
  inflateReset(z);
  return Z_OK;
}
/* Initialise a stream for decompression with the default window size.
 * Forwards to inflateInit2_() with DEF_WBITS and returns its result.
 */
int ZEXPORT inflateInit_(z, version, stream_size)
z_streamp z;
const char *version;
int stream_size;
{
return inflateInit2_(z, DEF_WBITS, version, stream_size);
}
/* NEEDBYTE: make the enclosing function return r when no input is left;
   otherwise set r to f (both are locals of the function using the macro). */
#define NEEDBYTE {if(z->avail_in==0)return r;r=f;}
/* NEXTBYTE: consume one input byte, updating avail_in, total_in and
   next_in; evaluates to the byte read. */
#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)
/* Decompress as much as possible from z->next_in.
 *   z : initialised stream
 *   f : flush mode; only Z_FINISH is distinguished from everything else
 * The function is a state machine on z->state->mode.  Consecutive states
 * fall through into each other, and NEEDBYTE returns to the caller when the
 * input runs dry, so a later call resumes in the state that was reached.
 * Returns Z_STREAM_ERROR for a Z_NULL stream, state or input pointer (and
 * when called again while a dictionary is still needed), Z_NEED_DICT when
 * the header asks for a preset dictionary, Z_STREAM_END when the stream is
 * complete, Z_DATA_ERROR for a stream in the BAD state, and otherwise the
 * code in r (see below).
 */
int ZEXPORT inflate(z, f)
z_streamp z;
int f;
{
int r;
uInt b;
if (z == Z_NULL || z->state == Z_NULL || z->next_in == Z_NULL)
return Z_STREAM_ERROR;
/* from here on f is the code to report after progress was made:
   Z_BUF_ERROR if the caller asked for Z_FINISH, Z_OK otherwise */
f = f == Z_FINISH ? Z_BUF_ERROR : Z_OK;
/* r is what NEEDBYTE returns: Z_BUF_ERROR until a byte was consumed */
r = Z_BUF_ERROR;
while (1) switch (z->state->mode)
{
case METHOD:
NEEDBYTE
/* low four bits of the first byte: compression method */
if (((z->state->sub.method = NEXTBYTE) & 0xf) != Z_DEFLATED)
{
z->state->mode = BAD;
z->msg = (char*)"unknown compression method";
z->state->sub.marker = 5; /* can't try inflateSync */
break;
}
/* high four bits + 8: window bits of the stream, must fit ours */
if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
{
z->state->mode = BAD;
z->msg = (char*)"invalid window size";
z->state->sub.marker = 5; /* can't try inflateSync */
break;
}
z->state->mode = FLAG;
/* fall through */
case FLAG:
NEEDBYTE
b = NEXTBYTE;
/* the two header bytes, read as a 16-bit value, must be a multiple of 31 */
if (((z->state->sub.method << 8) + b) % 31)
{
z->state->mode = BAD;
z->msg = (char*)"incorrect header check";
z->state->sub.marker = 5; /* can't try inflateSync */
break;
}
Tracev((stderr, "inflate: zlib header ok\n"));
if (!(b & PRESET_DICT))
{
z->state->mode = BLOCKS;
break;
}
z->state->mode = DICT4;
/* fall through */
/* DICT4..DICT1: read the dictionary check value, most significant byte first */
case DICT4:
NEEDBYTE
z->state->sub.check.need = (uLong)NEXTBYTE << 24;
z->state->mode = DICT3;
case DICT3:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 16;
z->state->mode = DICT2;
case DICT2:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 8;
z->state->mode = DICT1;
case DICT1:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE;
/* expose the value in z->adler; inflateSetDictionary() compares against it */
z->adler = z->state->sub.check.need;
z->state->mode = DICT0;
return Z_NEED_DICT;
case DICT0:
/* inflate() was called again without inflateSetDictionary() in between */
z->state->mode = BAD;
z->msg = (char*)"need dictionary";
z->state->sub.marker = 0; /* can try inflateSync */
return Z_STREAM_ERROR;
case BLOCKS:
r = inflate_blocks(z->state->blocks, z, r);
if (r == Z_DATA_ERROR)
{
z->state->mode = BAD;
z->state->sub.marker = 0; /* can try inflateSync */
break;
}
if (r == Z_OK)
r = f;
if (r != Z_STREAM_END)
return r;
/* last block done: reset the decoder and fetch the computed check value */
r = f;
inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);
if (z->state->nowrap)
{
/* no wrapper means no trailing check value to read */
z->state->mode = DONE;
break;
}
z->state->mode = CHECK4;
/* fall through */
/* CHECK4..CHECK1: read the stored check value, most significant byte first */
case CHECK4:
NEEDBYTE
z->state->sub.check.need = (uLong)NEXTBYTE << 24;
z->state->mode = CHECK3;
case CHECK3:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 16;
z->state->mode = CHECK2;
case CHECK2:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE << 8;
z->state->mode = CHECK1;
case CHECK1:
NEEDBYTE
z->state->sub.check.need += (uLong)NEXTBYTE;
if (z->state->sub.check.was != z->state->sub.check.need)
{
z->state->mode = BAD;
z->msg = (char*)"incorrect data check";
z->state->sub.marker = 5; /* can't try inflateSync */
break;
}
Tracev((stderr, "inflate: zlib check ok\n"));
z->state->mode = DONE;
/* fall through */
case DONE:
return Z_STREAM_END;
case BAD:
return Z_DATA_ERROR;
default:
return Z_STREAM_ERROR;
}
#ifdef NEED_DUMMY_RETURN
return Z_STREAM_ERROR; /* Some dumb compilers complain without this */
#endif
}
/* Supply the preset dictionary that inflate() asked for with Z_NEED_DICT.
 *   z          : stream waiting in state DICT0
 *   dictionary : dictionary bytes
 *   dictLength : number of bytes at dictionary
 * The Adler-32 of the dictionary must equal the value left in z->adler.
 * If the dictionary is as large as the window or larger, only its last
 * (window size - 1) bytes are used.  Returns Z_STREAM_ERROR if the stream
 * is not waiting for a dictionary, Z_DATA_ERROR on a checksum mismatch,
 * else Z_OK with decoding set to continue with the blocks.
 */
int ZEXPORT inflateSetDictionary(z, dictionary, dictLength)
z_streamp z;
const Bytef *dictionary;
uInt dictLength;
{
  uInt wsize;                   /* window size in bytes */
  uInt used;                    /* bytes of the dictionary actually kept */

  if (z == Z_NULL || z->state == Z_NULL || z->state->mode != DICT0)
    return Z_STREAM_ERROR;

  /* the dictionary must be the one the stream was compressed with */
  if (adler32(1L, dictionary, dictLength) != z->adler)
    return Z_DATA_ERROR;
  z->adler = 1L;

  wsize = (uInt)1 << z->state->wbits;
  used = dictLength;
  if (used >= wsize)
  {
    /* too long: keep the tail, one byte less than the window */
    used = wsize - 1;
    dictionary += dictLength - used;
  }
  inflate_set_dictionary(z->state->blocks, dictionary, used);
  z->state->mode = BLOCKS;
  return Z_OK;
}
/* Skip input until the four-byte marker 00 00 FF FF has been seen.
 *   z : stream to resynchronise
 * The number of marker bytes matched so far is kept in the stream state, so
 * the search continues across calls.  All bytes examined are consumed.
 * Returns Z_STREAM_ERROR for a Z_NULL stream or state, Z_BUF_ERROR when no
 * input is available, Z_DATA_ERROR when the marker was not completed, and
 * Z_OK when it was; in that case the stream is reset (the byte totals are
 * kept) and decoding continues with the blocks.
 */
int ZEXPORT inflateSync(z)
z_streamp z;
{
  static const Byte mark[4] = {0, 0, 0xff, 0xff};
  uInt left;            /* number of bytes still to look at */
  Bytef *cur;           /* byte being examined */
  uInt got;             /* number of marker bytes found in a row */
  uLong in_total;       /* saved total_in */
  uLong out_total;      /* saved total_out */

  /* set up */
  if (z == Z_NULL || z->state == Z_NULL)
    return Z_STREAM_ERROR;
  if (z->state->mode != BAD)
  {
    /* first call of a search: start with no marker bytes matched */
    z->state->mode = BAD;
    z->state->sub.marker = 0;
  }
  left = z->avail_in;
  if (left == 0)
    return Z_BUF_ERROR;
  cur = z->next_in;
  got = z->state->sub.marker;

  /* search */
  for (; left && got < 4; cur++, left--)
  {
    if (*cur == mark[got])
      got++;
    else if (*cur)
      got = 0;                  /* non-zero mismatch: start over */
    else
      got = 4 - got;            /* a zero where 0xff was expected */
  }

  /* restore */
  z->total_in += cur - z->next_in;
  z->next_in = cur;
  z->avail_in = left;
  z->state->sub.marker = got;

  /* return no joy or set up to restart on a new block */
  if (got != 4)
    return Z_DATA_ERROR;
  in_total = z->total_in;
  out_total = z->total_out;
  inflateReset(z);
  z->total_in = in_total;
  z->total_out = out_total;
  z->state->mode = BLOCKS;
  return Z_OK;
}
/* Returns true if inflate is currently at the end of a block generated
* by Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
* implementation to provide an additional safety check. PPP uses Z_SYNC_FLUSH
* but removes the length bytes of the resulting empty stored block. When
* decompressing, PPP checks that at the end of input packet, inflate is
* waiting for these length bytes.
*/
int ZEXPORT inflateSyncPoint(z)
z_streamp z;
{
  /* forward to the blocks layer only when the whole pointer chain is valid */
  if (z != Z_NULL && z->state != Z_NULL && z->state->blocks != Z_NULL)
    return inflate_blocks_sync_point(z->state->blocks);

  return Z_STREAM_ERROR;
}

View File

@@ -0,0 +1,455 @@
/* inftrees.c -- generate Huffman trees for efficient decoding
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "inftrees.h"
#if !defined(BUILDFIXED) && !defined(STDC)
# define BUILDFIXED /* non ANSI compilers may not accept inffixed.h */
#endif
const char inflate_copyright[] =
" inflate 1.1.3 Copyright 1995-1998 Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
include such an acknowledgment, I would appreciate that you keep this
copyright string in the executable of your product.
*/
struct internal_state {int dummy;}; /* for buggy compilers */
/* simplify the use of the inflate_huft type with some defines */
#define exop word.what.Exop
#define bits word.what.Bits
local int huft_build OF((
uIntf *, /* code lengths in bits */
uInt, /* number of codes */
uInt, /* number of "simple" codes */
const uIntf *, /* list of base values for non-simple codes */
const uIntf *, /* list of extra bits for non-simple codes */
inflate_huft * FAR*,/* result: starting table */
uIntf *, /* maximum lookup bits (returns actual) */
inflate_huft *, /* space for trees */
uInt *, /* hufts used in space */
uIntf * )); /* space for values */
/* Tables for deflate from PKZIP's appnote.txt. */
/* In this file cplens/cplext are passed to huft_build() as the base-value and
   extra-bits lists of the literal/length tree (simple-code count 257), and
   cpdist/cpdext as the same lists of the distance tree (simple-code count 0).
   huft_build() indexes them with (code value - simple-code count). */
local const uInt cplens[31] = { /* Copy lengths for literal codes 257..285 */
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
/* see note #13 above about 258 */
local const uInt cplext[31] = { /* Extra bits for literal codes 257..285 */
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
local const uInt cpdist[30] = { /* Copy offsets for distance codes 0..29 */
1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
8193, 12289, 16385, 24577};
local const uInt cpdext[30] = { /* Extra bits for distance codes */
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 12, 13, 13};
/*
Huffman code decoding is performed using a multi-level table lookup.
The fastest way to decode is to simply build a lookup table whose
size is determined by the longest code. However, the time it takes
to build this table can also be a factor if the data being decoded
is not very long. The most common codes are necessarily the
shortest codes, so those codes dominate the decoding time, and hence
the speed. The idea is you can have a shorter table that decodes the
shorter, more probable codes, and then point to subsidiary tables for
the longer codes. The time it costs to decode the longer codes is
then traded against the time it takes to make longer tables.
The results of this trade are in the variables lbits and dbits
below. lbits is the number of bits the first level table for literal/
length codes can decode in one step, and dbits is the same thing for
the distance codes. Subsequent tables are also less than or equal to
those sizes. These values may be adjusted either when all of the
codes are shorter than that, in which case the longest code length in
bits is used, or when the shortest code is *longer* than the requested
table size, in which case the length of the shortest code in bits is
used.
There are two different values for the two tables, since they code a
different number of possibilities each. The literal/length table
codes 286 possible values, or in a flat code, a little over eight
bits. The distance table codes 30 possible values, or a little less
than five bits, flat. The optimum values for speed end up being
about one bit more than those, so lbits is 8+1 and dbits is 5+1.
The optimum values may differ though from machine to machine, and
possibly even between compilers. Your mileage may vary.
*/
/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */
#define BMAX 15 /* maximum bit length of any code */
local int huft_build(b, n, s, d, e, t, m, hp, hn, v)
uIntf *b; /* code lengths in bits (all assumed <= BMAX) */
uInt n; /* number of codes (assumed <= 288) */
uInt s; /* number of simple-valued codes (0..s-1) */
const uIntf *d; /* list of base values for non-simple codes */
const uIntf *e; /* list of extra bits for non-simple codes */
inflate_huft * FAR *t; /* result: starting table */
uIntf *m; /* maximum lookup bits, returns actual */
inflate_huft *hp; /* space for trees */
uInt *hn; /* hufts used in space */
uIntf *v; /* working area: values in order of bit length */
/* Given a list of code lengths and a maximum table size, make a set of
tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR
if the given code set is incomplete (the tables are still built in this
case), Z_DATA_ERROR if the input is invalid (an over-subscribed set of
lengths), or Z_MEM_ERROR if not enough memory. */
/* Tables are carved out of the caller-supplied array hp; *hn counts the
entries already used and is advanced here, so successive calls can share
one array. Z_MEM_ERROR is returned when *hn would exceed MANY. */
{
uInt a; /* counter for codes of length k */
uInt c[BMAX+1]; /* bit length count table */
uInt f; /* i repeats in table every f entries */
int g; /* maximum code length */
int h; /* table level */
register uInt i; /* counter, current code */
register uInt j; /* counter */
register int k; /* number of bits in current code */
int l; /* bits per table (returned in m) */
uInt mask; /* (1 << w) - 1, to avoid cc -O bug on HP */
register uIntf *p; /* pointer into c[], b[], or v[] */
inflate_huft *q; /* points to current table */
struct inflate_huft_s r; /* table entry for structure assignment */
inflate_huft *u[BMAX]; /* table stack */
register int w; /* bits before this table == (l * h) */
uInt x[BMAX+1]; /* bit offsets, then code stack */
uIntf *xp; /* pointer into x */
int y; /* number of dummy codes added */
uInt z; /* number of entries in current table */
/* Generate counts for each bit length */
p = c;
/* C4 expands to sixteen "*p++ = 0;" statements, i.e. an unrolled clear */
#define C0 *p++ = 0;
#define C2 C0 C0 C0 C0
#define C4 C2 C2 C2 C2
C4 /* clear c[]--assume BMAX+1 is 16 */
p = b; i = n;
do {
c[*p++]++; /* assume all entries <= BMAX */
} while (--i);
if (c[0] == n) /* null input--all zero length codes */
{
*t = (inflate_huft *)Z_NULL;
*m = 0;
return Z_OK;
}
/* Find minimum and maximum length, bound *m by those */
l = *m;
for (j = 1; j <= BMAX; j++)
if (c[j])
break;
k = j; /* minimum code length */
if ((uInt)l < j)
l = j;
for (i = BMAX; i; i--)
if (c[i])
break;
g = i; /* maximum code length */
if ((uInt)l > i)
l = i;
*m = l;
/* Adjust last length count to fill out codes, if needed */
/* y tracks the code patterns still unassigned at each length; a negative
value means more codes were requested than exist (over-subscribed) */
for (y = 1 << j; j < i; j++, y <<= 1)
if ((y -= c[j]) < 0)
return Z_DATA_ERROR;
if ((y -= c[i]) < 0)
return Z_DATA_ERROR;
c[i] += y;
/* Generate starting offsets into the value table for each length */
x[1] = j = 0;
p = c + 1; xp = x + 2;
while (--i) { /* note that i == g from above */
*xp++ = (j += *p++);
}
/* Make a table of values in order of bit lengths */
p = b; i = 0;
do {
if ((j = *p++) != 0)
v[x[j]++] = i;
} while (++i < n);
n = x[g]; /* set n to length of v */
/* Generate the Huffman codes and for each, make the table entries */
x[0] = i = 0; /* first Huffman code is zero */
p = v; /* grab values in bit order */
h = -1; /* no tables yet--level -1 */
w = -l; /* bits decoded == (l * h) */
u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */
q = (inflate_huft *)Z_NULL; /* ditto */
z = 0; /* ditto */
/* go through the bit lengths (k already is bits in shortest code) */
for (; k <= g; k++)
{
a = c[k];
while (a--)
{
/* here i is the Huffman code of length k bits for value *p */
/* make tables up to required level */
while (k > w + l)
{
h++;
w += l; /* previous table always l bits */
/* compute minimum size table less than or equal to l bits */
z = g - w;
z = z > (uInt)l ? l : z; /* table size upper limit */
if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
{ /* too few codes for k-w bit table */
f -= a + 1; /* deduct codes from patterns left */
xp = c + k;
if (j < z)
while (++j < z) /* try smaller tables up to z bits */
{
if ((f <<= 1) <= *++xp)
break; /* enough codes to use up j bits */
f -= *xp; /* else deduct codes from patterns */
}
}
z = 1 << j; /* table entries for j-bit table */
/* allocate new table */
if (*hn + z > MANY) /* (note: doesn't matter for fixed) */
return Z_MEM_ERROR; /* not enough memory */
u[h] = q = hp + *hn;
*hn += z;
/* connect to last table, if there is one */
if (h)
{
x[h] = i; /* save pattern for backing up */
r.bits = (Byte)l; /* bits to dump before this table */
r.exop = (Byte)j; /* bits in this table */
j = i >> (w - l);
r.base = (uInt)(q - u[h-1] - j); /* offset to this table */
u[h-1][j] = r; /* connect to last table */
}
else
*t = q; /* first table is returned result */
}
/* set up table entry in r */
r.bits = (Byte)(k - w);
if (p >= v + n)
r.exop = 128 + 64; /* out of values--invalid code */
else if (*p < s)
{
r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */
r.base = *p++; /* simple code is just the value */
}
else
{
r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
r.base = d[*p++ - s];
}
/* fill code-like entries with r */
f = 1 << (k - w);
for (j = i >> w; j < z; j += f)
q[j] = r;
/* backwards increment the k-bit code i */
for (j = 1 << (k - 1); i & j; j >>= 1)
i ^= j;
i ^= j;
/* backup over finished tables */
mask = (1 << w) - 1; /* needed on HP, cc -O bug */
while ((i & mask) != x[h])
{
h--; /* don't need to update q */
w -= l;
mask = (1 << w) - 1;
}
}
}
/* Return Z_BUF_ERROR if we were given an incomplete table */
/* (an incomplete set whose longest code is one bit long is still reported
as Z_OK) */
return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK;
}
/* Build the decoding table for the 19 bit-length codes of a dynamic block.
 *
 * Returns Z_MEM_ERROR when the work area cannot be allocated, otherwise the
 * huft_build() result with one adjustment: an incomplete tree (Z_BUF_ERROR)
 * or a resulting depth of zero is turned into Z_DATA_ERROR. z->msg is set
 * for every Z_DATA_ERROR outcome. The work area is released before return.
 */
int inflate_trees_bits(c, bb, tb, hp, z)
uIntf *c;               /* 19 code lengths */
uIntf *bb;              /* bits tree desired/actual depth */
inflate_huft * FAR *tb; /* bits tree result */
inflate_huft *hp;       /* space for trees */
z_streamp z;            /* for messages */
{
  int status;           /* result handed back to the caller */
  uInt used = 0;        /* hufts used in space */
  uIntf *work;          /* work area for huft_build */

  work = (uIntf*)ZALLOC(z, 19, sizeof(uInt));
  if (work == Z_NULL)
    return Z_MEM_ERROR;

  status = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL,
                      tb, bb, hp, &used, work);

  if (status == Z_DATA_ERROR)
  {
    z->msg = (char*)"oversubscribed dynamic bit lengths tree";
  }
  else if (status == Z_BUF_ERROR || *bb == 0)
  {
    z->msg = (char*)"incomplete dynamic bit lengths tree";
    status = Z_DATA_ERROR;
  }

  ZFREE(z, work);
  return status;
}
/* Build the literal/length and distance decoding tables of a dynamic block
   from one combined list of code lengths (nl literal/length lengths followed
   by nd distance lengths). Both tables are carved out of hp through the
   shared counter hn. Returns Z_OK, Z_MEM_ERROR or Z_DATA_ERROR; z->msg is
   set whenever Z_DATA_ERROR is produced here. The work area v is freed on
   every path after its allocation. */
int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, hp, z)
uInt nl; /* number of literal/length codes */
uInt nd; /* number of distance codes */
uIntf *c; /* that many (total) code lengths */
uIntf *bl; /* literal desired/actual bit depth */
uIntf *bd; /* distance desired/actual bit depth */
inflate_huft * FAR *tl; /* literal/length tree result */
inflate_huft * FAR *td; /* distance tree result */
inflate_huft *hp; /* space for trees */
z_streamp z; /* for messages */
{
int r;
uInt hn = 0; /* hufts used in space */
uIntf *v; /* work area for huft_build */
/* allocate work area */
if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
return Z_MEM_ERROR;
/* build literal/length tree */
r = huft_build(c, nl, 257, cplens, cplext, tl, bl, hp, &hn, v);
if (r != Z_OK || *bl == 0)
{
if (r == Z_DATA_ERROR)
z->msg = (char*)"oversubscribed literal/length tree";
else if (r != Z_MEM_ERROR)
{
/* incomplete tree (Z_BUF_ERROR) or zero depth: both become data errors */
z->msg = (char*)"incomplete literal/length tree";
r = Z_DATA_ERROR;
}
ZFREE(z, v);
return r;
}
/* build distance tree */
r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, hp, &hn, v);
if (r != Z_OK || (*bd == 0 && nl > 257))
{
if (r == Z_DATA_ERROR)
z->msg = (char*)"oversubscribed distance tree";
else if (r == Z_BUF_ERROR) {
/* NOTE: the braces below are split across the preprocessor branches.
   With PKZIP_BUG_WORKAROUND defined, an incomplete distance tree is
   accepted (r = Z_OK) and the whole error block closes at the brace
   after #endif, so control falls through to the "done" code. Without it,
   the #else branch holds the remaining error handling and the early
   return. */
#ifdef PKZIP_BUG_WORKAROUND
r = Z_OK;
}
#else
z->msg = (char*)"incomplete distance tree";
r = Z_DATA_ERROR;
}
else if (r != Z_MEM_ERROR)
{
z->msg = (char*)"empty distance tree with lengths";
r = Z_DATA_ERROR;
}
ZFREE(z, v);
return r;
#endif
}
/* done */
ZFREE(z, v);
return Z_OK;
}
/* build fixed tables only once--keep them here */
#ifdef BUILDFIXED
local int fixed_built = 0;
#define FIXEDH 544 /* number of hufts used by fixed tables */
local inflate_huft fixed_mem[FIXEDH];
local uInt fixed_bl;
local uInt fixed_bd;
local inflate_huft *fixed_tl;
local inflate_huft *fixed_td;
#else
#include "inffixed.h"
#endif
/* Hand out the decoding tables for fixed-Huffman blocks. With BUILDFIXED
   defined the tables are built once into the static fixed_mem array on the
   first call (guarded by fixed_built); otherwise fixed_bl, fixed_bd,
   fixed_tl and fixed_td are expected to come from inffixed.h.
   Returns Z_OK, or Z_MEM_ERROR if a temporary buffer cannot be allocated
   while building. */
int inflate_trees_fixed(bl, bd, tl, td, z)
uIntf *bl; /* literal desired/actual bit depth */
uIntf *bd; /* distance desired/actual bit depth */
inflate_huft * FAR *tl; /* literal/length tree result */
inflate_huft * FAR *td; /* distance tree result */
z_streamp z; /* for memory allocation */
{
#ifdef BUILDFIXED
/* build fixed tables if not already */
/* NOTE(review): fixed_built is a plain static flag with no locking visible
   here -- concurrent first calls look unsafe; confirm against callers */
if (!fixed_built)
{
int k; /* temporary variable */
uInt f = 0; /* number of hufts used in fixed_mem */
uIntf *c; /* length list for huft_build */
uIntf *v; /* work area for huft_build */
/* allocate memory */
if ((c = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
return Z_MEM_ERROR;
if ((v = (uIntf*)ZALLOC(z, 288, sizeof(uInt))) == Z_NULL)
{
ZFREE(z, c);
return Z_MEM_ERROR;
}
/* literal table */
/* code lengths: 0..143 -> 8 bits, 144..255 -> 9, 256..279 -> 7, 280..287 -> 8 */
for (k = 0; k < 144; k++)
c[k] = 8;
for (; k < 256; k++)
c[k] = 9;
for (; k < 280; k++)
c[k] = 7;
for (; k < 288; k++)
c[k] = 8;
fixed_bl = 9;
/* the huft_build() return values are not checked in this function */
huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl,
fixed_mem, &f, v);
/* distance table */
for (k = 0; k < 30; k++)
c[k] = 5;
fixed_bd = 5;
huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd,
fixed_mem, &f, v);
/* done */
ZFREE(z, v);
ZFREE(z, c);
fixed_built = 1;
}
#endif
/* copy the (built or included) table description to the caller */
*bl = fixed_bl;
*bd = fixed_bd;
*tl = fixed_tl;
*td = fixed_td;
return Z_OK;
}

View File

@@ -0,0 +1,58 @@
/* inftrees.h -- header to use inftrees.c
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* Huffman code lookup table entry--this entry is four bytes for machines
that have 16-bit pointers (e.g. PC's in the small or medium model). */
/* One entry of a Huffman decoding table. The union overlays the two
   one-byte fields with a uInt so that the entry is padded to a power-of-two
   size. */
typedef struct inflate_huft_s FAR inflate_huft;
struct inflate_huft_s {
union {
struct {
Byte Exop; /* number of extra bits or operation */
Byte Bits; /* number of bits in this code or subcode */
} what;
uInt pad; /* pad structure to a power of 2 (4 bytes for */
} word; /* 16-bit, 8 bytes for 32-bit int's) */
uInt base; /* literal, length base, distance base,
or table offset */
};
/* Maximum size of dynamic tree. The maximum found in a long but non-
exhaustive search was 1004 huft structures (850 for length/literals
and 154 for distances, the latter actually the result of an
exhaustive search). The actual maximum is not known, but the
value below is more than safe. */
#define MANY 1440
extern int inflate_trees_bits OF((
uIntf *, /* 19 code lengths */
uIntf *, /* bits tree desired/actual depth */
inflate_huft * FAR *, /* bits tree result */
inflate_huft *, /* space for trees */
z_streamp)); /* for messages */
extern int inflate_trees_dynamic OF((
uInt, /* number of literal/length codes */
uInt, /* number of distance codes */
uIntf *, /* that many (total) code lengths */
uIntf *, /* literal desired/actual bit depth */
uIntf *, /* distance desired/actual bit depth */
inflate_huft * FAR *, /* literal/length tree result */
inflate_huft * FAR *, /* distance tree result */
inflate_huft *, /* space for trees */
z_streamp)); /* for messages */
extern int inflate_trees_fixed OF((
uIntf *, /* literal desired/actual bit depth */
uIntf *, /* distance desired/actual bit depth */
inflate_huft * FAR *, /* literal/length tree result */
inflate_huft * FAR *, /* distance tree result */
z_streamp)); /* for memory allocation */

View File

@@ -0,0 +1,87 @@
/* inflate_util.c -- data and routines common to blocks and codes
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "infblock.h"
#include "inftrees.h"
#include "infcodes.h"
#include "infutil.h"
struct inflate_codes_state {int dummy;}; /* for buggy compilers */
/* And'ing with mask[n] masks the lower n bits */
uInt inflate_mask[17] = {
0x0000,
0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};
/* copy as much as possible from the sliding window to the output area */
/* Copy pending bytes from the sliding window (s->read up to s->write,
   wrapping once at s->end) into z->next_out, limited by z->avail_out.
   Updates avail_out, total_out, next_out, s->read and, when a check function
   is installed, the running check value. Returns r, except that a
   Z_BUF_ERROR passed in is replaced by Z_OK as soon as at least one byte
   has been copied. */
int inflate_flush(s, z, r)
inflate_blocks_statef *s;
z_streamp z;
int r;
{
uInt n;
Bytef *p;
Bytef *q;
/* local copies of source and destination pointers */
p = z->next_out;
q = s->read;
/* compute number of bytes to copy as far as end of window */
/* (when read is ahead of write, the data runs from read to the window end) */
n = (uInt)((q <= s->write ? s->write : s->end) - q);
if (n > z->avail_out) n = z->avail_out;
if (n && r == Z_BUF_ERROR) r = Z_OK;
/* update counters */
z->avail_out -= n;
z->total_out += n;
/* update check information */
if (s->checkfn != Z_NULL)
z->adler = s->check = (*s->checkfn)(s->check, q, n);
/* copy as far as end of window */
zmemcpy(p, q, n);
p += n;
q += n;
/* see if more to copy at beginning of window */
if (q == s->end)
{
/* wrap pointers */
q = s->window;
/* a write pointer sitting at the window end is wrapped as well */
if (s->write == s->end)
s->write = s->window;
/* compute bytes to copy */
n = (uInt)(s->write - q);
if (n > z->avail_out) n = z->avail_out;
if (n && r == Z_BUF_ERROR) r = Z_OK;
/* update counters */
z->avail_out -= n;
z->total_out += n;
/* update check information */
if (s->checkfn != Z_NULL)
z->adler = s->check = (*s->checkfn)(s->check, q, n);
/* copy */
zmemcpy(p, q, n);
p += n;
q += n;
}
/* update pointers */
z->next_out = p;
s->read = q;
/* done */
return r;
}

View File

@@ -0,0 +1,98 @@
/* infutil.h -- types and macros common to blocks and codes
* Copyright (C) 1995-1998 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
#ifndef _INFUTIL_H
#define _INFUTIL_H
typedef enum {
TYPE, /* get type bits (3, including end bit) */
LENS, /* get lengths for stored */
STORED, /* processing stored block */
TABLE, /* get table lengths */
BTREE, /* get bit lengths tree for a dynamic block */
DTREE, /* get length, distance trees for a dynamic block */
CODES, /* processing fixed or dynamic block */
DRY, /* output remaining window bytes */
DONE, /* finished last block, done */
BAD} /* got a data error--stuck here */
inflate_block_mode;
/* inflate blocks semi-private state */
/* State of the block-level decoder: the current mode, the data that only
   applies to that mode (the sub union), and the mode-independent bit buffer,
   tree storage, sliding window pointers and output check. */
struct inflate_blocks_state {
/* mode */
inflate_block_mode mode; /* current inflate_block mode */
/* mode dependent information */
union {
uInt left; /* if STORED, bytes left to copy */
struct {
uInt table; /* table lengths (14 bits) */
uInt index; /* index into blens (or border) */
uIntf *blens; /* bit lengths of codes */
uInt bb; /* bit length tree depth */
inflate_huft *tb; /* bit length decoding tree */
} trees; /* if DTREE, decoding info for trees */
struct {
inflate_codes_statef
*codes;
} decode; /* if CODES, current state */
} sub; /* submode */
uInt last; /* true if this block is the last block */
/* mode independent information */
uInt bitk; /* bits in bit buffer */
uLong bitb; /* bit buffer */
inflate_huft *hufts; /* single malloc for tree space */
Bytef *window; /* sliding window */
Bytef *end; /* one byte after sliding window */
Bytef *read; /* window read pointer */
Bytef *write; /* window write pointer */
check_func checkfn; /* check function */
uLong check; /* check on output */
};
/* defines for inflate input/output */
/* update pointers and return */
#define UPDBITS {s->bitb=b;s->bitk=k;}
#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;}
#define UPDOUT {s->write=q;}
#define UPDATE {UPDBITS UPDIN UPDOUT}
#define LEAVE {UPDATE return inflate_flush(s,z,r);}
/* get bytes and bits */
#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
#define NEEDBYTE {if(n)r=Z_OK;else LEAVE}
#define NEXTBYTE (n--,*p++)
#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
#define DUMPBITS(j) {b>>=(j);k-=(j);}
/* output bytes */
#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT}
#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;}
#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
/* load local pointers */
#define LOAD {LOADIN LOADOUT}
/* masks for lower bits (size given to avoid silly warnings with Visual C++) */
extern uInt inflate_mask[17];
/* copy as much as possible from the sliding window to the output area */
extern int inflate_flush OF((
inflate_blocks_statef *,
z_streamp ,
int));
struct internal_state {int dummy;}; /* for buggy compilers */
#endif

View File

@@ -0,0 +1,445 @@
/**************************************************************************
* *
* Module : intersection.c *
* *
* Function: computing the intersection of regular sections *
* *
* ONLY for INTERNAL USE *
* ========================================= *
* *
* (l1:u1:s1) is section 1 *
* (l2:u2:s2) is section 2 *
* *
* (l3:u3:s3) will be the intersection of section 1 and section 2 *
* *
* s_s_intersect (l1, u1, l2, u2, *l3, *u3) *
* *
* - special case : s1 = s2 = 1, note that s3 = 1 *
* *
* r_s_intersect (l1, u1, s1, l2, u2, *l3, *u3, *s3) *
* *
* - special case : s2 = 1 (makes computation easier) *
* *
* r_r_intersect (l1, u1, s1, l2, u2, s2, *l3, *u3, *s3) *
* *
* f : (s_lb:...:s_str) -> (t_lb:...:t_str) *
* *
* (l1:u1:s1) is input, (l2:u2:s2) is f((l1:u1:s1)) *
* *
* map_section (s_lb,s_str,t_lb,t_str, l1, u1, s1, *l2, *u2, *s2) *
* *
* *
**************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
using namespace std;
extern ofstream prot;
/*********************************************************
* *
* functions for minimum and maximum *
* *
*********************************************************/
/* Returns the smaller of val1 and val2.
 * The result type is long, matching the parameters: returning int would
 * silently truncate bounds that do not fit into an int on platforms where
 * long is wider than int. */
static long my_min (long val1, long val2)
{
    return (val1 < val2) ? val1 : val2;
} /* my_min */
/* Returns the larger of val1 and val2.
 * The result type is long, matching the parameters: returning int would
 * silently truncate bounds that do not fit into an int on platforms where
 * long is wider than int. */
static long my_max (long val1, long val2)
{
    return (val1 > val2) ? val1 : val2;
} /* my_max */
/*********************************************************
* *
* correct_upper_bound of low:high:step *
* *
* -> returns high' that high' is value of section *
* *
* 101:100:2 -> 100 *
* *
*********************************************************/
/* Returns the last value that the section low:high:step actually reaches,
 * i.e. low + k*step for the largest k that does not run past high.
 *
 *  - An empty section returns high unchanged. Emptiness depends on the
 *    direction of the stride: low > high for an ascending section,
 *    low < high for a descending one (step < 0).
 *  - A zero step returns high, which is what a unit stride would give.
 *  - The result is a long so that large bounds are not truncated.
 */
static long correct_upper_bound (long low, long high, long step)
{
    long result;

    if (step == 0)
        result = high;
    else if ((step > 0) ? (low > high) : (low < high))
        result = high;                              /* empty section */
    else
        result = low + ((high - low) / step) * step;

#ifdef P_DEBUG
    prot <<"correct upper bound, low = " << low << " high = " << high << ", step = " << step
         <<" is " << result << endl;
#endif
    return (result);
}
/*********************************************************
* *
* raise_lower_bound (low1, low2, step) *
* *
* low1 <= low2 <= low1+k*step (k !minimal) *
* *
*********************************************************/
/* Returns the smallest value low1 + k*step (k >= 0) that is >= low2.
 * When low1 already is >= low2 it is returned unchanged.
 * step is expected to be positive. The result is a long so that large
 * bounds are not truncated to int. */
static long raise_lower_bound (long low1, long low2, long step)
{
    if (low1 >= low2)
        return (low1);

    /* number of whole steps needed to reach or pass low2, rounded up */
    const long steps = (low2 - low1 - 1) / step + 1;
    return (low1 + steps * step);
}
/*********************************************************
* *
* Intersection of (l1 : u1) and (l2 : u2) *
* *
*********************************************************/
/* Intersection of the unit-stride sections (l1:u1) and (l2:u2).
 *
 * Writes the larger lower bound to *l3 and the smaller upper bound to *u3;
 * an empty intersection shows up as *l3 > *u3.
 * The bounds are compared directly as long values so that they cannot be
 * truncated on the way through a narrower helper return type. */
void s_s_intersect (long l1, long u1, long l2, long u2, long * l3, long *u3)
{
    *l3 = (l1 > l2) ? l1 : l2;
    *u3 = (u1 < u2) ? u1 : u2;
# ifdef P_DEBUG
    prot << "s_s_intersect: (" << l1 << ':' << u1 << ") * (" << l2 << ':' << u2
         << ") = (" << *l3 << ':' << *u3 << ')' << endl;
# endif
} /* s_s_intersect */
/*********************************************************
* *
* Intersection of (l1 : u1 : s1) and (l2 : u2) *
* *
*********************************************************/
/* Intersection of the strided section (l1:u1:s1) with the unit-stride
 * section (l2:u2); the result is written to (*l3:*u3:*s3).
 *
 *  - s1 == 0 is handled as s1 == 1.
 *  - s1 < 0: the section is turned around (bounds swapped through
 *    correct_upper_bound, stride negated), intersected, and the result is
 *    turned around again.
 */
void r_s_intersect (long l1, long u1, long s1, long l2, long u2, long * l3, long * u3, long * s3)
{
    if (s1 == 0)
        s1 = 1;

    if (s1 < 0)
    {
        /* solve the mirrored, ascending problem ... */
        r_s_intersect (correct_upper_bound (l1, u1, s1), l1, -s1,
                       l2, u2, l3, u3, s3);
        /* ... and flip the answer back into descending form */
        *s3 = - *s3;
        long swapped = *u3;
        *u3 = *l3;
        *l3 = swapped;
    }
    else if (s1 == 1)
    {
        s_s_intersect (l1, u1, l2, u2, l3, u3);
        *s3 = s1;
    }
    else
    {
        /* clip the upper end, lift the lower end onto the stride grid,
           then snap the upper end onto the same grid */
        long top = my_min (correct_upper_bound (l1, u1, s1), u2);
        *l3 = raise_lower_bound (l1, l2, s1);
        *u3 = correct_upper_bound (*l3, top, s1);
        *s3 = s1;
    }
# ifdef P_DEBUG
    prot << "r_s_intersect: (" << l1 << ':' << u1 << ':' << s1 << ") * ("
         << l2 << ':' << u2 << ") = (" << *l3 << ':' << *u3 << ':' << *s3
         << ')' << endl;
# endif
} /* r_s_intersect */
/**********************************************************
* *
* gcd (a, b, x, y, g) *
* *
* solves equation: g = x * a - y * b *
* *
* with x > 0, and y > 0 *
* *
* g is greatest common divisor of a and b *
* *
**********************************************************/
static void gcd (long a, long b, long * x, long * y, long * g)
{
    if (b == 0)
    {
        *x = 1; *y = 0; *g = a;
        return;
    }

    /* we can divide a by b:  a = d * b + r */
    const long d = a / b;
    const long r = a % b;

    /* the recursive call swaps the roles of x and y, so afterwards
           g = y * b - x * r
             = y * b - x * a + x * d * b
             = - x * a - (- x * d - y) * b
             = (b - x) * a - (a - x * d - y ) * b                      */
    gcd (b, r, y, x, g);

    /* *y must be updated first: it still needs the old value of *x */
    *y = a - *y - *x * d;
    *x = b - *x;
}
/**********************************************************
*                                                         *
*  input : diff , diff >= 0                               *
*          s1 > 0, s2 > 0                                 *
*                                                         *
*  find the smallest k1 >= 0, k2 >= 0 with                *
*                                                         *
*     k1 * s1 = diff + k2 * s2 = add                      *
*                                                         *
*  *found is 0 (and *add is 0) if no solution exists,     *
*  *s3 always receives the least common multiple of       *
*  s1 and s2                                              *
*                                                         *
**********************************************************/
static void find_lower_bound (long diff, long s1, long s2, long * found, long * add, long * s3)
{
    long x, y, g;

    gcd (s1, s2, &x, &y, &g);
    /* g = x * s1 - y * s2 */

    /* least common multiple; dividing before multiplying keeps the
       intermediate value small (g divides s1 exactly) */
    const long kgV = (s1 / g) * s2;
    *s3 = kgV;

    if (diff % g != 0)
    {   /* diff is not a multiple of gcd(s1, s2): the grids never meet */
        *found = 0;
        *add   = 0;
        return;
    }
    *found = 1;

    /* Idea : find k1 with  k1 * s1 = diff  (mod s2).
              From  x * s1 = g  (mod s2)  follows
              (diff / g) * x * s1 = diff  (mod s2).
       Valid k1 repeat every s2 / g, so k1 is reduced modulo s2 / g (not
       modulo s2); this makes k1 * s1 the smallest non-negative solution,
       which lies below kgV.  Reducing both factors first keeps
       diff * x from going out of range.                               */
    const long period = s2 / g;
    long k1 = ( ((diff / g) % period) * (x % period) ) % period;
    if (k1 < 0)
        k1 += period;

    *add = k1 * s1;

    /* solutions are kgV apart: step up to the first one that is >= diff */
    if (*add < diff)
        *add += ((diff - *add - 1) / kgV + 1) * kgV;
} /* find_lower_bound */
/**********************************************************
* *
* intersection (l1:u1:s1, l2:u2:s2) *
* *
**********************************************************/
/* Intersection of two strided sections (l1:u1:s1) and (l2:u2:s2); the
 * result is written to (*l3:*u3:*s3).
 *
 * Zero strides are handled as unit strides. A unit stride on either side
 * is delegated to r_s_intersect. A negative s1 is solved on the mirrored
 * section and the result is mirrored back; a negative s2 is only mirrored
 * on input. If the two grids never meet, the result is the empty section
 * (l1 : l1 - *s3 : *s3).
 */
void r_r_intersect (long l1, long u1, long s1, long l2, long u2, long s2,
                    long * l3, long * u3, long * s3)
{
    if (s1 == 0) s1 = 1;
    if (s2 == 0) s2 = 1;

    if (s2 == 1)
    {
        r_s_intersect (l1, u1, s1, l2, u2, l3, u3, s3);
        return;
    }
    if (s1 == 1)
    {
        r_s_intersect (l2, u2, s2, l1, u1, l3, u3, s3);
        return;
    }
    if (s1 < 0)
    {
        r_r_intersect (correct_upper_bound (l1, u1, s1), l1, -s1,
                       l2, u2, s2, l3, u3, s3);
        /* inverse the result range */
        *s3 = - *s3;
        long swapped = *u3;
        *u3 = *l3;
        *l3 = swapped;
        return;
    }
    if (s2 < 0)
    {
        r_r_intersect (l1, u1, s1, correct_upper_bound (l2, u2, s2), l2, -s2, l3, u3, s3);
        return;
    }

    /* both strides are > 1 from here on */
    long top = my_min (correct_upper_bound (l1, u1, s1),
                       correct_upper_bound (l2, u2, s2));

    /* find l3 with l3 = l1 + k1 * s1, l3 = l2 + k2 * s2
       or k1 * s1 = l2 - l1 + k2 * s2; the search always starts from the
       section with the smaller lower bound */
    long found, add;
    const bool first_is_lower = (l1 <= l2);
    if (first_is_lower)
        find_lower_bound (l2 - l1, s1, s2, &found, &add, s3);
    else
        find_lower_bound (l1 - l2, s2, s1, &found, &add, s3);

    if (found == 0)
    {   /* no common element: report an empty section */
        *l3 = l1;
        *u3 = l1 - *s3;
        return;
    }

    *l3 = (first_is_lower ? l1 : l2) + add;
    *u3 = correct_upper_bound (*l3, top, *s3);
} /* r_r_intersect */
/**********************************************************
* *
* mapping is defined by (s_lb:s_ub:s_str) *
* to (t_lb:t_ub:t_str) *
* *
* map : s_lb -> t_lb *
* s_lb + s_str -> t_lb + t_str *
* s_lb + 2*s_str -> t_lb + 2*t_str *
* ..... *
* *
* find map of (l1:u1:s1), is (l2:u2:s2) *
* *
**********************************************************/
/* Converts the bounds x and y of a section lying on the grid
 * base + k * str into their grid indices:
 *     x = base + *x1 * str ,  y = base + *y1 * str
 *
 * For an empty section one bound may be off the grid; it is first moved to
 * one stride before the other bound (y for str > 0, x for str < 0).
 * If a bound still is not on the grid, a diagnostic is written to prot and
 * the program terminates with a failure status.
 */
static void map_normal (long x, long y, long base, long str, long * x1, long * y1)
{
    /* attention: for empty sections y (str > 0) or x (str < 0)
       might be not well defined */
    if (str > 0)
    {   if (x > y) y = x - str; }
    if (str < 0)
    {   if (x < y) x = y + str; }

    *x1 = (x - base) / str;
    *y1 = (y - base) / str;

    if (base + *x1 * str != x)
    {   prot << "map normal has serious problems" << endl;
        prot << "x(=" << x << ") != base(=" << base << ") + x1(=" << *x1
             << ") * str(=" << str << ')' << endl;
        /* this is a fatal error: do not report success to the caller's shell */
        exit (EXIT_FAILURE);
    }
    if (base + *y1 * str != y)
    {   prot << "map normal has serious problems" << endl;
        prot << "y(=" << y << ") != base(=" << base << ") + y1(=" << *y1
             << ") * str(=" << str << ')' << endl;
        exit (EXIT_FAILURE);
    }
#ifdef P_DEBUG
    prot << "map normal : " << x << " = " << base << " + (x1=" << *x1 << ") * " << str
         << ", " << y << " = " << base << " + (y1=" << *y1 << ") * " << str << endl;
#endif
} /* map_normal */
// s_lb, s_str, t_lb, t_str; - definition of mapping
// l1, u1, s1, *l2, *u2, *s2; - source and target sections
/* Maps the section (l1:u1:s1) through the mapping
 * [s_lb::s_str] -> [t_lb::t_str] and stores the image in (*l2:*u2:*s2).
 *
 * Three situations are distinguished:
 *   - equal strides: the section is only shifted by t_lb - s_lb,
 *       e.g. [5::2] -> [10::2] maps [21:41:10] to [26:46:10];
 *   - source stride 1: offsets from s_lb are scaled by t_str,
 *       e.g. [5:] -> [10::4] maps [20:30:5] to [70:110:20];
 *   - otherwise: the bounds are first converted to source grid indices by
 *       map_normal, then scaled by t_str and shifted by t_lb,
 *       e.g. [10:400:4] -> [5:] maps [70:110:20] to [20:30:5].
 */
void map_section (long s_lb, long s_str, long t_lb, long t_str, long l1, long u1, long s1,
                  long * l2, long * u2, long * s2)
{
    if (s_str == t_str)
    {
        *l2 = l1 + t_lb - s_lb;
        *u2 = u1 + t_lb - s_lb;
        *s2 = s1;
    }
    else if (s_str == 1)
    {
        *l2 = t_lb + (l1 - s_lb) * t_str;
        *u2 = t_lb + (u1 - s_lb) * t_str;
        *s2 = s1 * t_str;
    }
    else
    {
        /* Problem: [1::2] -> [1::1], map([1:0:2]) = [1:1:1],
           so map_normal makes sure that the bounds lie on the source grid.
           A target stride of 1 needs no special case: scaling by 1
           leaves the indices and the stride unchanged. */
        map_normal (l1, u1, s_lb, s_str, l2, u2);
        *l2 = *l2 * t_str + t_lb;
        *u2 = *u2 * t_str + t_lb;
        *s2 = s1 / s_str * t_str;
    }
# ifdef P_DEBUG
    prot << "map : [" << s_lb << "::" << s_str << "] -> [" << t_lb << "::" << t_str << "], map(["
         << l1 << ':' << u1 << ':' << s1 << "]) = [" << *l2 << ':' << *u2 << ':' << *s2 << ']' << endl;
# endif
}

View File

@@ -0,0 +1,312 @@
#include <stdio.h>
#include <string.h>
#include <exception>
#include <fstream>
#include <iomanip>
#include <vector>
#include <math.h>
//====
#ifdef _MSC_VER
/*Windows*/
#include <io.h>
#else
/*Unix*/
#include <sys/types.h>
#include <dirent.h>
#endif
//=***
#include "Ver.h"
#include "Ps.h"
#include "TraceLine.h"
#include "FuncCall.h"
#include "Interval.h"
#include "Vm.h"
#include "ModelStructs.h"
using namespace std;
ofstream prot;
int search_opt_mode;
vector<long> SizeArray;
//====
extern long StrToLong(char* str, int base);
// Plain record with a function id, a time value and two raw pointers.
// NOTE(review): presumably one parsed trace line; nothing in this part of the
// file shows who allocates or frees mask and extra_koef -- confirm ownership.
struct tr_line
{ int func_id;      // numeric function id
double time;        // time value (unit not visible here)
char *mask;         // raw character buffer
long **extra_koef;  // pointer to arrays of long coefficients
};
//=***
// External functions
extern void SaveHTMLInterval(ofstream hfile);
extern void TraceParsingCleanup();
extern FuncType GetFuncType(Event func_id);
ofstream hfile;
extern double grig_time_call;
extern long currentAM_ID;
//grig
typedef std::vector<long> long_vect;
typedef std::vector<long_vect> long2_vect;
long_vect MinSizesOfAM; // <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
extern bool FirstTrace;
extern _PSInfo *PSInfo ;
extern _AMInfo *AMInfo ;
extern _AMViewInfo *AMViews;
// Brings the global PS / AM / AMView bookkeeping back to its initial state:
// the three tables are released and their pointers cleared, and the
// per-type element counters are set to zero.
void resetInfos()
{
    // free() accepts a null pointer, so no explicit check is required
    free(PSInfo);
    PSInfo = NULL;

    free(AMInfo);
    AMInfo = NULL;

    free(AMViews);
    AMViews = NULL;

    _PSInfo::count = 0;
    _AMInfo::count = 0;
    _AMViewInfo::count = 0;
}
//\grig
/*
void ModelExec(VectorTraceLine * tl)
{
FuncCall * func_call;
while (!tl->end()) {
func_call = new FuncCall(tl);
FuncType func_type = GetFuncType(func_call->func_id);
#ifdef P_DEBUG
prot << " func_id = " << func_call->func_id
<< " time = " << procElapsedTime[0]
<< " file = " << func_call->source_file
<< " line = " << func_call->source_line
<< endl;
#endif
if (func_call->func_id == Event_dvm_exit)
return;
switch(func_type) {
case __IntervalFunc :
func_call->IntervalTime();
break;
case __IOFunc :
func_call->IOTime();
break;
case __MPS_AMFunc :
func_call->MPS_AMTime();
break;
case __DArrayFunc :
func_call->DArrayTime();
break;
case __ShadowFunc :
func_call->ShadowTime();
break;
case __ReductFunc :
func_call->ReductTime();
break;
case __ParLoopFunc :
func_call->ParLoopTime();
break;
case __RemAccessFunc :
func_call->RemAccessTime();
break;
case __RegularFunc :
func_call->RegularTime();
break;
case __UnknownFunc :
func_call->UnknownTime();
break;
}
delete func_call;
}
}
*/
// Dump the interval tree rooted at CurrInterval into the global `hfile` stream
// and close the stream afterwards.
void CreateHTMLfile()
{
    // Floating-point values are written in fixed notation with 3 decimals.
    hfile.setf(std::ios::fixed);
    hfile.precision(3);

    if (CurrInterval->count != 0) {
        // Nested intervals exist: pass the IDs of the first and the last one.
        CurrInterval->SaveTree(hfile, 1,
                               CurrInterval->nested_intervals[0]->ID,
                               CurrInterval->nested_intervals[CurrInterval->count - 1]->ID);
    } else {
        // No nested intervals: the interval's own ID is used for both bounds.
        CurrInterval->SaveTree(hfile, 1, CurrInterval->ID, CurrInterval->ID);
    }

    hfile.close();
}
// Print the command-line usage text to stderr and terminate the process with
// EXIT_FAILURE. Never returns.
static void message()
{
    static const char* const usage_lines[] = {
        "ERROR : missing required command line parameter.",
        "SYNTAX : predictor <param_file> <trc_file> <html_file> <processors>",
        "where : <param_file> - parameter file name,",
        " : <trc_file> - trace file name",
        " : <html_file> - resulting HTML file name,",
        " : <processors> - processors topology,",
        " i.e. extension on each dimension,",
        " separated by the space."
    };
    const unsigned line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);

    for (unsigned k = 0; k < line_count; ++k)
        std::cerr << usage_lines[k] << std::endl;

    exit(EXIT_FAILURE);
}
// Factorise N by trial division and store the factors in `result`
// (any previous contents of `result` are discarded).
//
// For N >= 1 the output is a leading 1 followed by the prime factors of N with
// multiplicity, in non-decreasing order:
//     12 -> {1, 2, 2, 3}     7 -> {1, 7}     1 -> {1}
// N == 0 yields {0} (behaviour kept from the original implementation).
// N <  0 yields an empty vector. The original compared against sqrt(N), which is
// NaN for negative N, so its loop never terminated for such input.
void Getsimplefactors(std::vector<long> & result,int N)
{
    result.clear();
    if (N < 0)
        return;

    long remaining = N;

    // The original loop started at 1 and therefore always recorded the trivial
    // factor 1 for N >= 1; callers may rely on it, so it is kept.
    if (N >= 1)
        result.push_back(1);

    // "d <= N / d" is the overflow-free integer form of "d <= sqrt(N)".
    for (long d = 2; d <= N / d; ++d)
    {
        while (remaining % d == 0)
        {
            remaining /= d;
            result.push_back(d);
        }
    }

    // Whatever is left is either 1 (fully factorised), a single prime larger
    // than sqrt(N), or 0 when N itself was 0.
    if (remaining != 1)
        result.push_back(remaining);
}
// Collect every positive divisor of N, in ascending order, into `result`
// (previous contents are discarded). For N <= 0 the result is empty.
void GetAllFactors(std::vector<long>& result,int N)
{
    result.clear();

    int candidate = 1;
    while (candidate <= N)
    {
        // candidate divides N exactly when the remainder is zero
        if (N % candidate == 0)
            result.push_back(candidate);
        ++candidate;
    }
}
// Recursive helper: enumerate every ordered way of writing N as a product of
// exactly K positive integer factors.
//
//   res      - factors already chosen for the leading positions (taken by value,
//              so each recursion level works on its own copy)
//   N        - the number that still has to be split
//   K        - how many factors are still to be chosen
//   glob_res - receives one complete vector (res + the K new factors) per decomposition
//
// K < 1 produces nothing. The original had no such guard: K <= 0 never reaches
// the K == 1 base case and recursed without bound.
void getRNK(std::vector<long> res,int N,int K,std::vector<long_vect>& glob_res)
{
    if (K < 1)
        return;

    if (K == 1)
    {
        // Last position: it must take whatever is left of N.
        res.push_back(N);
        glob_res.push_back(res);
        return;
    }

    // Try every divisor of N in the current position and split the quotient
    // into the remaining K-1 positions.
    std::vector<long> divisors;
    GetAllFactors(divisors, N);
    for (std::vector<long>::size_type idx = 0; idx < divisors.size(); ++idx)
    {
        std::vector<long> prefix(res);
        prefix.push_back(divisors[idx]);
        getRNK(prefix, N / divisors[idx], K - 1, glob_res);
    }
}
void getNK(std::vector<long_vect>& res,int N,int K) // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> N <20><> K <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
{
res.resize(0);
std::vector<long> temp;
temp.resize(0);
getRNK(temp,N,K,res);
}
// Build every decomposition of proc_number into `rank` factors (via getNK) and
// store them in `result`. Currently always returns true.
bool MakeAllConfigurations(int proc_number,int rank,std::vector<long_vect>& result)
{
    // The factorisation below is still computed, but its only consumer - the
    // early rejection that used to follow it - is disabled:
    //==// if(prime_factors.size()<rank) return false;
    std::vector<long> prime_factors;
    Getsimplefactors(prime_factors, proc_number);

    getNK(result, proc_number, rank);

    return true;
}
// Heuristic quality of splitting a_size elements into j blocks of size
// q = ceil(a_size / j).
//
// Returns (size of the last, possibly partial, block) / q, a value in (0, 1],
// or 0 when fewer than j blocks are actually needed (the check the original
// expressed as q*(j-1)+ost > a_size).
//
// Non-positive j or a_size returns 0; the original divided (or took a modulo)
// by zero for such input. The block size is now computed in integer arithmetic:
// the original went through `float`, which cannot represent every value above
// 2^24 and therefore produced a wrong q for large a_size.
float GetEuristik(long j, long a_size /*, long p, long first_p, long last_p*/)
{
    if (j <= 0 || a_size <= 0)
        return 0.0f;

    // Integer ceil(a_size / j) without the overflow risk of (a_size + j - 1) / j.
    const long block = a_size / j + ((a_size % j != 0) ? 1 : 0);

    // Size of the last occupied block: the remainder, or a full block when
    // a_size is a multiple of the block size.
    const long tail = a_size % block;
    long last = (tail != 0) ? tail : block;

    // a_size == block * (used - 1) + last, hence
    // block * (j - 1) + last > a_size  <=>  used < j.
    const long used = a_size / block + ((tail != 0) ? 1 : 0);
    if (used < j)
        last = 0;

    return (float)((double)last / block);
}
// Score a candidate configuration `who` against the global MinSizesOfAM.
//
// Returns 0 ("always bad") as soon as some who[i] exceeds MinSizesOfAM[i].
// Otherwise returns the smallest GetEuristik(who[i], MinSizesOfAM[i]) over all
// i, scaled by 10000 and truncated to int. For an empty `who` the initial
// sentinel is returned unchanged (10001 * 10000).
//
// NOTE(review): MinSizesOfAM is indexed with who's indices without a size
// check - presumably callers guarantee who.size() <= MinSizesOfAM.size(); confirm.
int CheckEuristik(std::vector<long> & who)
{
    typedef std::vector<long>::size_type index_t;

    const bool dump_config = false;   // set to true to print the configuration while debugging
    double min_ost = 10001;           // sentinel above any scaled heuristic value

    // Reject first, before any heuristic is computed.
    for (index_t i = 0; i < who.size(); ++i)
    {
        if (who[i] > MinSizesOfAM[i])
            return 0; // always bad
    }

    for (index_t i = 0; i < who.size(); ++i)
    {
        const double ost = GetEuristik(who[i], MinSizesOfAM[i]);
        if (ost < min_ost)
            min_ost = ost;
    }

    if (dump_config)
    {
        // elements are long, so the conversion must be %ld (the original used %d)
        for (index_t k = 0; k < who.size(); ++k)
            printf("%ld ", who[k]);
    }

    return (int)(min_ost*10000);
}
// Returns false when best's effective parameter is less than or equal to
// current's, and true otherwise.
bool IsBestConfiguration(Interval* best,Interval * current)
{
    // debug aid:
    // prot<<"curr="<<current->GetEffectiveParameter() <<"; best="<<best->GetEffectiveParameter()<<"\n";
    const bool best_not_worse =
        best->GetEffectiveParameter() <= current->GetEffectiveParameter();
    return !best_not_worse;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,128 @@
/* header created automatically with -DGEN_TREES_H */
local const ct_data static_ltree[L_CODES+2] = {
{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}},
{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}},
{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}},
{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}},
{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}},
{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}},
{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}},
{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}},
{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}},
{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}},
{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}},
{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}},
{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}},
{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}},
{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}},
{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}},
{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}},
{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}},
{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}},
{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}},
{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}},
{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}},
{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}},
{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}},
{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}},
{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}},
{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}},
{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}},
{{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}},
{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}},
{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}},
{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}},
{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}},
{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}},
{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}},
{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}},
{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}},
{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}},
{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}},
{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}},
{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}},
{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}},
{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}},
{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}},
{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}},
{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}},
{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}},
{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}},
{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}},
{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}},
{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}},
{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}},
{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}},
{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}},
{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}},
{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}},
{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}},
{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}}
};
local const ct_data static_dtree[D_CODES] = {
{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
};
const uch _dist_code[DIST_CODE_LEN] = {
0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8,
8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17,
18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
};
const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {
0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12,
13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
};
local const int base_length[LENGTH_CODES] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
64, 80, 96, 112, 128, 160, 192, 224, 0
};
local const int base_dist[D_CODES] = {
0, 1, 2, 3, 4, 6, 8, 12, 16, 24,
32, 48, 64, 96, 128, 192, 256, 384, 512, 768,
1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576
};

View File

@@ -0,0 +1,58 @@
/* uncompr.c -- decompress a memory buffer
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zlib.h"
/* ===========================================================================
Decompresses the source buffer into the destination buffer. sourceLen is
the byte length of the source buffer. Upon entry, destLen is the total
size of the destination buffer, which must be large enough to hold the
entire uncompressed data. (The size of the uncompressed data must have
been saved previously by the compressor and transmitted to the decompressor
by some mechanism outside the scope of this compression library.)
Upon exit, destLen is the actual size of the uncompressed data.
This function can be used to decompress a whole file at once if the
input file is mmap'ed.
uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output
buffer, or Z_DATA_ERROR if the input data was corrupted.
*/
int ZEXPORT uncompress (dest, destLen, source, sourceLen)
Bytef *dest;
uLongf *destLen;
const Bytef *source;
uLong sourceLen;
{
z_stream stream;
int err;
/* Hand the whole source buffer to the stream as input. */
stream.next_in = (Bytef*)source;
stream.avail_in = (uInt)sourceLen;
/* Check for source > 64K on 16-bit machine: */
/* (avail_in is a uInt; if the uLong length did not survive the cast, give up) */
if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
/* Same for the destination: *destLen is the capacity of dest on entry. */
stream.next_out = dest;
stream.avail_out = (uInt)*destLen;
if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
/* No custom allocator callbacks are supplied. */
/* NOTE(review): stream.opaque is not set here - presumably inflateInit handles */
/* that when zalloc/zfree are null; confirm against inflateInit. */
stream.zalloc = (alloc_func)0;
stream.zfree = (free_func)0;
err = inflateInit(&stream);
if (err != Z_OK) return err;
/* One-shot decompression: anything but Z_STREAM_END counts as failure. */
err = inflate(&stream, Z_FINISH);
if (err != Z_STREAM_END) {
inflateEnd(&stream);
/* Z_OK without reaching the end of the stream is reported as Z_BUF_ERROR. */
return err == Z_OK ? Z_BUF_ERROR : err;
}
/* Report how many bytes were actually written to dest. */
*destLen = stream.total_out;
err = inflateEnd(&stream);
return err;
}

View File

@@ -0,0 +1,279 @@
/* zconf.h -- configuration of the zlib compression library
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#ifndef _ZCONF_H
#define _ZCONF_H
/*
* If you *really* need a unique prefix for all types and library functions,
* compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
*/
#ifdef Z_PREFIX
# define deflateInit_ z_deflateInit_
# define deflate z_deflate
# define deflateEnd z_deflateEnd
# define inflateInit_ z_inflateInit_
# define inflate z_inflate
# define inflateEnd z_inflateEnd
# define deflateInit2_ z_deflateInit2_
# define deflateSetDictionary z_deflateSetDictionary
# define deflateCopy z_deflateCopy
# define deflateReset z_deflateReset
# define deflateParams z_deflateParams
# define inflateInit2_ z_inflateInit2_
# define inflateSetDictionary z_inflateSetDictionary
# define inflateSync z_inflateSync
# define inflateSyncPoint z_inflateSyncPoint
# define inflateReset z_inflateReset
# define compress z_compress
# define compress2 z_compress2
# define uncompress z_uncompress
# define adler32 z_adler32
# define crc32 z_crc32
# define get_crc_table z_get_crc_table
# define Byte z_Byte
# define uInt z_uInt
# define uLong z_uLong
# define Bytef z_Bytef
# define charf z_charf
# define intf z_intf
# define uIntf z_uIntf
# define uLongf z_uLongf
# define voidpf z_voidpf
# define voidp z_voidp
#endif
#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
# define WIN32
#endif
#if defined(__GNUC__) || defined(WIN32) || defined(__386__) || defined(i386)
# ifndef __32BIT__
# define __32BIT__
# endif
#endif
#if defined(__MSDOS__) && !defined(MSDOS)
# define MSDOS
#endif
/*
* Compile with -DMAXSEG_64K if the alloc function cannot allocate more
* than 64k bytes at a time (needed on systems with 16-bit int).
*/
#if defined(MSDOS) && !defined(__32BIT__)
# define MAXSEG_64K
#endif
#ifdef MSDOS
# define UNALIGNED_OK
#endif
#if (defined(MSDOS) || defined(_WINDOWS) || defined(WIN32)) && !defined(STDC)
# define STDC
#endif
#if defined(__STDC__) || defined(__cplusplus) || defined(__OS2__)
# ifndef STDC
# define STDC
# endif
#endif
#ifndef STDC
# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
# define const
# endif
#endif
/* Some Mac compilers merge all .h files incorrectly: */
#if defined(__MWERKS__) || defined(applec) ||defined(THINK_C) ||defined(__SC__)
# define NO_DUMMY_DECL
#endif
/* Old Borland C incorrectly complains about missing returns: */
#if defined(__BORLANDC__) && (__BORLANDC__ < 0x500)
# define NEED_DUMMY_RETURN
#endif
/* Maximum value for memLevel in deflateInit2 */
#ifndef MAX_MEM_LEVEL
# ifdef MAXSEG_64K
# define MAX_MEM_LEVEL 8
# else
# define MAX_MEM_LEVEL 9
# endif
#endif
/* Maximum value for windowBits in deflateInit2 and inflateInit2.
* WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
* created by gzip. (Files created by minigzip can still be extracted by
* gzip.)
*/
#ifndef MAX_WBITS
# define MAX_WBITS 15 /* 32K LZ77 window */
#endif
/* The memory requirements for deflate are (in bytes):
(1 << (windowBits+2)) + (1 << (memLevel+9))
that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
plus a few kilobytes for small objects. For example, if you want to reduce
the default memory requirements from 256K to 128K, compile with
make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
Of course this will generally degrade compression (there's no free lunch).
The memory requirements for inflate are (in bytes) 1 << windowBits
that is, 32K for windowBits=15 (default value) plus a few kilobytes
for small objects.
*/
/* Type declarations */
#ifndef OF /* function prototypes */
# ifdef STDC
# define OF(args) args
# else
# define OF(args) ()
# endif
#endif
/* The following definitions for FAR are needed only for MSDOS mixed
* model programming (small or medium model with some far allocations).
* This was tested only with MSC; for other MSDOS compilers you may have
* to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
* just define FAR to be empty.
*/
#if (defined(M_I86SM) || defined(M_I86MM)) && !defined(__32BIT__)
/* MSC small or medium model */
# define SMALL_MEDIUM
# ifdef _MSC_VER
# define FAR _far
# else
# define FAR far
# endif
#endif
#if defined(__BORLANDC__) && (defined(__SMALL__) || defined(__MEDIUM__))
# ifndef __32BIT__
# define SMALL_MEDIUM
# define FAR _far
# endif
#endif
/* Compile with -DZLIB_DLL for Windows DLL support */
#if defined(ZLIB_DLL)
# if defined(_WINDOWS) || defined(WINDOWS)
# ifdef FAR
# undef FAR
# endif
# include <windows.h>
# define ZEXPORT WINAPI
# ifdef WIN32
# define ZEXPORTVA WINAPIV
# else
# define ZEXPORTVA FAR _cdecl _export
# endif
# endif
# if defined (__BORLANDC__)
# if (__BORLANDC__ >= 0x0500) && defined (WIN32)
# include <windows.h>
# define ZEXPORT __declspec(dllexport) WINAPI
/* The variadic export macro used everywhere else in this header is ZEXPORTVA.
 * This branch used to define only the misspelt ZEXPORTRVA, so ZEXPORTVA fell
 * back to the empty default further down. */
# define ZEXPORTVA __declspec(dllexport) WINAPIV
/* Misspelt name kept so that any code that picked it up keeps compiling. */
# define ZEXPORTRVA __declspec(dllexport) WINAPIV
# else
# if defined (_Windows) && defined (__DLL__)
# define ZEXPORT _export
# define ZEXPORTVA _export
# endif
# endif
# endif
#endif
#if defined (__BEOS__)
# if defined (ZLIB_DLL)
# define ZEXTERN extern __declspec(dllexport)
# else
# define ZEXTERN extern __declspec(dllimport)
# endif
#endif
#ifndef ZEXPORT
# define ZEXPORT
#endif
#ifndef ZEXPORTVA
# define ZEXPORTVA
#endif
#ifndef ZEXTERN
# define ZEXTERN extern
#endif
#ifndef FAR
# define FAR
#endif
#if !defined(MACOS) && !defined(TARGET_OS_MAC)
typedef unsigned char Byte; /* 8 bits */
#endif
typedef unsigned int uInt; /* 16 bits or more */
typedef unsigned long uLong; /* 32 bits or more */
#ifdef SMALL_MEDIUM
/* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
# define Bytef Byte FAR
#else
typedef Byte FAR Bytef;
#endif
typedef char FAR charf;
typedef int FAR intf;
typedef uInt FAR uIntf;
typedef uLong FAR uLongf;
#ifdef STDC
typedef void FAR *voidpf;
typedef void *voidp;
#else
typedef Byte FAR *voidpf;
typedef Byte *voidp;
#endif
#ifdef HAVE_UNISTD_H
# include <sys/types.h> /* for off_t */
# include <unistd.h> /* for SEEK_* and off_t */
# define z_off_t off_t
#endif
#ifndef SEEK_SET
# define SEEK_SET 0 /* Seek from beginning of file. */
# define SEEK_CUR 1 /* Seek from current position. */
# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
#endif
#ifndef z_off_t
# define z_off_t long
#endif
/* MVS linker does not support external names larger than 8 bytes */
#if defined(__MVS__)
# pragma map(deflateInit_,"DEIN")
# pragma map(deflateInit2_,"DEIN2")
# pragma map(deflateEnd,"DEEND")
# pragma map(inflateInit_,"ININ")
# pragma map(inflateInit2_,"ININ2")
# pragma map(inflateEnd,"INEND")
# pragma map(inflateSync,"INSY")
# pragma map(inflateSetDictionary,"INSEDI")
# pragma map(inflate_blocks,"INBL")
# pragma map(inflate_blocks_new,"INBLNE")
# pragma map(inflate_blocks_free,"INBLFR")
# pragma map(inflate_blocks_reset,"INBLRE")
# pragma map(inflate_codes_free,"INCOFR")
# pragma map(inflate_codes,"INCO")
# pragma map(inflate_fast,"INFA")
# pragma map(inflate_flush,"INFLU")
# pragma map(inflate_mask,"INMA")
# pragma map(inflate_set_dictionary,"INSEDI2")
# pragma map(inflate_copyright,"INCOPY")
# pragma map(inflate_trees_bits,"INTRBI")
# pragma map(inflate_trees_dynamic,"INTRDY")
# pragma map(inflate_trees_fixed,"INTRFI")
# pragma map(inflate_trees_free,"INTRFR")
#endif
#endif /* _ZCONF_H */

View File

@@ -0,0 +1,893 @@
/* zlib.h -- interface of the 'zlib' general purpose compression library
version 1.1.3, July 9th, 1998
Copyright (C) 1995-1998 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
The data format used by the zlib library is described by RFCs (Request for
Comments) 1950 to 1952 in the files ftp://ds.internic.net/rfc/rfc1950.txt
(zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
*/
#ifndef _ZLIB_H
#define _ZLIB_H
#include "zconf.h"
#ifdef __cplusplus
extern "C" {
#endif
#define ZLIB_VERSION "1.1.3"
/*
The 'zlib' compression library provides in-memory compression and
decompression functions, including integrity checks of the uncompressed
data. This version of the library supports only one compression method
(deflation) but other algorithms will be added later and will have the same
stream interface.
Compression can be done in a single step if the buffers are large
enough (for example if an input file is mmap'ed), or can be done by
repeated calls of the compression function. In the latter case, the
application must provide more input and/or consume the output
(providing more output space) before each call.
The library also supports reading and writing files in gzip (.gz) format
with an interface similar to that of stdio.
The library does not install any signal handler. The decoder checks
the consistency of the compressed data, so the library should never
crash even in case of corrupted input.
*/
typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
typedef void (*free_func) OF((voidpf opaque, voidpf address));
struct internal_state;
/* Stream descriptor shared between the application and the library.
 * The application supplies the input (next_in/avail_in) and output
 * (next_out/avail_out) buffers and must initialize zalloc, zfree and opaque
 * before calling an init function; all other fields are set by the library.
 */
typedef struct z_stream_s {
Bytef *next_in; /* next input byte */
uInt avail_in; /* number of bytes available at next_in */
uLong total_in; /* total number of input bytes read so far */
Bytef *next_out; /* next output byte should be put there */
uInt avail_out; /* remaining free space at next_out */
uLong total_out; /* total number of bytes output so far */
char *msg; /* last error message, NULL if no error */
struct internal_state FAR *state; /* not visible by applications */
alloc_func zalloc; /* used to allocate the internal state */
free_func zfree; /* used to free the internal state */
voidpf opaque; /* private data object passed to zalloc and zfree */
int data_type; /* best guess about the data type: ascii or binary */
uLong adler; /* adler32 value of the uncompressed data */
uLong reserved; /* reserved for future use */
} z_stream;
typedef z_stream FAR *z_streamp;
/*
The application must update next_in and avail_in when avail_in has
dropped to zero. It must update next_out and avail_out when avail_out
has dropped to zero. The application must initialize zalloc, zfree and
opaque before calling the init function. All other fields are set by the
compression library and must not be updated by the application.
The opaque value provided by the application will be passed as the first
parameter for calls of zalloc and zfree. This can be useful for custom
memory management. The compression library attaches no meaning to the
opaque value.
zalloc must return Z_NULL if there is not enough memory for the object.
If zlib is used in a multi-threaded application, zalloc and zfree must be
thread safe.
On 16-bit systems, the functions zalloc and zfree must be able to allocate
exactly 65536 bytes, but will not be required to allocate more than this
if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
pointers returned by zalloc for objects of exactly 65536 bytes *must*
have their offset normalized to zero. The default allocation function
provided by this library ensures this (see zutil.c). To reduce memory
requirements and avoid any allocation of 64K objects, at the expense of
compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
The fields total_in and total_out can be used for statistics or
progress reports. After compression, total_in holds the total size of
the uncompressed data and may be saved for use in the decompressor
(particularly if the decompressor wants to decompress everything in
a single step).
*/
/* constants */
#define Z_NO_FLUSH 0
#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */
#define Z_SYNC_FLUSH 2
#define Z_FULL_FLUSH 3
#define Z_FINISH 4
/* Allowed flush values; see deflate() below for details */
#define Z_OK 0
#define Z_STREAM_END 1
#define Z_NEED_DICT 2
#define Z_ERRNO (-1)
#define Z_STREAM_ERROR (-2)
#define Z_DATA_ERROR (-3)
#define Z_MEM_ERROR (-4)
#define Z_BUF_ERROR (-5)
#define Z_VERSION_ERROR (-6)
/* Return codes for the compression/decompression functions. Negative
* values are errors, positive values are used for special but normal events.
*/
#define Z_NO_COMPRESSION 0
#define Z_BEST_SPEED 1
#define Z_BEST_COMPRESSION 9
#define Z_DEFAULT_COMPRESSION (-1)
/* compression levels */
#define Z_FILTERED 1
#define Z_HUFFMAN_ONLY 2
#define Z_DEFAULT_STRATEGY 0
/* compression strategy; see deflateInit2() below for details */
#define Z_BINARY 0
#define Z_ASCII 1
#define Z_UNKNOWN 2
/* Possible values of the data_type field */
#define Z_DEFLATED 8
/* The deflate compression method (the only one supported in this version) */
#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
#define zlib_version zlibVersion()
/* for compatibility with versions < 1.0.2 */
/* basic functions */
ZEXTERN const char * ZEXPORT zlibVersion OF((void));
/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
If the first character differs, the library code actually used is
not compatible with the zlib.h header file used by the application.
This check is automatically made by deflateInit and inflateInit.
*/
/*
ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
Initializes the internal stream state for compression. The fields
zalloc, zfree and opaque must be initialized before by the caller.
If zalloc and zfree are set to Z_NULL, deflateInit updates them to
use default allocation functions.
The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
1 gives best speed, 9 gives best compression, 0 gives no compression at
all (the input data is simply copied a block at a time).
Z_DEFAULT_COMPRESSION requests a default compromise between speed and
compression (currently equivalent to level 6).
deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_STREAM_ERROR if level is not a valid compression level,
Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
with the version assumed by the caller (ZLIB_VERSION).
msg is set to null if there is no error message. deflateInit does not
perform any compression: this will be done by deflate().
*/
ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
/*
deflate compresses as much data as possible, and stops when the input
buffer becomes empty or the output buffer becomes full. It may introduce some
output latency (reading input without producing any output) except when
forced to flush.
The detailed semantics are as follows. deflate performs one or both of the
following actions:
- Compress more input starting at next_in and update next_in and avail_in
accordingly. If not all input can be processed (because there is not
enough room in the output buffer), next_in and avail_in are updated and
processing will resume at this point for the next call of deflate().
- Provide more output starting at next_out and update next_out and avail_out
accordingly. This action is forced if the parameter flush is non zero.
Forcing flush frequently degrades the compression ratio, so this parameter
should be set only when necessary (in interactive applications).
Some output may be provided even if flush is not set.
Before the call of deflate(), the application should ensure that at least
one of the actions is possible, by providing more input and/or consuming
more output, and updating avail_in or avail_out accordingly; avail_out
should never be zero before the call. The application can consume the
compressed output when it wants, for example when the output buffer is full
(avail_out == 0), or after each call of deflate(). If deflate returns Z_OK
and with zero avail_out, it must be called again after making room in the
output buffer because there might be more output pending.
If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
flushed to the output buffer and the output is aligned on a byte boundary, so
that the decompressor can get all input data available so far. (In particular
avail_in is zero after the call if enough output space has been provided
before the call.) Flushing may degrade compression for some compression
algorithms and so it should be used only when necessary.
If flush is set to Z_FULL_FLUSH, all output is flushed as with
Z_SYNC_FLUSH, and the compression state is reset so that decompression can
restart from this point if previous compressed data has been damaged or if
random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
the compression.
If deflate returns with avail_out == 0, this function must be called again
with the same value of the flush parameter and more output space (updated
avail_out), until the flush is complete (deflate returns with non-zero
avail_out).
If the parameter flush is set to Z_FINISH, pending input is processed,
pending output is flushed and deflate returns with Z_STREAM_END if there
was enough output space; if deflate returns with Z_OK, this function must be
called again with Z_FINISH and more output space (updated avail_out) but no
more input data, until it returns with Z_STREAM_END or an error. After
deflate has returned Z_STREAM_END, the only possible operations on the
stream are deflateReset or deflateEnd.
Z_FINISH can be used immediately after deflateInit if all the compression
is to be done in a single step. In this case, avail_out must be at least
0.1% larger than avail_in plus 12 bytes. If deflate does not return
Z_STREAM_END, then it must be called again as described above.
deflate() sets strm->adler to the adler32 checksum of all input read
so far (that is, total_in bytes).
deflate() may update data_type if it can make a good guess about
the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered
binary. This field is only for information purposes and does not affect
the compression algorithm in any manner.
deflate() returns Z_OK if some progress has been made (more input
processed or more output produced), Z_STREAM_END if all input has been
consumed and all output has been produced (only when flush is set to
Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible
(for example avail_in or avail_out was zero).
*/
ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
/*
All dynamically allocated data structures for this stream are freed.
This function discards any unprocessed input and does not flush any
pending output.
deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
stream state was inconsistent, Z_DATA_ERROR if the stream was freed
prematurely (some input or output was discarded). In the error case,
msg may be set but then points to a static string (which must not be
deallocated).
*/
/*
ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
Initializes the internal stream state for decompression. The fields
next_in, avail_in, zalloc, zfree and opaque must be initialized before by
the caller. If next_in is not Z_NULL and avail_in is large enough (the exact
value depends on the compression method), inflateInit determines the
compression method from the zlib header and allocates all data structures
accordingly; otherwise the allocation will be deferred to the first call of
inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
use default allocation functions.
inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
version assumed by the caller. msg is set to null if there is no error
message. inflateInit does not perform any decompression apart from reading
the zlib header if present: this will be done by inflate(). (So next_in and
avail_in may be modified, but next_out and avail_out are unchanged.)
*/
ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
/*
inflate decompresses as much data as possible, and stops when the input
buffer becomes empty or the output buffer becomes full. It may
introduce some output latency (reading input without producing any output)
except when forced to flush.
The detailed semantics are as follows. inflate performs one or both of the
following actions:
- Decompress more input starting at next_in and update next_in and avail_in
accordingly. If not all input can be processed (because there is not
enough room in the output buffer), next_in is updated and processing
will resume at this point for the next call of inflate().
- Provide more output starting at next_out and update next_out and avail_out
accordingly. inflate() provides as much output as possible, until there
is no more input data or no more space in the output buffer (see below
about the flush parameter).
Before the call of inflate(), the application should ensure that at least
one of the actions is possible, by providing more input and/or consuming
more output, and updating the next_* and avail_* values accordingly.
The application can consume the uncompressed output when it wants, for
example when the output buffer is full (avail_out == 0), or after each
call of inflate(). If inflate returns Z_OK and with zero avail_out, it
must be called again after making room in the output buffer because there
might be more output pending.
If the parameter flush is set to Z_SYNC_FLUSH, inflate flushes as much
output as possible to the output buffer. The flushing behavior of inflate is
not specified for values of the flush parameter other than Z_SYNC_FLUSH
and Z_FINISH, but the current implementation actually flushes as much output
as possible anyway.
inflate() should normally be called until it returns Z_STREAM_END or an
error. However if all decompression is to be performed in a single step
(a single call of inflate), the parameter flush should be set to
Z_FINISH. In this case all pending input is processed and all pending
output is flushed; avail_out must be large enough to hold all the
uncompressed data. (The size of the uncompressed data may have been saved
by the compressor for this purpose.) The next operation on this stream must
be inflateEnd to deallocate the decompression state. The use of Z_FINISH
is never required, but can be used to inform inflate that a faster routine
may be used for the single inflate() call.
If a preset dictionary is needed at this point (see inflateSetDictionary
below), inflate sets strm->adler to the adler32 checksum of the
dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise
it sets strm->adler to the adler32 checksum of all output produced
so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or
an error code as described below. At the end of the stream, inflate()
checks that its computed adler32 checksum is equal to that saved by the
compressor and returns Z_STREAM_END only if the checksum is correct.
inflate() returns Z_OK if some progress has been made (more input processed
or more output produced), Z_STREAM_END if the end of the compressed data has
been reached and all uncompressed output has been produced, Z_NEED_DICT if a
preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
corrupted (input stream not conforming to the zlib format or incorrect
adler32 checksum), Z_STREAM_ERROR if the stream structure was inconsistent
(for example if next_in or next_out was NULL), Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if no progress is possible or if there was not
enough room in the output buffer when Z_FINISH is used. In the Z_DATA_ERROR
case, the application may then call inflateSync to look for a good
compression block.
*/
ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
/*
All dynamically allocated data structures for this stream are freed.
This function discards any unprocessed input and does not flush any
pending output.
inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
was inconsistent. In the error case, msg may be set but then points to a
static string (which must not be deallocated).
*/
/* Advanced functions */
/*
The following functions are needed only in some special applications.
*/
/*
ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
int level,
int method,
int windowBits,
int memLevel,
int strategy));
This is another version of deflateInit with more compression options. The
fields next_in, zalloc, zfree and opaque must be initialized before by
the caller.
The method parameter is the compression method. It must be Z_DEFLATED in
this version of the library.
The windowBits parameter is the base two logarithm of the window size
(the size of the history buffer). It should be in the range 8..15 for this
version of the library. Larger values of this parameter result in better
compression at the expense of memory usage. The default value is 15 if
deflateInit is used instead.
The memLevel parameter specifies how much memory should be allocated
for the internal compression state. memLevel=1 uses minimum memory but
is slow and reduces compression ratio; memLevel=9 uses maximum memory
for optimal speed. The default value is 8. See zconf.h for total memory
usage as a function of windowBits and memLevel.
The strategy parameter is used to tune the compression algorithm. Use the
value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no
string match). Filtered data consists mostly of small values with a
somewhat random distribution. In this case, the compression algorithm is
tuned to compress them better. The effect of Z_FILTERED is to force more
Huffman coding and less string matching; it is somewhat intermediate
between Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. The strategy parameter only affects
the compression ratio but not the correctness of the compressed output even
if it is not set appropriately.
deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid
method). msg is set to null if there is no error message. deflateInit2 does
not perform any compression: this will be done by deflate().
*/
ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
const Bytef *dictionary,
uInt dictLength));
/*
Initializes the compression dictionary from the given byte sequence
without producing any compressed output. This function must be called
immediately after deflateInit, deflateInit2 or deflateReset, before any
call of deflate. The compressor and decompressor must use exactly the same
dictionary (see inflateSetDictionary).
The dictionary should consist of strings (byte sequences) that are likely
to be encountered later in the data to be compressed, with the most commonly
used strings preferably put towards the end of the dictionary. Using a
dictionary is most useful when the data to be compressed is short and can be
predicted with good accuracy; the data can then be compressed better than
with the default empty dictionary.
Depending on the size of the compression data structures selected by
deflateInit or deflateInit2, a part of the dictionary may in effect be
discarded, for example if the dictionary is larger than the window size in
deflate or deflate2. Thus the strings most likely to be useful should be
put at the end of the dictionary, not at the front.
Upon return of this function, strm->adler is set to the Adler32 value
of the dictionary; the decompressor may later use this value to determine
which dictionary has been used by the compressor. (The Adler32 value
applies to the whole dictionary even if only a subset of the dictionary is
actually used by the compressor.)
deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
parameter is invalid (such as NULL dictionary) or the stream state is
inconsistent (for example if deflate has already been called for this stream
or if the compression method is bsort). deflateSetDictionary does not
perform any compression: this will be done by deflate().
*/
ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
z_streamp source));
/*
Sets the destination stream as a complete copy of the source stream.
This function can be useful when several compression strategies will be
tried, for example when there are several ways of pre-processing the input
data with a filter. The streams that will be discarded should then be freed
by calling deflateEnd. Note that deflateCopy duplicates the internal
compression state which can be quite large, so this strategy is slow and
can consume lots of memory.
deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
(such as zalloc being NULL). msg is left unchanged in both source and
destination.
*/
ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
/*
This function is equivalent to deflateEnd followed by deflateInit,
but does not free and reallocate all the internal compression state.
The stream will keep the same compression level and any other attributes
that may have been set by deflateInit2.
deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
stream state was inconsistent (such as zalloc or state being NULL).
*/
ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
int level,
int strategy));
/*
Dynamically update the compression level and compression strategy. The
interpretation of level and strategy is as in deflateInit2. This can be
used to switch between compression and straight copy of the input data, or
to switch to a different kind of input data requiring a different
strategy. If the compression level is changed, the input available so far
is compressed with the old level (and may be flushed); the new level will
take effect only at the next call of deflate().
Before the call of deflateParams, the stream state must be set as for
a call of deflate(), since the currently available input may have to
be compressed and flushed. In particular, strm->avail_out must be non-zero.
deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR
if strm->avail_out was zero.
*/
/*
ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
int windowBits));
This is another version of inflateInit with an extra parameter. The
fields next_in, avail_in, zalloc, zfree and opaque must be initialized
before by the caller.
The windowBits parameter is the base two logarithm of the maximum window
size (the size of the history buffer). It should be in the range 8..15 for
this version of the library. The default value is 15 if inflateInit is used
instead. If a compressed stream with a larger window size is given as
input, inflate() will return with the error code Z_DATA_ERROR instead of
trying to allocate a larger window.
inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_STREAM_ERROR if a parameter is invalid (such as an out-of-range
windowBits). msg is set to null if there is no error message. inflateInit2
does not perform any decompression apart from reading the zlib header if
present: this will be done by inflate(). (So next_in and avail_in may be
modified, but next_out and avail_out are unchanged.)
*/
ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
const Bytef *dictionary,
uInt dictLength));
/*
Initializes the decompression dictionary from the given uncompressed byte
sequence. This function must be called immediately after a call of inflate
if this call returned Z_NEED_DICT. The dictionary chosen by the compressor
can be determined from the Adler32 value returned by this call of
inflate. The compressor and decompressor must use exactly the same
dictionary (see deflateSetDictionary).
inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
parameter is invalid (such as NULL dictionary) or the stream state is
inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
expected one (incorrect Adler32 value). inflateSetDictionary does not
perform any decompression: this will be done by subsequent calls of
inflate().
*/
ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
/*
Skips invalid compressed data until a full flush point (see above the
description of deflate with Z_FULL_FLUSH) can be found, or until all
available input is skipped. No output is provided.
inflateSync returns Z_OK if a full flush point has been found, Z_BUF_ERROR
if no more input was provided, Z_DATA_ERROR if no flush point has been found,
or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
case, the application may save the current value of total_in which
indicates where valid compressed data was found. In the error case, the
application may repeatedly call inflateSync, providing more input each time,
until success or end of the input data.
*/
ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
/*
This function is equivalent to inflateEnd followed by inflateInit,
but does not free and reallocate all the internal decompression state.
The stream will keep attributes that may have been set by inflateInit2.
inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
stream state was inconsistent (such as zalloc or state being NULL).
*/
/* utility functions */
/*
The following utility functions are implemented on top of the
basic stream-oriented functions. To simplify the interface, some
default options are assumed (compression level and memory usage,
standard memory allocation functions). The source code of these
utility functions can easily be modified if you need special options.
*/
ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
const Bytef *source, uLong sourceLen));
/*
Compresses the source buffer into the destination buffer. sourceLen is
the byte length of the source buffer. Upon entry, destLen is the total
size of the destination buffer, which must be at least 0.1% larger than
sourceLen plus 12 bytes. Upon exit, destLen is the actual size of the
compressed buffer.
This function can be used to compress a whole file at once if the
input file is mmap'ed.
compress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output
buffer.
*/
ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen,
const Bytef *source, uLong sourceLen,
int level));
/*
Compresses the source buffer into the destination buffer. The level
parameter has the same meaning as in deflateInit. sourceLen is the byte
length of the source buffer. Upon entry, destLen is the total size of the
destination buffer, which must be at least 0.1% larger than sourceLen plus
12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
memory, Z_BUF_ERROR if there was not enough room in the output buffer,
Z_STREAM_ERROR if the level parameter is invalid.
*/
ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
const Bytef *source, uLong sourceLen));
/*
Decompresses the source buffer into the destination buffer. sourceLen is
the byte length of the source buffer. Upon entry, destLen is the total
size of the destination buffer, which must be large enough to hold the
entire uncompressed data. (The size of the uncompressed data must have
been saved previously by the compressor and transmitted to the decompressor
by some mechanism outside the scope of this compression library.)
Upon exit, destLen is the actual size of the uncompressed buffer.
This function can be used to decompress a whole file at once if the
input file is mmap'ed.
uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
enough memory, Z_BUF_ERROR if there was not enough room in the output
buffer, or Z_DATA_ERROR if the input data was corrupted.
*/
typedef voidp gzFile;
ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
/*
Opens a gzip (.gz) file for reading or writing. The mode parameter
is as in fopen ("rb" or "wb") but can also include a compression level
("wb9") or a strategy: 'f' for filtered data as in "wb6f", 'h' for
Huffman only compression as in "wb1h". (See the description
of deflateInit2 for more information about the strategy parameter.)
gzopen can be used to read a file which is not in gzip format; in this
case gzread will directly read from the file without decompression.
gzopen returns NULL if the file could not be opened or if there was
insufficient memory to allocate the (de)compression state; errno
can be checked to distinguish the two cases (if errno is zero, the
zlib error is Z_MEM_ERROR). */
ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
/*
gzdopen() associates a gzFile with the file descriptor fd. File
descriptors are obtained from calls like open, dup, creat, pipe or
fileno (if the file has been previously opened with fopen).
The mode parameter is as in gzopen.
The next call of gzclose on the returned gzFile will also close the
file descriptor fd, just like fclose(fdopen(fd, mode)) closes the file
descriptor fd. If you want to keep fd open, use gzdopen(dup(fd), mode).
gzdopen returns NULL if there was insufficient memory to allocate
the (de)compression state.
*/
ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
/*
Dynamically update the compression level or strategy. See the description
of deflateInit2 for the meaning of these parameters.
gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
opened for writing.
*/
ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
/*
Reads the given number of uncompressed bytes from the compressed file.
If the input file was not in gzip format, gzread copies the given number
of bytes into the buffer.
gzread returns the number of uncompressed bytes actually read (0 for
end of file, -1 for error). */
ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
const voidp buf, unsigned len));
/*
Writes the given number of uncompressed bytes into the compressed file.
gzwrite returns the number of uncompressed bytes actually written
(0 in case of error).
*/
ZEXTERN int ZEXPORTVA gzprintf OF((gzFile file, const char *format, ...));
/*
Converts, formats, and writes the args to the compressed file under
control of the format string, as in fprintf. gzprintf returns the number of
uncompressed bytes actually written (0 in case of error).
*/
ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
/*
Writes the given null-terminated string to the compressed file, excluding
the terminating null character.
gzputs returns the number of characters written, or -1 in case of error.
*/
ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
/*
Reads bytes from the compressed file until len-1 characters are read, or
a newline character is read and transferred to buf, or an end-of-file
condition is encountered. The string is then terminated with a null
character.
gzgets returns buf, or Z_NULL in case of error.
*/
ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
/*
Writes c, converted to an unsigned char, into the compressed file.
gzputc returns the value that was written, or -1 in case of error.
*/
ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
/*
Reads one byte from the compressed file. gzgetc returns this byte
or -1 in case of end of file or error.
*/
ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
/*
Flushes all pending output into the compressed file. The parameter
flush is as in the deflate() function. The return value is the zlib
error number (see function gzerror below). gzflush returns Z_OK if
the flush parameter is Z_FINISH and all output could be flushed.
gzflush should be called only when strictly necessary because it can
degrade compression.
*/
ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
z_off_t offset, int whence));
/*
Sets the starting position for the next gzread or gzwrite on the
given compressed file. The offset represents a number of bytes in the
uncompressed data stream. The whence parameter is defined as in lseek(2);
the value SEEK_END is not supported.
If the file is opened for reading, this function is emulated but can be
extremely slow. If the file is opened for writing, only forward seeks are
supported; gzseek then compresses a sequence of zeroes up to the new
starting position.
gzseek returns the resulting offset location as measured in bytes from
the beginning of the uncompressed stream, or -1 in case of error, in
particular if the file is opened for writing and the new starting position
would be before the current position.
*/
ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
/*
Rewinds the given file. This function is supported only for reading.
gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
*/
ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
/*
Returns the starting position for the next gzread or gzwrite on the
given compressed file. This position represents a number of bytes in the
uncompressed data stream.
gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
*/
ZEXTERN int ZEXPORT gzeof OF((gzFile file));
/*
Returns 1 when EOF has previously been detected reading the given
input stream, otherwise zero.
*/
ZEXTERN int ZEXPORT gzclose OF((gzFile file));
/*
Flushes all pending output if necessary, closes the compressed file
and deallocates all the (de)compression state. The return value is the zlib
error number (see function gzerror below).
*/
ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
/*
Returns the error message for the last error which occurred on the
given compressed file. errnum is set to zlib error number. If an
error occurred in the file system and not in the compression library,
errnum is set to Z_ERRNO and the application may consult errno
to get the exact error code.
*/
/* checksum functions */
/*
These functions are not related to compression but are exported
anyway because they might be useful in applications using the
compression library.
*/
ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
/*
Update a running Adler-32 checksum with the bytes buf[0..len-1] and
return the updated checksum. If buf is NULL, this function returns
the required initial value for the checksum.
An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
much faster. Usage example:
uLong adler = adler32(0L, Z_NULL, 0);
while (read_buffer(buffer, length) != EOF) {
adler = adler32(adler, buffer, length);
}
if (adler != original_adler) error();
*/
ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
/*
Update a running crc with the bytes buf[0..len-1] and return the updated
crc. If buf is NULL, this function returns the required initial value
for the crc. Pre- and post-conditioning (one's complement) is performed
within this function so it shouldn't be done by the application.
Usage example:
uLong crc = crc32(0L, Z_NULL, 0);
while (read_buffer(buffer, length) != EOF) {
crc = crc32(crc, buffer, length);
}
if (crc != original_crc) error();
*/
/* various hacks, don't look :) */
/* deflateInit and inflateInit are macros to allow checking the zlib version
* and the compiler's view of z_stream:
*/
ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
const char *version, int stream_size));
ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
const char *version, int stream_size));
ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
int windowBits, int memLevel,
int strategy, const char *version,
int stream_size));
ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
const char *version, int stream_size));
/* Public initialisation entry points.  Each macro forwards its arguments to
 * the underscore-suffixed function declared above and appends two values
 * taken from the caller's compilation: the ZLIB_VERSION string and
 * sizeof(z_stream).  All arguments are parenthesised before being forwarded.
 */
/* deflateInit(strm, level) -> deflateInit_ */
#define deflateInit(strm, level) \
deflateInit_((strm), (level), ZLIB_VERSION, sizeof(z_stream))
/* inflateInit(strm) -> inflateInit_ */
#define inflateInit(strm) \
inflateInit_((strm), ZLIB_VERSION, sizeof(z_stream))
/* deflateInit2(strm, level, method, windowBits, memLevel, strategy)
 * -> deflateInit2_
 */
#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
(strategy), ZLIB_VERSION, sizeof(z_stream))
/* inflateInit2(strm, windowBits) -> inflateInit2_ */
#define inflateInit2(strm, windowBits) \
inflateInit2_((strm), (windowBits), ZLIB_VERSION, sizeof(z_stream))
#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL)
struct internal_state {int dummy;}; /* hack for buggy compilers */
#endif
ZEXTERN const char * ZEXPORT zError OF((int err));
ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp z));
ZEXTERN const uLongf * ZEXPORT get_crc_table OF((void));
#ifdef __cplusplus
}
#endif
#endif /* _ZLIB_H */

View File

@@ -0,0 +1,225 @@
/* zutil.c -- target dependent utility functions for the compression library
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zutil.h"
struct internal_state {int dummy;}; /* for buggy compilers */
#ifndef STDC
extern void exit OF((int));
#endif
/* Human-readable messages for the zlib return codes.
 * The ten slots are ordered from the highest code, Z_NEED_DICT (2), down to
 * the lowest, Z_VERSION_ERROR (-6), followed by one spare empty string.
 * NOTE(review): presumably indexed through ERR_MSG as Z_NEED_DICT - err;
 * confirm against the macro in zutil.h before reordering any entry.
 */
const char *z_errmsg[10] = {
"need dictionary", /* Z_NEED_DICT 2 */
"stream end", /* Z_STREAM_END 1 */
"", /* Z_OK 0 */
"file error", /* Z_ERRNO (-1) */
"stream error", /* Z_STREAM_ERROR (-2) */
"data error", /* Z_DATA_ERROR (-3) */
"insufficient memory", /* Z_MEM_ERROR (-4) */
"buffer error", /* Z_BUF_ERROR (-5) */
"incompatible version",/* Z_VERSION_ERROR (-6) */
/* spare slot: no return code listed above maps to it */
""};
/* Returns the ZLIB_VERSION string this source file was compiled against. */
const char * ZEXPORT zlibVersion()
{
    const char *built_version = ZLIB_VERSION;

    return built_version;
}
#ifdef DEBUG
# ifndef verbose
# define verbose 0
# endif
int z_verbose = verbose;
/* Fatal error handler, compiled only when DEBUG is defined: writes the
 * message m followed by a newline to stderr, then ends the program through
 * exit(1).  Does not return to the caller.
 */
void z_error (m)
    char *m;
{
    FILE *out = stderr;

    fprintf(out, "%s\n", m);
    exit(1);
}
#endif
/* Exported to allow conversion of an error code to a string for compress()
 * and uncompress().
 *
 * err: a zlib return code, normally in the range Z_VERSION_ERROR (-6) ..
 *      Z_NEED_DICT (2).
 * Returns the z_errmsg entry for err.  A code outside the range the table
 * covers yields the empty string kept in the table's last slot rather than
 * a read beyond either end of the array.
 */
const char * ZEXPORT zError(err)
    int err;
{
    /* ERR_MSG performs an unchecked table lookup.
     * NOTE(review): assumed to expand to z_errmsg[Z_NEED_DICT-(err)] as
     * defined in zutil.h -- confirm.  With that definition a code above
     * Z_NEED_DICT gives a negative index and a code far below
     * Z_VERSION_ERROR runs off the end, so reject both here.
     */
    if (err > Z_NEED_DICT || err < Z_VERSION_ERROR) {
        return z_errmsg[sizeof(z_errmsg) / sizeof(z_errmsg[0]) - 1];
    }
    return ERR_MSG(err);
}
#ifndef HAVE_MEMCPY
/* Fallback byte copy used when HAVE_MEMCPY is not defined: copies len bytes
 * from source to dest in ascending address order. A zero len copies nothing.
 */
void zmemcpy(dest, source, len)
    Bytef* dest;
    const Bytef* source;
    uInt len;
{
    uInt remaining;

    for (remaining = len; remaining != 0; remaining--) {
        *dest++ = *source++;
    }
}
/* Fallback byte comparison used when HAVE_MEMCPY is not defined.
 * Returns 0 when the first len bytes are equal, otherwise 1 or -1 depending
 * on whether the first differing byte of s1 is greater or smaller than the
 * corresponding byte of s2.
 */
int zmemcmp(s1, s2, len)
    const Bytef* s1;
    const Bytef* s2;
    uInt len;
{
    uInt pos = 0;

    while (pos < len) {
        if (s1[pos] != s2[pos]) {
            return (s1[pos] > s2[pos]) ? 1 : -1;
        }
        pos++;
    }
    return 0;
}
/* Fallback zero-fill used when HAVE_MEMCPY is not defined: stores 0 into the
 * len bytes starting at dest. A zero len writes nothing.
 */
void zmemzero(dest, len)
    Bytef* dest;
    uInt len;
{
    while (len != 0) {
        *dest++ = 0;
        len--;
    }
}
#endif
#ifdef __TURBOC__
#if (defined( __BORLANDC__) || !defined(SMALL_MEDIUM)) && !defined(__32BIT__)
/* Small and medium model in Turbo C are for now limited to near allocation
* with reduced MAX_WBITS and MAX_MEM_LEVEL
*/
# define MY_ZCALLOC
/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
* and farmalloc(64K) returns a pointer with an offset of 8, so we
* must fix the pointer. Warning: the pointer must be put back to its
* original form in order to free it, use zcfree().
*/
#define MAX_PTR 10
/* 10*64K = 640K */
local int next_ptr = 0;
typedef struct ptr_table_s {
voidpf org_ptr;
voidpf new_ptr;
} ptr_table;
local ptr_table table[MAX_PTR];
/* This table is used to remember the original form of pointers
* to large buffers (64K). Such pointers are normalized with a zero offset.
* Since MSDOS is not a preemptive multitasking OS, this table is not
* protected from concurrent access. This hack doesn't work anyway on
* a protected system like OS/2. Use Microsoft C instead.
*/
/* Turbo C allocator. Requests below 65520 bytes are returned straight from
 * farmalloc when the low 16 bits of the pointer (its offset part) are
 * non-zero. Otherwise the pointer is rewritten to a zero offset and the
 * original/rewritten pair is recorded in table[] so that zcfree() can
 * release the original pointer later.
 * Returns NULL when farmalloc fails or when table[] is already full.
 */
voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
{
voidpf buf = opaque; /* just to make some compilers happy */
ulg bsize = (ulg)items*size;
/* If we allocate less than 65520 bytes, we assume that farmalloc
* will return a usable pointer which doesn't have to be normalized.
*/
if (bsize < 65520L) {
buf = farmalloc(bsize);
if (*(ush*)&buf != 0) return buf;
} else {
/* 16 extra bytes leave room for rounding the address up below */
buf = farmalloc(bsize + 16L);
}
/* NOTE(review): when buf is non-NULL but the table is full, the block just
 * obtained from farmalloc is not released before returning NULL. */
if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
table[next_ptr].org_ptr = buf;
/* Normalize the pointer to seg:0 */
*((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
*(ush*)&buf = 0;
table[next_ptr++].new_ptr = buf;
return buf;
}
/* Turbo C deallocator, counterpart of zcalloc() above. A pointer whose low
 * 16 bits are non-zero was never rewritten and is passed to farfree as-is.
 * Otherwise the matching entry is looked up in table[], its original pointer
 * is freed and the remaining entries are shifted down by one.
 */
void zcfree (voidpf opaque, voidpf ptr)
{
int n;
if (*(ush*)&ptr != 0) { /* object < 64K */
farfree(ptr);
return;
}
/* Find the original pointer */
for (n = 0; n < next_ptr; n++) {
if (ptr != table[n].new_ptr) continue;
farfree(table[n].org_ptr);
/* close the gap left by the removed entry */
while (++n < next_ptr) {
table[n-1] = table[n];
}
next_ptr--;
return;
}
ptr = opaque; /* just to make some compilers happy */
/* reached only when ptr matches no recorded entry */
Assert(0, "zcfree: ptr not found");
}
#endif
#endif /* __TURBOC__ */
#if defined(M_I86) && !defined(__32BIT__)
/* Microsoft C in 16-bit mode */
# define MY_ZCALLOC
#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
# define _halloc halloc
# define _hfree hfree
#endif
/* Allocator for 16-bit Microsoft C: forwards to _halloc(items, size).
 * opaque is not used; the dummy assignment only references the parameter.
 */
voidpf zcalloc (voidpf opaque, unsigned items, unsigned size)
{
if (opaque) opaque = 0; /* to make compiler happy */
return _halloc((long)items, size);
}
/* Deallocator for 16-bit Microsoft C: releases ptr with _hfree.
 * opaque is not used; the dummy assignment only references the parameter.
 */
void zcfree (voidpf opaque, voidpf ptr)
{
if (opaque) opaque = 0; /* to make compiler happy */
_hfree(ptr);
}
#endif /* MSC */
#ifndef MY_ZCALLOC /* Any system without a special alloc function */
#ifndef STDC
extern voidp calloc OF((uInt items, uInt size));
extern void free OF((voidpf ptr));
#endif
/* Default allocator, compiled when no platform-specific version defined
 * MY_ZCALLOC: returns calloc(items, size). opaque is not used; the statement
 * "items += size - size" adds zero and only references the parameters.
 */
voidpf zcalloc (opaque, items, size)
voidpf opaque;
unsigned items;
unsigned size;
{
if (opaque) items += size - size; /* make compiler happy */
return (voidpf)calloc(items, size);
}
/* Default deallocator, compiled when no platform-specific version defined
 * MY_ZCALLOC: releases ptr with free(). opaque is not used; the trailing
 * test only references the parameter.
 */
void zcfree (opaque, ptr)
voidpf opaque;
voidpf ptr;
{
free(ptr);
if (opaque) return; /* make compiler happy */
}
#endif /* MY_ZCALLOC */

View File

@@ -0,0 +1,220 @@
/* zutil.h -- internal interface and configuration of the compression library
* Copyright (C) 1995-1998 Jean-loup Gailly.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* WARNING: this file should *not* be used by applications. It is
part of the implementation of the compression library and is
subject to change. Applications should only use zlib.h.
*/
/* @(#) $Id$ */
#ifndef _Z_UTIL_H
#define _Z_UTIL_H
#include "zlib.h"
#ifdef STDC
# include <stddef.h>
# include <string.h>
# include <stdlib.h>
#endif
#ifdef NO_ERRNO_H
extern int errno;
#else
# include <errno.h>
#endif
#ifndef local
# define local static
#endif
/* compile with -Dlocal if your debugger can't find static symbols */
typedef unsigned char uch;
typedef uch FAR uchf;
typedef unsigned short ush;
typedef ush FAR ushf;
typedef unsigned long ulg;
extern const char *z_errmsg[10]; /* indexed by 2-zlib_error */
/* (size given to avoid silly warnings with Visual C++) */
/* ERR_MSG maps a zlib return code to its message string.
 * NOTE: the computed index is used as-is; no range check is applied to err.
 */
#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
/* ERR_RETURN stores the message for err in strm->msg and then returns err
 * from the enclosing function.
 */
#define ERR_RETURN(strm,err) \
return (strm->msg = (char*)ERR_MSG(err), (err))
/* To be used only when the state is known to be valid */
/* common constants */
#ifndef DEF_WBITS
# define DEF_WBITS MAX_WBITS
#endif
/* default windowBits for decompression. MAX_WBITS is for compression only */
#if MAX_MEM_LEVEL >= 8
# define DEF_MEM_LEVEL 8
#else
# define DEF_MEM_LEVEL MAX_MEM_LEVEL
#endif
/* default memLevel */
#define STORED_BLOCK 0
#define STATIC_TREES 1
#define DYN_TREES 2
/* The three kinds of block type */
#define MIN_MATCH 3
#define MAX_MATCH 258
/* The minimum and maximum match lengths */
#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
/* target dependencies */
#ifdef MSDOS
# define OS_CODE 0x00
# if defined(__TURBOC__) || defined(__BORLANDC__)
# if(__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
/* Allow compilation with ANSI keywords only enabled */
void _Cdecl farfree( void *block );
void *_Cdecl farmalloc( unsigned long nbytes );
# else
# include <alloc.h>
# endif
# else /* MSC or DJGPP */
# include <malloc.h>
# endif
#endif
#ifdef OS2
# define OS_CODE 0x06
#endif
#ifdef WIN32 /* Windows 95 & Windows NT */
# define OS_CODE 0x0b
#endif
#if defined(VAXC) || defined(VMS)
# define OS_CODE 0x02
# define F_OPEN(name, mode) \
fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
#endif
#ifdef AMIGA
# define OS_CODE 0x01
#endif
#if defined(ATARI) || defined(atarist)
# define OS_CODE 0x05
#endif
#if defined(MACOS) || defined(TARGET_OS_MAC)
# define OS_CODE 0x07
# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
# include <unix.h> /* for fdopen */
# else
# ifndef fdopen
# define fdopen(fd,mode) NULL /* No fdopen() */
# endif
# endif
#endif
#ifdef __50SERIES /* Prime/PRIMOS */
# define OS_CODE 0x0F
#endif
#ifdef TOPS20
# define OS_CODE 0x0a
#endif
#if defined(_BEOS_) || defined(RISCOS)
# define fdopen(fd,mode) NULL /* No fdopen() */
#endif
#if (defined(_MSC_VER) && (_MSC_VER > 600))
# define fdopen(fd,type) _fdopen(fd,type)
#endif
/* Common defaults */
#ifndef OS_CODE
# define OS_CODE 0x03 /* assume Unix */
#endif
#ifndef F_OPEN
# define F_OPEN(name, mode) fopen((name), (mode))
#endif
/* functions */
#ifdef HAVE_STRERROR
extern char *strerror OF((int));
# define zstrerror(errnum) strerror(errnum)
#else
# define zstrerror(errnum) ""
#endif
#if defined(pyr)
# define NO_MEMCPY
#endif
#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
/* Use our own functions for small and medium model with MSC <= 5.0.
* You may have to use the same strategy for Borland C (untested).
* The __SC__ check is for Symantec.
*/
# define NO_MEMCPY
#endif
#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
# define HAVE_MEMCPY
#endif
#ifdef HAVE_MEMCPY
# ifdef SMALL_MEDIUM /* MSDOS small or medium model */
# define zmemcpy _fmemcpy
# define zmemcmp _fmemcmp
# define zmemzero(dest, len) _fmemset(dest, 0, len)
# else
# define zmemcpy memcpy
# define zmemcmp memcmp
# define zmemzero(dest, len) memset(dest, 0, len)
# endif
#else
extern void zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
extern int zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
extern void zmemzero OF((Bytef* dest, uInt len));
#endif
/* Diagnostic functions */
#ifdef DEBUG
# include <stdio.h>
extern int z_verbose;
extern void z_error OF((char *m));
/* Assert calls z_error(msg) when cond evaluates to false. */
# define Assert(cond,msg) {if(!(cond)) z_error(msg);}
/* The Trace* macros take a parenthesised fprintf argument list x and call
 * fprintf with it when z_verbose reaches the threshold shown; Tracec and
 * Tracecv additionally require the condition c to be true.
 */
# define Trace(x) {if (z_verbose>=0) fprintf x ;}
# define Tracev(x) {if (z_verbose>0) fprintf x ;}
# define Tracevv(x) {if (z_verbose>1) fprintf x ;}
# define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
# define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
#else
/* Without DEBUG every diagnostic macro expands to nothing. */
# define Assert(cond,msg)
# define Trace(x)
# define Tracev(x)
# define Tracevv(x)
# define Tracec(c,x)
# define Tracecv(c,x)
#endif
/* Pointer to a checksum routine taking the running check value, a buffer and
 * its length, and returning the updated check value.
 */
typedef uLong (ZEXPORT *check_func) OF((uLong check, const Bytef *buf,
uInt len));
/* Allocation routines defined in zutil.c */
voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size));
void zcfree OF((voidpf opaque, voidpf ptr));
/* ZALLOC / ZFREE call the stream's zalloc / zfree function pointers, passing
 * the stream's opaque value as first argument.
 */
#define ZALLOC(strm, items, size) \
(*((strm)->zalloc))((strm)->opaque, (items), (size))
#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
/* TRY_FREE releases p through ZFREE only when p is non-null. */
#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
#endif /* _Z_UTIL_H */

View File

@@ -0,0 +1,668 @@
#include "../Utils/leak_detector.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <fstream>
#include <map>
#include <vector>
#include <set>
#include <string>
#include <queue>
#include "dvm.h"
#include "../DynamicAnalysis/gcov_info.h"
#include "PredictScheme.h"
#include "../Utils/SgUtils.h"
#include "../DirectiveProcessing/directive_parser.h"
#include "../Distribution/DvmhDirective.h"
#include "../GraphLoop/graph_loops_func.h"
#include "../ExpressionTransform/expr_transform.h"
#include "../DirectiveProcessing/directive_parser.h"
#include "../LoopAnalyzer/loop_analyzer.h"
#include "../CFGraph/CFGraph.h"
#include "json.hpp"
using std::map;
using std::string;
using std::vector;
using std::set;
using std::ofstream;
using std::pair;
using std::tuple;
using json = nlohmann::json;
/**
 * Recursively walks the clause expression tree of a PARALLEL directive and
 * accumulates per-clause counters together with a weighted communication score.
 *
 * Weights applied per list element: SHADOW_RENEW and REDUCTION 1,
 * ACROSS 10, REMOTE_ACCESS 100.
 *
 * @param exp            root of the (sub)tree to inspect; NULL stops the recursion
 * @param parStats       counters updated in place
 * @param totalScoreComm running communication score, updated in place
 */
static void fillParallel(SgExpression *exp, ParallelStats &parStats, int &totalScoreComm)
{
    if (exp == NULL)
        return;

    SgExprListExp *list = NULL;
    switch (exp->variant())
    {
    case SHADOW_RENEW_OP:
        list = isSgExprListExp(exp->lhs());
        // both updates must stay under the guard: list may be NULL
        if (list)
        {
            parStats.ShadowCount += list->length();
            totalScoreComm += list->length();
        }
        break;
    case REDUCTION_OP:
        list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.ReductionCount += list->length();
            totalScoreComm += list->length();
        }
        break;
    case REMOTE_ACCESS_OP:
        list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.RemoteCount += list->length();
            //TODO:
            totalScoreComm += 100 * list->length();
        }
        break;
    case ACROSS_OP:
        // with a DDOT node the list of arrays is stored on its right-hand side
        if (exp->lhs()->variant() == DDOT)
            list = isSgExprListExp(exp->lhs()->rhs());
        else
            list = isSgExprListExp(exp->lhs());
        if (list)
        {
            parStats.AcrossCount += list->length();
            totalScoreComm += 10 * list->length();
        }
        break;
    default:
        break;
    }

    fillParallel(exp->rhs(), parStats, totalScoreComm);
    fillParallel(exp->lhs(), parStats, totalScoreComm);
}
/**
 * Scans every statement of a file and accumulates counts of DVM directives
 * (parallel, redistribute/realign, remote access, intervals) together with
 * the weighted communication score.
 *
 * @param file            file whose statements are traversed in lexical order
 * @param predictorCounts statistics updated in place
 */
void processFileToPredict(SgFile *file, PredictorStats &predictorCounts)
{
    SgStatement* prev = NULL;
    for (SgStatement *st = file->firstStatement(); st; st = st->lexNext())
    {
        SgExprListExp *list;
        switch (st->variant())
        {
        case DVM_PARALLEL_ON_DIR:
            predictorCounts.ParallelCount++;
            for (int i = 0; i < 3; ++i)
                fillParallel(st->expr(i), predictorCounts.ParallelStat, predictorCounts.TotalScoreComm);
            break;
        case DVM_REDISTRIBUTE_DIR:
        case DVM_REALIGN_DIR:
            // prev is still NULL when the directive is the first statement of the file
            if (prev && prev->variant() == DVM_NEW_VALUE_DIR)
                break;

            list = isSgExprListExp(st->expr(0));
            if (list)
            {
                int len = list->length();
                predictorCounts.RedistributeCount += len;
                predictorCounts.TotalScoreComm += 10000 * len;
            }
            else
            {
                predictorCounts.RedistributeCount++;
                predictorCounts.TotalScoreComm += 10000;
            }
            break;
        case DVM_REMOTE_ACCESS_DIR:
            for (int i = 0; i < 3; ++i)
            {
                list = isSgExprListExp(st->expr(i));
                if (list)
                {
                    predictorCounts.RemoteCount += list->length();
                    predictorCounts.TotalScoreComm += 100 * list->length();
                }
            }
            break;
        case DVM_INTERVAL_DIR:
        case DVM_ENDINTERVAL_DIR:
        case DVM_EXIT_INTERVAL_DIR:
            predictorCounts.IntervalCount++;
            break;
        default:
            break;
        }
        prev = st;
    }
    predictorCounts.TotalScorePar += predictorCounts.ParallelCount;
}
/**
 * Sums gcov execution counters over the statements of a parallel loop.
 *
 * Walks from `loop` up to (not including) its last node. Statements with a
 * non-positive line number are ignored. For every other statement the gcov
 * record of its line is required; a missing or mismatching record is
 * reported as an internal error.
 *
 * @param loop                     first statement of the loop
 * @param gcov                     per-line gcov records of the current file
 * @param paralle_exec_count       incremented by the execution count of each line
 * @param count_of_parallel_lines  incremented once per counted line
 */
static void calculateForParallelLoop(SgStatement* loop, const map<int, Gcov_info>& gcov,
                                     uint64_t& paralle_exec_count, uint64_t& count_of_parallel_lines)
{
    SgStatement* cur = loop;
    while (cur != loop->lastNodeOfStmt())
    {
        const int lineNo = cur->lineNumber();
        if (lineNo > 0)
        {
            const auto found = gcov.find(lineNo);
            if (found == gcov.end())
            {
                __spf_print(1, "bad gcov info\n");
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
            }

            const Gcov_info& lineInfo = found->second;
            if (lineInfo.getNumLine() != lineNo)
            {
                __spf_print(1, "bad gcov info\n");
                printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
            }

            ++count_of_parallel_lines;
            paralle_exec_count += lineInfo.getExecutedCount();
        }
        cur = cur->lexNext();
    }
}
static json info;
/**
 * Builds the skeleton of the predictor description and stores it in the
 * file-level `info` object as a two-element array { cluster, program }.
 *
 * The cluster part is filled with zero placeholders. The program part gets
 * two ratios derived from gcov data:
 *   - average_parallel_line_executions: executions inside parallel loops
 *     divided by the number of counted lines in those loops;
 *   - parallel_execution_fraction: executions inside parallel loops divided
 *     by executions of all counted executable statements.
 * A ratio whose denominator is zero is reported as 0.0 instead of NaN/inf.
 *
 * @param allFuncInfo functions grouped by file name
 * @param gCovInfo    gcov records grouped by file name, then by line
 */
void calculateStatsForPredictor(const map<string, vector<FuncInfo*>>& allFuncInfo,
                                const map<string, map<int, Gcov_info>>& gCovInfo)
{
    // a zero denominator would otherwise yield NaN/inf, which json dumps as null
    const auto ratioOrZero = [](uint64_t num, uint64_t den) { return den != 0 ? num / (double)den : 0.0; };

    json cluster;
    json program;

    cluster["cluster_info"] = { {"num_nodes", 0},
                                {"cores_per_node", 0},
                                {"threads_per_node", 0},
                                {"memory_per_node_gb", 0},
                                {"network_bandwidth_gbps", 0},
                                {"network_latency_ms", 0}
                              };

    program["program_info"]["sequential_execution_time_sec"] = 0.0;
    program["program_info"]["launch_grid"] = { {"dimensions", {0, 0, 0} }, {"total_processes", 0} };

    uint64_t total_exec_count = 0;
    uint64_t parallel_exec_count = 0;
    uint64_t count_of_parallel_lines = 0;

    for (auto& byFile : allFuncInfo)
    {
        int ok = SgFile::switchToFile(byFile.first);
        if (ok == -1)
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

        auto it = gCovInfo.find(byFile.first);
        if (it == gCovInfo.end())
        {
            __spf_print(1, "bad gcov info\n");
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }

        auto& gcov = it->second;
        for (auto& func : byFile.second)
        {
            SgStatement* stat = func->funcPointer->GetOriginal();

            // pass 1: executions inside loops that follow a PARALLEL directive
            for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
            {
                if (st->variant() != DVM_PARALLEL_ON_DIR)
                    continue;

                uint64_t paralle_exec = 0;
                uint64_t lines_count = 0;

                auto loop = st->lexNext();
                checkNull(loop, convertFileName(__FILE__).c_str(), __LINE__);
                if (loop->variant() != FOR_NODE)
                    printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

                calculateForParallelLoop(loop, gcov, paralle_exec, lines_count);
                st = loop->lastNodeOfStmt(); // continue after the loop body

                parallel_exec_count += paralle_exec;
                count_of_parallel_lines += lines_count;

                // uint64_t is not guaranteed to be unsigned long long, hence the casts for %llu
                __spf_print(1, " PAR LOOP [%d %s] total exec %llu, total exec lines %llu, avg %.16e\n",
                            loop->lineNumber(), byFile.first.c_str(), (unsigned long long)paralle_exec,
                            (unsigned long long)lines_count, ratioOrZero(paralle_exec, lines_count));
            }

            // pass 2: executions of every executable, non-directive statement
            for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
            {
                if (!isSgExecutableStatement(st) || isDVM_stat(st) || isSPF_stat(st))
                    continue;

                int line = st->lineNumber();
                if (line <= 0)
                    continue;

                auto lineIt = gcov.find(line);
                if (lineIt == gcov.end())
                    continue; // no gcov record for this line

                auto& lineInfo = lineIt->second;
                if (lineInfo.getNumLine() != line)
                {
                    __spf_print(1, "bad gcov info\n");
                    printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                }
                total_exec_count += lineInfo.getExecutedCount();
            }
        }
    }

    const double averageParallel = ratioOrZero(parallel_exec_count, count_of_parallel_lines);
    const double parallelRate = ratioOrZero(parallel_exec_count, total_exec_count);

    __spf_print(1, " average_parallel_exec %.16e\n", averageParallel);
    __spf_print(1, " parallel_rate %.16e\n", parallelRate);

    program["program_info"]["average_parallel_line_executions"] = averageParallel;
    program["program_info"]["parallel_execution_fraction"] = parallelRate;

    info = { cluster, program };
}
/**
 * Returns the gcov record of the line of `st`. If `st` is a DVM statement,
 * the nearest preceding non-DVM statement is used instead.
 *
 * A missing statement, a line without a gcov record, or a record whose line
 * number does not match is reported as an internal error, in the same way as
 * the other gcov lookups in this file.
 *
 * @param st   statement to look up
 * @param gcov per-line gcov records of the current file
 */
static const Gcov_info& getInfo(SgStatement* st, const map<int, Gcov_info> &gcov)
{
    SgStatement* stat = st;
    // lexPrev() may run past the first statement, so stop on NULL as well
    while (stat && isDVM_stat(stat))
        stat = stat->lexPrev();
    checkNull(stat, convertFileName(__FILE__).c_str(), __LINE__);

    const int line = stat->lineNumber(); // XXX

    auto it = gcov.find(line);
    if (it == gcov.end())
    {
        // dereferencing end() would be undefined behaviour
        __spf_print(1, "bad gcov info\n");
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }

    auto& info = it->second;
    if (info.getNumLine() != line)
    {
        __spf_print(1, "bad gcov info\n");
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
    }
    return info;
}
/**
 * Builds the JSON description of one DISTRIBUTE specification.
 *
 * The result holds "line", "array_id" (position of the array in byPos) and,
 * when `list` is not empty, "distribution_spec" with the unparsed text of
 * every list element. An array that cannot be resolved or is absent from
 * byPos is reported as an internal error.
 *
 * @param byPos array -> index mapping
 * @param arr   symbol of the distributed array
 * @param list  head of the distribution format list (may be NULL)
 * @param line  source line stored in the result
 */
static json parseDistribution(const map<DIST::Array*, int>& byPos, SgSymbol* arr, SgExpression* list, int line)
{
    auto declared = getArrayFromDeclarated(declaratedInStmt(arr), arr->identifier());
    const bool known = (declared != NULL) && (byPos.find(declared) != byPos.end());
    if (!known)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    json result;
    result["line"] = line;
    result["array_id"] = byPos.at(declared);

    for (SgExpression* item = list; item; item = item->rhs())
        result["distribution_spec"].push_back(item->lhs()->unparse());

    return result;
}
/**
 * Builds the JSON description of one ALIGN specification.
 *
 * The result holds "line", "source_array_id", "target_array_id" and "rules".
 * For every source dimension z that appears in a target subscript, a rule
 * { z, a, b } is emitted, where (a, b) is the coefficient pair filled by
 * getCoefsOfSubscript for that subscript. Dimensions with a == 0 (not used
 * in any target subscript) produce no rule.
 *
 * Arrays that cannot be resolved or are absent from byPos, and a subscript
 * whose first coefficient is 0, are reported as internal errors.
 *
 * @param byPos   array -> index mapping
 * @param srcArr  symbol of the aligned array
 * @param tgtArr  symbol of the array it is aligned with
 * @param listSrc head of the source dummy-variable list
 * @param listTgt head of the target subscript list
 * @param line    source line stored in the result
 */
static json parseAlign(const map<DIST::Array*, int>& byPos, SgSymbol* srcArr, SgSymbol* tgtArr,
                       SgExpression *listSrc, SgExpression* listTgt, int line)
{
    json align;

    auto arraySrc = getArrayFromDeclarated(declaratedInStmt(srcArr), srcArr->identifier());
    if (arraySrc == NULL || byPos.find(arraySrc) == byPos.end())
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    auto arrayTgt = getArrayFromDeclarated(declaratedInStmt(tgtArr), tgtArr->identifier());
    if (arrayTgt == NULL || byPos.find(arrayTgt) == byPos.end())
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    align["line"] = line;
    align["source_array_id"] = byPos.at(arraySrc);
    align["target_array_id"] = byPos.at(arrayTgt);

    // (unparsed text, symbol) of every source dummy variable, in order
    vector<pair<string, SgSymbol*>> srcSymbs;
    auto list = listSrc;
    while (list)
    {
        srcSymbs.push_back({ list->lhs()->unparse(), list->lhs()->symbol() });
        list = list->rhs();
    }

    // value-initialised to (0, 0): first == 0 marks "dimension not used"
    vector<pair<int, int>> coefs(srcSymbs.size());
    list = listTgt;
    while (list)
    {
        auto exp = list->lhs();
        for (int z = 0; z < (int)srcSymbs.size(); ++z)
        {
            const bool has = recSymbolFind(exp, srcSymbs[z].first, VAR_REF);
            if (has)
            {
                getCoefsOfSubscript(coefs[z], exp, srcSymbs[z].second);
                if (coefs[z].first == 0)
                    printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                break;
            }
        }
        list = list->rhs();
    }

    for (int z = 0; z < (int)coefs.size(); ++z)
    {
        if (coefs[z].first == 0)
            continue;
        // Always emit the second coefficient. Previously it was written only
        // when it was zero and silently dropped when it was non-zero.
        align["rules"].push_back({ z, coefs[z].first, coefs[z].second });
    }
    return align;
}
/**
 * Walks backwards from `st` and returns the first preceding statement that
 * is either a program/procedure header or an executable statement that is
 * not a DVM/SPF directive.
 *
 * @return the statement found, or NULL when the beginning of the statement
 *         chain is reached first (or when `st` itself is NULL)
 */
static SgStatement* findBefore(SgStatement* st)
{
    while (st)
    {
        st = st->lexPrev();
        // stop before handing a NULL pointer to the classification helpers
        if (st == NULL)
            break;
        if (isSgProgHedrStmt(st))
            break;
        if (isDVM_stat(st) || isSPF_stat(st))
            continue;
        if (isSgExecutableStatement(st))
            break;
    }
    return st;
}
/**
 * Converts the SHADOW_RENEW or ACROSS entries of a parallel directive to JSON.
 *
 * Every entry is appended to `typed` (the global list for this kind of
 * clause) and its index in that list is appended to `parallel[type]`.
 *
 * @param dirs     entries as ((symbol, name), per-dimension (low, high) widths)
 * @param st       directive statement; its line number is stored in each item
 * @param byPos    array -> index mapping
 * @param type     key inside `parallel` that receives the indices
 *                 ("shadow_renews" or "acrosses" at the call sites in this file)
 * @param typed    global JSON list the items are appended to
 * @param parallel JSON object of the current parallel directive
 */
static void fillAcrossShadow(vector<pair<pair<Symbol*, string>, vector<pair<int, int>>>>& dirs, SgStatement *st,
                             const map<DIST::Array*, int>& byPos, const string& type, json& typed, json& parallel)
{
    for (auto& dir : dirs)
    {
        auto& symb = dir.first;
        auto& access = dir.second;

        DIST::Array* arr = getArrayFromDeclarated(declaratedInStmt(symb.first), symb.first->identifier());
        if (arr == NULL || byPos.find(arr) == byPos.end())
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

        json item;
        item["line"] = st->lineNumber();
        item["array_id"] = byPos.at(arr);
        item["communication_pattern"] = "NEAREST_NEIGHBOR";

        if (access.size())
        {
            for (int z = 0; z < (int)access.size(); ++z)
                item["width"].push_back({ z, access[z].first, access[z].second });
        }
        else
        {
            auto& spec = arr->GetShadowSpec();
            //TODO: analyze spec of array for shadow
            for (int z = 0; z < (int)spec.size(); ++z)
                item["width"].push_back({ z, 1, 1 });
        }

        typed.push_back(item);
        // Use the key requested by the caller. It used to be hard-coded to
        // "shadow_renews", so ACROSS indices ended up in the wrong list.
        parallel[type].push_back(typed.size() - 1);
    }
}
/**
 * Converts one PARALLEL directive to JSON and appends it to
 * directives["parallel"].
 *
 * The emitted object contains the directive line, the number of loops in
 * the nest, per-loop iteration counts derived from gcov data, indices of its
 * reduction / shadow-renew / across items in the corresponding global lists
 * of `directives`, and a computational-intensity estimate (arithmetic
 * instructions divided by access instructions found in the CFG for the lines
 * of the loop body).
 *
 * @param byPos        array -> index mapping
 * @param spec         list of loop variables of the directive
 * @param arr          unused here
 * @param arrSpec      unused here
 * @param st           the directive statement
 * @param clauses      unused here
 * @param gcov         per-line gcov records of the current file
 * @param directives   JSON object collecting all directives
 * @param commonBlocks passed through to the CFG builder
 * @param allFuncInfo  functions grouped by file name
 */
static void parallelDir(const map<DIST::Array*, int>& byPos, SgExpression* spec, SgSymbol* arr, SgExpression* arrSpec,
                        SgStatement* st, SgExpression* clauses, const map<int, Gcov_info>& gcov, json& directives,
                        const map<string, CommonBlock*>& commonBlocks, const map<string, vector<FuncInfo*>>& allFuncInfo)
{
    json parallel;

    // operator[] creates each per-kind entry in `directives` on first use
    json& shadow_renew = directives["shadow_renew"];
    json& reduction = directives["reduction"];
    json& remote_access = directives["remote_access"];
    json& across = directives["across"];

    vector<pair<string, SgSymbol*>> loopSymbs;
    auto list = spec;
    while (list)
    {
        loopSymbs.push_back({ list->lhs()->unparse(), list->lhs()->symbol() });
        list = list->rhs();
    }

    parallel["line"] = st->lineNumber();
    parallel["loops_count"] = loopSymbs.size();

    SgStatement* loop = isSgForStmt(st->lexNext());
    if (loop == NULL)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    SgStatement* lastNode = loop->lastNodeOfStmt();
    SgStatement* before = findBefore(loop);
    if (before == NULL)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    // execution count of the header of every loop in the nest, outermost first
    vector<int64_t> execs;
    for (int z = 0; z < loopSymbs.size(); ++z)
    {
        auto& info = getInfo(loop, gcov);
        execs.push_back(info.getExecutedCount());
        loop = loop->lexNext();
    }

    // Turn the counts into per-level iteration counts by dividing by the
    // count of the enclosing level. A zero divisor (level never executed)
    // yields 0 instead of an integer division by zero.
    for (int z = execs.size() - 1; z > 0; --z)
    {
        if (execs[z - 1] != 0)
            execs[z] /= execs[z - 1];
        else
            execs[z] = 0;
    }

    auto& info = getInfo(before, gcov);
    if (!execs.empty())
    {
        const auto beforeCount = info.getExecutedCount();
        if (beforeCount != 0)
            execs[0] /= beforeCount;
        else
            execs[0] = 0;
    }
    parallel["iterations_count"] = execs;

    DvmDirective directive;
    // NOTE(review): the Statement allocated here is never deleted in this
    // function — confirm whether ownership is taken elsewhere.
    fillInfoFromDirective(new Statement(st), directive);

    vector<int> empty;
    parallel["shadow_renews"] = empty;
    parallel["reductions"] = empty;
    parallel["remote_accesses"] = empty;
    parallel["acrosses"] = empty;

    for (auto& op : directive.reduction)
    {
        for (auto& var : op.second)
        {
            json item;
            item["line"] = st->lineNumber();
            item["operation"] = op.first;

            if (!isSgArrayType(var->type()))
            {
                item["reduction_type"] = "SCALAR";
                item["size_bytes"] = getSizeOfType(var->type());
                item["elements_count"] = 1;
            }
            else
            {
                item["reduction_type"] = "ARRAY";
                auto type = isSgArrayType(var->type());
                item["size_bytes"] = getSizeOfType(type->baseType());
                item["elements_count"] = type->dimension();
            }

            reduction.push_back(item);
            parallel["reductions"].push_back(reduction.size() - 1);
        }
    }

    fillAcrossShadow(directive.shadowRenew, st, byPos, "shadow_renews", shadow_renew, parallel);
    fillAcrossShadow(directive.across, st, byPos, "acrosses", across, parallel);

    auto func = getFuncStat(st);
    auto& funcInFile = allFuncInfo.at(st->fileName());
    FuncInfo* currF = NULL;
    for (auto& elem : funcInFile)
        if (elem->funcName == func->symbol()->identifier())
            currF = elem;

    if (currF == NULL)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    auto cfg = buildCFGforCurrentFunc(func, SAPFOR::CFG_Settings(true, false, false, true, false, false, true), commonBlocks, allFuncInfo);
    if (cfg.size() != 1)
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    //TODO IP analysis
    unsigned countOfAccess = 0;
    unsigned countOfOps = 0;

    if (cfg.find(currF) == cfg.end())
        printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

    //skip all parallel loops
    loop = st->lexNext();
    for (int z = 0; z < loopSymbs.size(); ++z)
        loop = loop->lexNext();

    int lineStart = loop->lineNumber();
    int lineEnd = lastNode->lexNext()->lineNumber();

    //dumpCFG(cfg, false);
    //TODO: calculate access in bytes
    for (auto& block : cfg[currF])
    {
        for (auto& ir : block->getInstructions())
        {
            auto line = ir->getLine();
            if (line < lineStart || line >= lineEnd)
                continue;

            auto inst = ir->getInstruction();
            if (inst->isAccess())
                countOfAccess++;
            if (inst->isArith())
                countOfOps++;
            //printf("%s %d %d\n", inst->dump().c_str(), inst->isAccess(), inst->isArith());
        }
    }
    deleteCFG(cfg);

    // guard on the divisor: ops without any access would otherwise produce inf
    parallel["computational_intensity"] = countOfAccess > 0 ? ((double)countOfOps / (double)countOfAccess) : 0;
    directives["parallel"].push_back(parallel);
}
/**
 * Fills the "arrays_info" and "directives" sections of the program part of
 * the file-level `info` object (element 1, key "program_info") and writes
 * the whole object to "info.json".
 *
 * Arrays are numbered in iteration order of `declaredArrays`; these numbers
 * are the ids used by the directive descriptions. PARALLEL, DISTRIBUTE and
 * ALIGN directives (stand-alone or inside a DVM variable declaration) are
 * converted; SHADOW and REMOTE_ACCESS directives are recognised but not
 * emitted yet.
 *
 * @param declaredArrays all declared arrays
 * @param commonBlocks   passed through to the PARALLEL handler
 * @param allFuncInfo    functions grouped by file name
 * @param gCovInfo       gcov records grouped by file name, then by line
 */
void parseDvmDirForPredictor(const map<tuple<int, string, string>, pair<DIST::Array*, DIST::ArrayAccessInfo*>>& declaredArrays,
                             const map<string, CommonBlock*>& commonBlocks,
                             const map<string, vector<FuncInfo*>>& allFuncInfo,
                             const map<string, map<int, Gcov_info>>& gCovInfo)
{
    auto& program = info[1]["program_info"];

    map<DIST::Array*, int> byPos;
    int pos = 0;
    for (auto& arrayElem : declaredArrays)
    {
        json jArray;
        auto& array = arrayElem.second.first;
        auto sizes = array->GetSizes();

        for (int z = 0; z < array->GetDimSize(); ++z)
            jArray["dimensions"].push_back(sizes[z].second - sizes[z].first + 1);

        jArray["name"] = array->GetName();
        jArray["element_size_bytes"] = array->GetTypeSize();
        program["arrays_info"].push_back(jArray);
        byPos[array] = pos++;
    }

    auto& directives = program["directives"];
    for (auto& byFile : allFuncInfo)
    {
        int ok = SgFile::switchToFile(byFile.first);
        if (ok == -1)
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);

        auto it = gCovInfo.find(byFile.first);
        if (it == gCovInfo.end())
        {
            __spf_print(1, "bad gcov info\n");
            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
        }

        auto& gcov = it->second;
        for (auto& func : byFile.second)
        {
            SgStatement* stat = func->funcPointer->GetOriginal();
            for (auto st = stat->lexNext(); st != stat->lastNodeOfStmt(); st = st->lexNext())
            {
                SgExpression* list;
                int line = 0;

                switch (st->variant())
                {
                case DVM_PARALLEL_ON_DIR:
                    parallelDir(byPos, st->expr(2), st->expr(0)->symbol(), st->expr(0)->lhs(), st, st->expr(1), gcov, directives, commonBlocks, allFuncInfo);
                    break;
                case DVM_VAR_DECL: // TODO
                    {
                        auto type = st->expr(2)->lhs();
                        if (type->variant() == DISTRIBUTE_OP)
                        {
                            list = st->expr(0);
                            while (list)
                            {
                                directives["distribute"].push_back(parseDistribution(byPos, list->lhs()->symbol(), type->lhs(), st->lineNumber()));
                                list = list->rhs();
                            }
                        }
                        else if (type->variant() == ALIGN_OP)
                        {
                            list = st->expr(0);
                            while (list)
                            {
                                directives["align"].push_back(parseAlign(byPos, list->lhs()->symbol(), type->rhs()->symbol(), type->lhs(), type->rhs()->lhs(), st->lineNumber()));
                                list = list->rhs();
                            }
                        }
                    }
                    break;
                case DVM_DISTRIBUTE_DIR:
                    directives["distribute"].push_back(parseDistribution(byPos, st->expr(0)->lhs()->symbol(), st->expr(1), st->lineNumber()));
                    break;
                case DVM_ALIGN_DIR:
                    directives["align"].push_back(parseAlign(byPos, st->expr(0)->lhs()->symbol(), st->expr(2)->symbol(), st->expr(1), st->expr(2)->lhs(), st->lineNumber()));
                    break;
                case DVM_SHADOW_DIR:
                    //dirs << "1;" << "SHADOW;" << st->expr(0)->unparse() << "(" << st->expr(1)->unparse() << ");\n";
                    break;
                case DVM_REMOTE_ACCESS_DIR:
                    {
                        line = st->lexNext()->lineNumber();

                        // distinct names: `it` and `info` already denote the
                        // per-file iterator and the file-level JSON object
                        auto lineIt = gcov.find(line);
                        if (lineIt == gcov.end())
                        {
                            // dereferencing end() would be undefined behaviour
                            __spf_print(1, "bad gcov info\n");
                            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                        }

                        auto& lineInfo = lineIt->second;
                        if (lineInfo.getNumLine() != line)
                        {
                            __spf_print(1, "bad gcov info\n");
                            printInternalError(convertFileName(__FILE__).c_str(), __LINE__);
                        }

                        //dirs << info.getExecutedCount() << ";" << "REMOTE_ACCESS;";
                        list = st->expr(0);
                        while (list)
                        {
                            //dirs << list->lhs()->unparse() << ";";
                            list = list->rhs();
                        }
                        //dirs << "\n";
                        break;
                    }
                default:
                    //printf("var = %d line %d\n", st->variant(), st->lineNumber());
                    break;
                }
            }
        }
    }

    //printf("%s\n", info.dump(2).c_str());
    ofstream dump("info.json");
    dump << info.dump(2) << std::endl;
    dump.flush();
    dump.close();
}

View File

@@ -0,0 +1,58 @@
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <vector>
#include "dvm.h"
#include "../GraphCall/graph_calls.h"
/// Counters of communication clauses found in PARALLEL directives.
/// All counters start at zero.
class ParallelStats
{
public:
    ParallelStats()
        : RemoteCount(0), ShadowCount(0), ReductionCount(0), AcrossCount(0)
    {
    }

    int RemoteCount;    ///< elements of REMOTE_ACCESS clauses
    int ShadowCount;    ///< elements of SHADOW_RENEW clauses
    int ReductionCount; ///< elements of REDUCTION clauses
    int AcrossCount;    ///< elements of ACROSS clauses
};
/// Directive counters and weighted scores collected for the predictor.
/// All counters and scores start at zero.
class PredictorStats
{
public:
    PredictorStats()
    {
        ParallelCount = RemoteCount = RedistributeCount = IntervalCount = 0;
        TotalScorePar = TotalScoreComm = TotalScoreDist = 0;
    }

    ParallelStats ParallelStat; ///< clause counters of PARALLEL directives
    int ParallelCount;
    int RemoteCount;
    int RedistributeCount;
    int IntervalCount;
    int TotalScoreComm;
    int TotalScorePar;
    int TotalScoreDist;

    /// Serialises the counters as eight '|'-separated integers in the order:
    /// ParallelCount, RemoteCount, RedistributeCount, IntervalCount, then the
    /// Remote / Shadow / Reduction / Across counters of ParallelStat.
    /// The TotalScore* fields are not part of the string.
    /// Declared const so it can be called on const objects and references.
    std::string to_string() const
    {
        std::string res = "";
        res += std::to_string(ParallelCount) + "|";
        res += std::to_string(RemoteCount) + "|";
        res += std::to_string(RedistributeCount) + "|";
        res += std::to_string(IntervalCount) + "|";
        res += std::to_string(ParallelStat.RemoteCount) + "|";
        res += std::to_string(ParallelStat.ShadowCount) + "|";
        res += std::to_string(ParallelStat.ReductionCount) + "|";
        res += std::to_string(ParallelStat.AcrossCount);
        return res;
    }
};
void processFileToPredict(SgFile *file, PredictorStats &predictorCounts);
void calculateStatsForPredictor(const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, const std::map<std::string, std::map<int, Gcov_info>>& gCovInfo);
void parseDvmDirForPredictor(const std::map<std::tuple<int, std::string, std::string>, std::pair<DIST::Array*, DIST::ArrayAccessInfo*>>& declaredArrays, const std::map<std::string, CommonBlock*>& commonBlocks, const std::map<std::string, std::vector<FuncInfo*>>& allFuncInfo, const std::map<std::string, std::map<int, Gcov_info>>& gCovInfo);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,14 @@
#pragma once
#include <vector>
#include <list>
#include "../Distribution/Distribution.h"
#include "../CreateInterTree/CreateInterTree.h"
int predictScheme(ParallelRegion *reg, const std::vector<std::pair<DIST::Array*, const DistrVariant*>> &distVar,
const std::set<DIST::Array*> &allArrays, const std::map<LoopGraph*, ParallelDirective*> &dirsToPredict,
std::map<std::string, std::vector<SpfInterval*>> &intervals, std::map<std::string, std::vector<Messages>> &messagesByFile,
const std::vector<std::tuple<DIST::Array*, std::vector<long>, std::pair<std::string, int>>> &allSingleRemotes,
const int maxSizeDist, const int procNum);
std::vector<std::vector<long>> getTopologies(const int procNum, const int maxSizeDist);