1845 lines
65 KiB
C++
1845 lines
65 KiB
C++
// CommCost.cpp: implementation of the CommCost class.
|
|
//
|
|
//////////////////////////////////////////////////////////////////////
|
|
#include <assert.h>
|
|
|
|
#include "CommCost.h"
|
|
#include "Vm.h"
|
|
|
|
//====
|
|
#include "stdio.h"
|
|
#include "Interval.h"
|
|
#include "LoopBlock.h"
|
|
//extern int ShdWid[10];
|
|
#include "ModelStructs.h"
|
|
extern _ParLoopInfo ParLoopInfo;
|
|
int ShdWid[10];
|
|
//=***
|
|
|
|
|
|
using namespace std;
|
|
|
|
extern VM* rootVM; // pointer to root VM
|
|
extern ofstream prot;
|
|
extern _DArrayFlag * DAF_tmp;
|
|
|
|
|
|
//====
|
|
void calculate();
|
|
|
|
double TStart, TByte;
|
|
LoopBlock** ProcBlock;
|
|
vector<long> s, n;
|
|
long x, y, z, LoopSZ;
|
|
vector<int> p, dmax, conv_beg, conv_end;
|
|
int add, first;
|
|
double time_c,time_x,call_time;
|
|
int mode=0; //now mode=0 (no print) //was mode==0 (only global), 1(global+approach), 2(only approach), 3(global+aproach+no_print)
|
|
int full_mode=0; //full_mode==0(as old dvm), 1(max_rank-pipeline calc(full search the best)) 2(different order of cycles for find the best in full search)
|
|
|
|
vector<int> pip, mult_is, mm;
|
|
int mult,rank_mas,max_rank=4; // max_rank: maximum rank of the modeled pipeline
|
|
vector<int> ord; // order in which the loops are launched
|
|
int invers[10]; // 10 par loop inside each other
|
|
//=***
|
|
|
|
#define min(a, b) (a < b ? a : b)
|
|
#define max(a, b) (a > b ? a : b)
|
|
|
|
/*
|
|
#if defined (_MSC_VER) || (defined (__GNUG__) && (__GNUC__ < 3))
|
|
template <class T>
|
|
|
|
T max(T a, T b)
|
|
{
|
|
return a >= b ? a : b;
|
|
}
|
|
#endif
|
|
*/
|
|
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Construction/Destruction
|
|
//////////////////////////////////////////////////////////////////////
|
|
|
|
// Default constructor: no virtual machine is attached and the transfer
// table is built from a zero-length row (presumably an empty 0 x 0 table).
CommCost::CommCost()
{
    lvector emptyRow(0);

    vm = 0;
    transfer = Dim2Array(0, emptyRow);
}
|
|
|
|
// Destructor: intentionally empty, no explicit cleanup is performed here.
CommCost::~CommCost() {}
|
|
|
|
// Builds a cost object bound to the virtual machine Avm (must be non-null).
// The transfer table gets rootVM->GetLSize() rows, each a copy of a
// zero-filled row of the same length.
CommCost::CommCost(VM *Avm)
{
    // Template row: one zero entry per processor of the root VM.
    lvector zeroRow(rootVM->GetLSize(), 0);

    vm = Avm;
    assert(vm != 0);

    // Every entry of the table starts at zero.
    transfer = Dim2Array(rootVM->GetLSize(), zeroRow);
}
|
|
|
|
// Accumulates into transfer[p1][p2] the number of bytes that processor p1
// has to send to processor p2 when a distributed array changes its layout
// from oldDA to newDA.
//
// For every (source, destination) pair that is considered, the amount added
// is  (b1 ^ b2).GetBlockSize() * oldDA->TypeSize, where b1 is a block of the
// old array and b2 a block of the new array (b1 ^ b2 is presumably the
// intersection of the two blocks - confirm against Block::operator^).
//
// Cases handled, in this order:
//   1. every processor owns identical bounds in both layouts -> nothing to do;
//   2. oldDA->Repl is set (fully replicated old array)       -> 1-D / 2-D VM;
//   3. the old array is replicated along one VM dimension    -> 2-D VM;
//   4. general case: all pairs p1 != p2.
//
// Parameters:
//   oldDA - array description before the redistribution
//   newDA - array description after the redistribution
void CommCost::Update(DArray * oldDA, DArray * newDA)
{
    long p1, p2, size;
    Block b1, b2, bi;
    long i, j;
    bool transferIs = false;
    vector<long> transferInf((long) vm->Rank(), 0);
    vector<long> SI(2, 0L);

    long daRank = oldDA->Rank();
    long amRank = oldDA->AM_Dis->Rank();
    long vmRank = oldDA->AM_Dis->VM_Dis->Rank();
    bool replAxisIs = false;
    long amAxis;
    long num;

    // ---- Case 1: compare the local blocks of both layouts on every processor.
    // The scan stops at the first processor whose blocks differ (one empty and
    // the other not, or different lower/upper bounds in some dimension).
    for (j = 0; j < MPSProcCount(); j++) {
        b1 = Block(oldDA, j, 1);
        b2 = Block(newDA, j, 1);

        if ((b1.empty() && !b2.empty()) || (b2.empty() && !b1.empty()))
            break;

        for (i = 0; i < daRank; i++) {
            if (b1.GetLower(i) != b2.GetLower(i) || b1.GetUpper(i) != b2.GetUpper(i))
                break;
        }

        if (i != daRank)
            break;
    }
    // Blocks of the new and the old array coincide on all processors,
    // so nothing has to be sent.
    if (j == MPSProcCount())
        return;

    vector<char> replAxis(vmRank, 0);

    // ---- Case 2: the old array is fully replicated.
    if (oldDA->Repl) {
        if (oldDA->AM_Dis == newDA->AM_Dis)
            return;

        // A transfer is needed only if some FillArr entry shrinks.
        for (i = 0; i < vm->Rank(); i++) {
            if (oldDA->AM_Dis->FillArr[i] > newDA->AM_Dis->FillArr[i])
                transferIs = true;
            transferInf[i] = oldDA->AM_Dis->FillArr[i] - newDA->AM_Dis->FillArr[i];
        }

        if (!transferIs)
            return;

        b1 = Block(oldDA, 0, 1);

        // 1-dimensional VM: processor 0 is the source for every destination.
        if (vm->Rank() == 1) {
            p1 = 0;
            for (p2 = 0; p2 < vm->GetSize(1) - oldDA->AM_Dis->FillArr[0]; p2++) {
                b2 = Block(newDA, p2, 1);
                bi = b1 ^ b2;
                size = bi.GetBlockSize();
                transfer[p1][p2] += size * oldDA->TypeSize;
            }
            return;
        }

        // 2-dimensional VM.
        if (transferInf[0] > 0 && transferInf[1] > 0) {
            // a) new columns along dimension 1, rows that were already filled
            for (i = 0; i < vm->GetSize(2) - oldDA->AM_Dis->FillArr[1]; i++)
                for (j = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0];
                     j < vm->GetSize(1) - newDA->AM_Dis->FillArr[0]; j++) {

                    SI[0] = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0] - 1;
                    SI[1] = i;
                    p1 = vm->GetLI(SI);

                    SI[0] = j;
                    SI[1] = i;
                    p2 = vm->GetLI(SI);

                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    size = bi.GetBlockSize();
                    transfer[p1][p2] += size * oldDA->TypeSize;
                }

            // b) new rows along dimension 2, columns that were already filled
            for (i = 0; i < vm->GetSize(1) - oldDA->AM_Dis->FillArr[0]; i++)
                for (j = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1];
                     j < vm->GetSize(2) - newDA->AM_Dis->FillArr[1]; j++) {

                    SI[0] = i;
                    SI[1] = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1] - 1;
                    p1 = vm->GetLI(SI);

                    SI[0] = i;
                    SI[1] = j;
                    p2 = vm->GetLI(SI);

                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    size = bi.GetBlockSize();
                    transfer[p1][p2] += size * oldDA->TypeSize;
                }

            // c) the corner: new in both dimensions, fed from a single source
            SI[0] = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0] - 1;
            SI[1] = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1] - 1;
            p1 = vm->GetLI(SI);

            for (i = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0];
                 i < vm->GetSize(1) - newDA->AM_Dis->FillArr[0]; i++)
                for (j = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1];
                     j < vm->GetSize(2) - newDA->AM_Dis->FillArr[1]; j++) {

                    SI[0] = i;
                    SI[1] = j;
                    p2 = vm->GetLI(SI);

                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    size = bi.GetBlockSize();
                    transfer[p1][p2] += size * oldDA->TypeSize;
                }
        } else if (transferInf[0] > 0) {
            // Only dimension 1 gains processors.
            for (i = 0; i < vm->GetSize(2) - newDA->AM_Dis->FillArr[1]; i++)
                for (j = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0];
                     j < vm->GetSize(1) - newDA->AM_Dis->FillArr[0]; j++) {

                    SI[0] = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0] - 1;
                    SI[1] = i;
                    p1 = vm->GetLI(SI);

                    SI[0] = j;
                    SI[1] = i;
                    p2 = vm->GetLI(SI);

                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    size = bi.GetBlockSize();
                    transfer[p1][p2] += size * oldDA->TypeSize;
                }
        } else {
            // Only dimension 2 gains processors.
            for (i = 0; i < vm->GetSize(1) - newDA->AM_Dis->FillArr[0]; i++)
                for (j = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1];
                     j < vm->GetSize(2) - newDA->AM_Dis->FillArr[1]; j++) {

                    SI[0] = i;
                    SI[1] = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1] - 1;
                    p1 = vm->GetLI(SI);

                    SI[0] = i;
                    SI[1] = j;
                    p2 = vm->GetLI(SI);

                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    size = bi.GetBlockSize();
                    transfer[p1][p2] += size * oldDA->TypeSize;
                }
        }
        return;
    }

    // ---- Find out along which VM dimensions the old array is replicated.
    for (i = 0; i < vmRank; i++) {
        switch (oldDA->AM_Dis->DistRule[amRank + i].Attr) {
            case map_REPLICATE:
                replAxis[i] = 1;
                replAxisIs = true;
                break;
            case map_NORMVMAXIS:
                amAxis = oldDA->AM_Dis->DistRule[amRank + i].Axis;
                switch (oldDA->AlignRule[daRank + amAxis - 1].Attr) {
                    case align_REPLICATE:
                        replAxis[i] = 1;
                        replAxisIs = true;
                        break;
                    case align_BOUNDREPL:
                        // Additional information is needed here: the numbers of
                        // the processors in this dimension onto which the
                        // DArray is replicated.
                        replAxis[i] = 2;
                        replAxisIs = true;
                        break;
                }
                break;
        }
    }

    // ---- Case 3: 2-dimensional VM. It is assumed that BOUNDREPL does not occur
    // for a 1-dimensional machine (nor, so far, for a 2-dimensional one), hence
    // REPLICATE applies along exactly one dimension.
    if (replAxisIs) {
        if (replAxis[0]) {
            // Replicated along the 1st dimension.
            // Debug tracing that used to print here unconditionally was removed:
            // it queried dimension index 2 of b1 regardless of the array rank
            // and passed long values to "%d".
            for (j = 0; j < vm->GetSize(2); j++) {
                b1 = Block(oldDA, vm->GetSpecLI(0, 2, j), 1);

                for (p2 = 0; p2 < vm->GetLSize(); p2++) {
                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    num = vm->GetNumInDim(p2, 1);

                    // Clamp the source coordinate to the last filled processor.
                    if (num > (vm->GetSize(1) - oldDA->AM_Dis->FillArr[0] - 1))
                        SI[0] = vm->GetSize(1) - oldDA->AM_Dis->FillArr[0] - 1;
                    else
                        SI[0] = num;
                    SI[1] = j;

                    p1 = vm->GetLI(SI);
                    if (p1 != p2)
                        transfer[p1][p2] += bi.GetBlockSize() * oldDA->TypeSize;
                }
            }
        } else {
            // Replicated along the 2nd dimension.
            for (j = 0; j < vm->GetSize(1); j++) {
                b1 = Block(oldDA, vm->GetSpecLI(0, 1, j), 1);

                for (p2 = 0; p2 < vm->GetLSize(); p2++) {
                    b2 = Block(newDA, p2, 1);
                    bi = b1 ^ b2;
                    num = vm->GetNumInDim(p2, 2);

                    // Clamp the source coordinate to the last filled processor.
                    if (num > (vm->GetSize(2) - oldDA->AM_Dis->FillArr[1] - 1))
                        SI[1] = vm->GetSize(2) - oldDA->AM_Dis->FillArr[1] - 1;
                    else
                        SI[1] = num;
                    SI[0] = j;

                    p1 = vm->GetLI(SI);
                    if (p1 != p2)
                        transfer[p1][p2] += bi.GetBlockSize() * oldDA->TypeSize;
                }
            }
        }
        return;
    }

    // ---- Case 4 (general form): before the redistribution the array is not
    // replicated along any dimension; consider every ordered pair p1 != p2.
    for (p1 = 0; p1 < vm->GetLSize(); p1++) {
        b1 = Block(oldDA, p1, 1);

        for (p2 = 0; p2 < vm->GetLSize(); p2++) {
            if (p1 != p2) {
                b2 = Block(newDA, p2, 1);
                bi = b1 ^ b2;
                size = bi.GetBlockSize();
                transfer[p1][p2] += size * oldDA->TypeSize;
            }
        }
    }
}
|
|
|
|
// Copy assignment: takes over the VM pointer and a copy of the transfer
// table of cc, then returns this object.
CommCost & CommCost::operator =(const CommCost & cc)
{
    vm = cc.vm;
    transfer = cc.transfer;
    return *this;
}
|
|
|
|
double CommCost::GetCost()
|
|
{
|
|
double cost = 0.0;
|
|
long p1,
|
|
p2;
|
|
long Distance,
|
|
maxDistance = 0; // òåêóùåå è ìàêñèìàëüíîå ðàñòîÿíèå
|
|
// (äëèíà ìèíèìàëüíîãî ïóòè ìåæäó ïðîöåññîðàìè)
|
|
// ìåæäó ïðîöåññîðàìè
|
|
long Byte,
|
|
maxByte = 0; // -//- ÷èñëî ïåðåñûëàåìûõ áàéòîâ
|
|
long s; // ðàçìåð ïåðåñûëàåìîé ïî êîíâåéåðó ïîðöèè äàííûõ(â áàéòàõ)
|
|
long k; // ÷èñëî ïîðöèé
|
|
long e; // îñòàòîê
|
|
int c = 0; // 0 - åñëè îñòàòîê = 0, 1 - èíà÷å
|
|
|
|
long LSize = rootVM->GetLSize();
|
|
assert(vm != NULL);
|
|
double TStart = vm->getTStart();
|
|
double TByte = vm->getTByte();
|
|
|
|
switch (vm->getMType()) {
|
|
|
|
case mach_ETHERNET:
|
|
|
|
|
|
if (DAF_tmp == NULL)
|
|
{
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
for (p2 = 0; p2 < LSize; p2++)
|
|
if ((p1 != p2) && (transfer[p1][p2] != 0))
|
|
cost += TStart + TByte * transfer[p1][p2];
|
|
}
|
|
//====
|
|
else
|
|
{
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
DAF_tmp->ProcessTimeStamp[p1] = 0;
|
|
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
for (p2 = 0; p2 < LSize; p2++)
|
|
if ((p1 != p2) && (transfer[p1][p2] != 0))
|
|
{
|
|
cost += TStart + TByte * transfer[p1][p2];
|
|
DAF_tmp->ProcessTimeStamp[p1] = cost; //æäåò êîíöà êîïèðîâàíèÿ îòïðàâèòåëü
|
|
DAF_tmp->ProcessTimeStamp[p2] = cost; //æäåò êîíöà êîïèðîâàíèÿ ïîëó÷àòåëü
|
|
// printf("Trans %d -> %d = cost[%f]\n",p1,p2,cost);
|
|
}
|
|
}
|
|
|
|
//=***
|
|
break;
|
|
|
|
case mach_TRANSPUTER:
|
|
// îïðåäåëåíèå ðàññòîÿíèÿ (ìèíèìàëüíîãî ïóòè) äî ìàêñèìàëüíî
|
|
// óäàëåííûõ ïðîöåññîðîâ ìåæäó êîòîðûìè èìåþòñÿ ïåðåñûëêè
|
|
for (p1 = 0; p1 < LSize; p1++) {
|
|
for (p2 = 0; p2 < LSize; p2++) {
|
|
|
|
Distance = vm->GetDistance(p1, p2);
|
|
Byte = transfer[p1][p2];
|
|
|
|
if (Distance >= maxDistance && Byte != 0) {
|
|
if (Distance > maxDistance) {
|
|
maxDistance = Distance;
|
|
maxByte = Byte;
|
|
}
|
|
else
|
|
maxByte = max(Byte, maxByte);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ïîäñ÷åò âðåìåíè ïåðåñûëêè ïðè èñïîëüçîâàíèè êîíâåéåðà
|
|
if (maxDistance != 0) {
|
|
if (maxDistance != 1) {
|
|
|
|
s = sqrt(TStart * maxByte / (TByte * (maxDistance - 1)));
|
|
if (s > maxByte)
|
|
s = maxByte;
|
|
if (s == 0)
|
|
s = 1;
|
|
k = maxByte / s;
|
|
e = maxByte % s;
|
|
if (e != 0)
|
|
c = 1;
|
|
cost = (TStart + TByte * s) * (maxDistance + k - 1) +
|
|
c * (TStart + TByte * e);
|
|
}
|
|
else
|
|
cost = TStart + TByte * maxByte;
|
|
}
|
|
|
|
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
DAF_tmp->ProcessTimeStamp[p1] = cost; //====// âñå æäóò êîíöà êîïèðîâàíèÿ
|
|
|
|
break;
|
|
|
|
case mach_MYRINET:
|
|
//====
|
|
if (mode)
|
|
{
|
|
printf("Myrinet!!!\n");
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
{
|
|
for (p2 = 0; p2 < LSize; p2++)
|
|
printf("%d\t", transfer[p1][p2]);
|
|
printf("\n");
|
|
}
|
|
}
|
|
//=***
|
|
for (p1 = 1; p1 < LSize; p1++) {
|
|
for (p2 = 0; p2 < p1; p2++) {
|
|
if (transfer[p1][p2] < transfer[p2][p1]) {
|
|
transfer[p1][p2] = transfer[p2][p1];
|
|
}
|
|
}
|
|
}
|
|
|
|
if (DAF_tmp == NULL)
|
|
{
|
|
for (p1 = 1; p1 < LSize; p1++)
|
|
for (p2 = 0; p2 < p1; p2++)
|
|
if ((p1 != p2) && (transfer[p1][p2] != 0))
|
|
cost += TStart + TByte * transfer[p1][p2];
|
|
}
|
|
//====
|
|
else
|
|
{
|
|
for (p1 = 0; p1 < LSize; p1++)
|
|
DAF_tmp->ProcessTimeStamp[p1] = 0;
|
|
|
|
for (p1 = 1; p1 < LSize; p1++)
|
|
for (p2 = 0; p2 < p1; p2++)
|
|
if ((p1 != p2) && (transfer[p1][p2] != 0))
|
|
{
|
|
cost += TStart + TByte * transfer[p1][p2];
|
|
DAF_tmp->ProcessTimeStamp[p1] += cost; //æäåò êîíöà êîïèðîâàíèÿ ïîëó÷àòåëü
|
|
DAF_tmp->ProcessTimeStamp[p2] += cost; //æäåò êîíöà êîïèðîâàíèÿ îòïðàâèòåëü
|
|
// printf("Trans %d -> %d = cost[%f]\n",p2,p1,TStart + TByte * transfer[p1][p2]);
|
|
}
|
|
}
|
|
|
|
//=***
|
|
break;
|
|
}
|
|
|
|
const int procC = vm->getProcCount();
|
|
const int numCh = vm->getNumChanels();
|
|
cost = (procC < numCh) ? cost / procC : cost / numCh;
|
|
return cost * vm->getScale();
|
|
}
|
|
|
|
|
|
//===========================================================================
|
|
//#define min(a,b) ((a<b)?a:b)
|
|
//#define max(a,b) ((a>b)?a:b)
|
|
|
|
// Accessors for dimension j of the loop block owned by processor i.
// For j >= rank_mas they yield a neutral value (0 for bounds, 1 for step/size).
// Macro arguments that appear outside a subscript are parenthesized so that
// compound expressions passed as arguments expand correctly.
#define mbeg(i,j) (((j)<rank_mas)?ProcBlock[i]->LSDim[j].Lower:0)
#define mend(i,j) (((j)<rank_mas)?ProcBlock[i]->LSDim[j].Upper:0)
#define mstep(i,j) (((j)<rank_mas)?ProcBlock[i]->LSDim[j].Step:1)
#define msize(i,j) (((j)<rank_mas)?(ProcBlock[i]->LSDim[j].Upper - ProcBlock[i]->LSDim[j].Lower + 1) / ProcBlock[i]->LSDim[j].Step:1)

// Number of chunks of length z needed to cover msize(i,j) (rounded up).
#define bsize(i,j,z) ((msize(i,j)%(z))?msize(i,j)/(z)+1:msize(i,j)/(z))

// Loop header running n[k] over [beg, end]; descending when invers[k] is set.
#define for_calc(n,k,beg,end) for((n)[k]=invers[k]?(end):(beg); invers[k]?(n)[k]>=(beg):(n)[k]<=(end); invers[k]?(n)[k]--:(n)[k]++)

// Shadow-edge widths for dimension k, chosen by the direction flag invers[k].
// NOTE: ShdWid(k) is a function-like macro; the spelling ShdWid[k] (with
// brackets) is not expanded and refers to the global array of the same name.
#define ShdWid(k) ((!invers[k])?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
#define PreShdWid(k) (invers[k]?ParLoopInfo.SGnew->BoundGroup_Obj->dimInfo[k].LeftBSize:ParLoopInfo.SG->BoundGroup_Obj->dimInfo[k].RightBSize)
|
|
|
|
//internal functions
|
|
double calc_comm(long i)
|
|
{ long k,m; double res; long *save_n;
|
|
|
|
save_n=(long *)malloc(max_rank*sizeof(long));
|
|
if (i==max_rank)
|
|
{ for(k=1,m=ShdWid[n[0]];k<max_rank;k++)
|
|
m*=n[k];
|
|
return TStart+m*TByte;
|
|
}
|
|
else
|
|
{ for(k=i;k<max_rank;k++)
|
|
save_n[k]=n[k];
|
|
|
|
m=n[i];
|
|
n[i]=s[m];
|
|
res=(dmax[m]/s[m])*calc_comm(i+1);
|
|
|
|
//restore n[i+1..]
|
|
for(k=i+1;k<max_rank;k++)
|
|
n[k]=save_n[k];
|
|
n[i]=dmax[m]%s[m];
|
|
if(n[i]) res+=calc_comm(i+1);
|
|
return res;
|
|
}
|
|
}
|
|
|
|
void calculate_all_pipes()
|
|
{ int old_add=add;
|
|
int k,*n,*first_pipe;
|
|
int *old_conv_beg ,*old_conv_end,*old_Shd_Wid;
|
|
n=(int *)malloc(max_rank*sizeof(int));
|
|
first_pipe=(int *)malloc(max_rank*sizeof(int));
|
|
old_conv_beg=(int *)malloc(max_rank*sizeof(int));
|
|
old_conv_end=(int *)malloc(max_rank*sizeof(int));
|
|
old_Shd_Wid=(int *)malloc(max_rank*sizeof(int));
|
|
|
|
add=old_add;
|
|
|
|
for(k=0; k<max_rank;k++)
|
|
{ old_conv_beg[k]=conv_beg[k];
|
|
old_conv_end[k]=conv_end[k];
|
|
old_Shd_Wid[k]=ShdWid[k];
|
|
first_pipe[k]=1;
|
|
}
|
|
|
|
|
|
for(n[0]=0; n[0]<(mult_is[0]?p[0]:1); n[0]++)
|
|
for(n[1]=0; n[1]<(mult_is[1]?p[1]:1); n[1]++)
|
|
for(n[2]=0; n[2]<(mult_is[2]?p[2]:1); n[2]++)
|
|
for(n[3]=0; n[3]<(mult_is[3]?p[3]:1); n[3]++)
|
|
{ for(k=0; k<max_rank;k++)
|
|
if(mult_is[k])
|
|
{ conv_beg[k]=n[k];
|
|
conv_end[k]=n[k];
|
|
if(mult_is[k]==1 && first_pipe[k]==0 && add==1)
|
|
add=2;
|
|
}
|
|
else
|
|
{ conv_beg[k]=old_conv_beg[k];
|
|
conv_end[k]=old_conv_end[k];
|
|
}
|
|
|
|
if(add==2 && mode) printf("Insuff.par.usr...");
|
|
if(mode) printf("Pipe[%d %d %d %d] using procs %d-%d %d-%d %d-%d %d-%d\n",n[0],n[1],n[2],n[3],conv_beg[0],conv_end[0],conv_beg[1],conv_end[1],conv_beg[2],conv_end[2],conv_beg[3],conv_end[3]);
|
|
for(k=0;k<max_rank;k++)
|
|
pip[k]=conv_end[k]-conv_beg[k]+1;
|
|
// if(mode) printf("Konv procs=%d %d %d %d\n",pip[0],pip[1],pip[2],pip[3]);
|
|
|
|
calculate();
|
|
|
|
for(k=0; k<max_rank;k++)
|
|
ShdWid[k]=old_Shd_Wid[k];
|
|
|
|
for(k=0; k<max_rank;k++)
|
|
if(mult_is[k]==1 && first_pipe[k]==1) first_pipe[k]=0;
|
|
}
|
|
if(mode) printf("\n");
|
|
|
|
}
|
|
|
|
|
|
void calculate()
|
|
{
|
|
int i, j, d, k;
|
|
float cur, cur_beg;
|
|
float **comm;
|
|
vector<long> prev, b, post;
|
|
|
|
//mode=0;
|
|
|
|
//ord[0]=1; ord[1]=0; ord[2]=2; ord[3]=3;
|
|
//s[0]=1; s[1]=11; s[2]=1; s[3]=1;
|
|
|
|
prev.resize(max_rank);
|
|
post.resize(max_rank);
|
|
n.resize(max_rank);
|
|
b.resize(max_rank);
|
|
//prev = (long *)malloc(max_rank * sizeof(long));
|
|
//post = (long *)malloc(max_rank * sizeof(long));
|
|
//n = (long *)malloc(max_rank * sizeof(long));
|
|
//b = (long *)malloc(max_rank * sizeof(long));
|
|
|
|
comm = (float **)malloc(MPSProcCount() * sizeof(float *));
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
comm[i] = (float *)malloc(max_rank * sizeof(float));
|
|
|
|
//printf("Step %d %d %d\n",mstep(0,0),mstep(0,1),mstep(0,2));
|
|
//printf("Invers %d %d %d\n",invers[0],invers[1],invers[2]);
|
|
|
|
if (rank_mas >= 2) //was 3
|
|
{
|
|
float *****a, m, mwait, *com;
|
|
float **prev_comm, sz[4][4][4][4]; //max_rank=4; õðàíèò ðàçìåðû áëîêîâ òåêóùåãî ïðîöåññîðà
|
|
float beg, step, last;
|
|
int ind_beg;
|
|
double time_beg;
|
|
double last_real_comm, real_comm;
|
|
|
|
int pip_ord[4]; //äëÿ êîððåêòíîãî âû÷èñëåíèÿ prev è post
|
|
|
|
com = (float *)malloc(MPSProcCount() * sizeof(float));
|
|
|
|
|
|
prev_comm = (float **)malloc(max_rank * sizeof(float *));
|
|
for (i = 0; i < rank_mas; i++)
|
|
prev_comm[i] = (float *)malloc(rank_mas * sizeof(float));
|
|
|
|
//ïî÷åìó 4: 1-2-3=íà÷àëî à ïî 2-3 ìîæíî ñïðîãíîçèðîâàòü êîíåö=4
|
|
a = (float *****)malloc(MPSProcCount() * sizeof(float ****));
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
a[i] = (float ****)malloc(4 * sizeof(float ***));
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
a[i][n[0]] = (float ***)malloc(4 * sizeof(float **));
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
for (n[1] = 0; n[1] < 4; n[1]++)
|
|
a[i][n[0]][n[1]] = (float **)malloc(4 * sizeof(float *));
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
for (n[1] = 0; n[1] < 4; n[1]++)
|
|
for (n[2] = 0; n[2] < 4; n[2]++)
|
|
a[i][n[0]][n[1]][n[2]] = (float *)malloc(4 * sizeof(float));
|
|
|
|
// printf("ms %d %d %d \n",bsize(0,0,s[0]),bsize(0,1,s[1]),bsize(0,2,s[2]));
|
|
// printf("ALL=%d\n",LoopSZ);
|
|
if (call_time / LoopSZ > 0.0000000001 && mode) printf("Send/Exec=%.f\n", (TStart + TByte) / (call_time / LoopSZ));
|
|
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < min(4, bsize(i, ord[0], s[ord[0]])); n[0]++)
|
|
for (n[1] = 0; n[1] < min(4, bsize(i, ord[1], s[ord[1]])); n[1]++)
|
|
for (n[2] = 0; n[2] < min(4, bsize(i, ord[2], s[ord[2]])); n[2]++)
|
|
for (n[3] = 0; n[3] < min(4, bsize(i, ord[3], s[ord[3]])); n[3]++)
|
|
{
|
|
a[i][n[0]][n[1]][n[2]][n[3]] = 1;
|
|
for (j = 0; j < 4; j++)
|
|
if (n[j] < min(3, bsize(i, ord[j], s[ord[j]]) - 1)) a[i][n[0]][n[1]][n[2]][n[3]] *= s[j];
|
|
else a[i][n[0]][n[1]][n[2]][n[3]] *= (msize(i, j) % s[j]) ? msize(i, j) % s[j] : s[j];
|
|
// printf("%d-%d-%d-%d-%d=%.f\n",i,n[0],n[1],n[2],n[3],a[i][n[0]][n[1]][n[2]][n[3]]);
|
|
}
|
|
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
{
|
|
com[i] = 0;
|
|
if (ProcBlock[i]->GetRank())
|
|
for (k = 0; k < max_rank; k++)
|
|
comm[i][k] = 0;
|
|
}
|
|
|
|
|
|
//==== ïðåäâàðèòåëüíûé îáñ÷åò
|
|
|
|
for (i = 0, time_c = 0; i < max_rank; i++)
|
|
{
|
|
for (k = 0; k < max_rank; k++)
|
|
n[k] = ord[(i + k) % max_rank];
|
|
for (k = 1, m = pip[n[0]] - 1; k < max_rank; k++)
|
|
m *= pip[n[k]];
|
|
|
|
if (m) time_c += m * calc_comm(1);
|
|
}
|
|
if (mode) printf("Communication::time_c=%.10f\n", time_c);
|
|
|
|
for (i = 0, m = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank()) m++;
|
|
|
|
|
|
|
|
|
|
//printf("Time_comm=%f\n",time_c);
|
|
// printf("wanna insuff %f ->> %f - %f comm=%f\n",call_time, cur*call_time/LoopSZ, call_time/m, time_c);
|
|
if (add)
|
|
{
|
|
// AddMPSTime(__Wait_shadow, time_c/m);
|
|
// AddMPSTime(__CPU_time_usr, call_time/LoopSZ);
|
|
// AddMPSTime(__CPU_time_usr, call_time/(m*mult));
|
|
// AddMPSTime(__Insuff_parall_sys, (cur*call_time/LoopSZ-call_time/m)/mult-time_c/m);
|
|
// AddMPSTime(__CPU_time_sys, cur*call_time/LoopSZ-call_time/m);
|
|
}
|
|
|
|
|
|
//=*** âîçìîæíî ýòî íå òî÷íî íî ïðèìåðíî âåðíî ðàññ÷èòûâàåòñÿ îáúåì âû÷èñëåíèé è ïåðåäà÷, à äàëåå î÷åíü òî÷íî âû÷èñëÿåòñÿ êîãäà çàêîí÷èòñÿ îáùåå âûïîëíåíèå
|
|
|
|
ind_beg = -1;
|
|
for_calc(n, 0, conv_beg[0], conv_end[0]) //n[0] ïðîáåãàåò 1 èçìåðåíèå ïðîöîâ êîíâåéåðà â ïîðÿäêå äëÿ âû÷èñëåíèÿ áëîêîâ êîíâåéåðà
|
|
for_calc(n, 1, conv_beg[1], conv_end[1]) //n[1] ïðîáåãàåò 2 èçìåðåíèå ïðîöîâ êîíâåéåðà ...
|
|
for_calc(n, 2, conv_beg[2], conv_end[2]) //n[2] ïðîáåãàåò 3 èçìåðåíèå ïðîöîâ êîíâåéåðà ...
|
|
for_calc(n, 3, conv_beg[3], conv_end[3]) //n[3] ïðîáåãàåò 4 èçìåðåíèå ïðîöîâ êîíâåéåðà ...
|
|
{
|
|
//only comment i=n[0]*p[1]*p[2]+n[1]*p[2]+n[2]; only comment
|
|
for (k = 0, i = 0; k < rank_mas; k++)
|
|
{
|
|
for (j = k + 1, d = 1; j < rank_mas; j++)
|
|
d *= p[j];
|
|
i += n[k] * d;
|
|
}
|
|
|
|
if (add)
|
|
{
|
|
for (j = 0, cur = 1; j < max_rank; j++)
|
|
cur *= msize(i, j);
|
|
|
|
// printf("proc[%d] cpu+=%f\n",i,cur*call_time/LoopSZ);
|
|
AddTime(__CPU_time_usr, i, cur*call_time / LoopSZ);
|
|
if (add == 2) AddTime(__Insuff_parall_usr, i, cur*call_time / LoopSZ);
|
|
|
|
}
|
|
|
|
|
|
if (ProcBlock[i]->GetRank())
|
|
{
|
|
for (k = 0; k < rank_mas; k++)
|
|
{
|
|
for (j = k + 1, d = 1; j < rank_mas; j++)
|
|
d *= p[j];
|
|
//íàäî prev == -1 åñëè íåò ïðåä. ïðîöåññîðà äëÿ íåãî ïî ýòîìó èçìåðåíèþ, êîò. íàäî æäàòü
|
|
if (invers[k])
|
|
if (n[k] != conv_end[k]/*pip[k]-1*/ && i + d < MPSProcCount() && ProcBlock[i + d]->GetRank()) prev[k] = i + d;
|
|
else prev[k] = -1;
|
|
else
|
|
if (n[k] != conv_beg[k]/*0*/ && i - d >= 0 && ProcBlock[i - d]->GetRank()) prev[k] = i - d;
|
|
else prev[k] = -1;
|
|
|
|
if (!invers[k])
|
|
if (n[k] != conv_end[k]/*pip[k]-1*/ && i + d < MPSProcCount() && ProcBlock[i + d]->GetRank()) post[k] = i + d;
|
|
else post[k] = -1;
|
|
else
|
|
if (n[k] != conv_beg[k]/*0*/ && i - d >= 0 && ProcBlock[i - d]->GetRank()) post[k] = i - d;
|
|
else post[k] = -1;
|
|
}
|
|
|
|
|
|
if (ind_beg == -1)
|
|
{
|
|
// int tmp_shd_wid[4];
|
|
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
// tmp_shd_wid[k]=ShdWid[k];
|
|
// ShdWid[k]=0;
|
|
pip_ord[k] = -1;
|
|
}
|
|
|
|
for (k = 0, d = 1; k < rank_mas; k++)
|
|
{
|
|
if (post[k] != -1)
|
|
for (j = 0; j < rank_mas; j++)
|
|
if (mbeg(i, j) != mbeg(post[k], j) || mend(i, j) != mend(post[k], j))
|
|
{ //ShdWid[k]=tmp_shd_wid[j];
|
|
pip_ord[k] = j;
|
|
}
|
|
}
|
|
}
|
|
if (mode) { printf("PIPE ORDER [ProcDim -> ArrDim]"); {for (k = 0; k < max_rank; k++)if (pip_ord[k] != -1)printf(" %d -> %d ", k, pip_ord[k]); } printf("\n"); }
|
|
|
|
{ int old_prev[4], old_post[4];
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
old_prev[k] = prev[k];
|
|
old_post[k] = post[k];
|
|
prev[k] = -1;
|
|
post[k] = -1;
|
|
}
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
if (pip_ord[k] != -1)
|
|
{
|
|
prev[pip_ord[k]] = old_prev[k];
|
|
post[pip_ord[k]] = old_post[k];
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (mode) printf("WasPrev %2d= %2d %2d %2d\n", i, prev[0], prev[1], prev[2]);
|
|
if (mode) printf("WasPost %2d= %2d %2d %2d\n", i, post[0], post[1], post[2]);
|
|
if (mode) printf("ShdWid %2d= %2d %2d %2d\n", i, ShdWid[0], ShdWid[1], ShdWid[2]);
|
|
|
|
if (ind_beg == -1) { ind_beg = i; time_beg = CurrProcTime(ind_beg); }
|
|
|
|
/*
|
|
for(k=0;k<4;k++)
|
|
if(mm[k]==1)
|
|
{
|
|
for(j=3;j>k;j--)
|
|
{
|
|
prev[j]=prev[j-1];
|
|
post[j]=post[j-1];
|
|
}
|
|
prev[k]=-1;
|
|
post[k]=-1;
|
|
}
|
|
if(mode) printf("WillPrev %2d= %2d %2d %2d\n",i,prev[0],prev[1],prev[2]);
|
|
if(mode) printf("WillPost %2d= %2d %2d %2d\n",i,post[0],post[1],post[2]);
|
|
*/
|
|
// printf("LZ=%f\n",call_time/LoopSZ*1000000);
|
|
|
|
|
|
//printf("proc[%d] time=%f beg=%f cur_beg=%.f\n",i,CurrProcTime(i),time_beg,(CurrProcTime(i)-time_beg)*LoopSZ/call_time);
|
|
|
|
|
|
// for(j=0;j<max_rank;j++) prev_size[j]=0;
|
|
|
|
// cur=0; //òàê áûëî
|
|
cur = (CurrProcTime(i) - time_beg)*LoopSZ / call_time; //âîçìîæíî è îòðèöàòåëüíûé íî ïîòîì ïîñëå ïåðâîé ïåðåäà÷è âñå âñòàíåò íà ñâîè ìåñòà
|
|
cur_beg = cur;
|
|
real_comm = 0; last_real_comm = 0;
|
|
d = 0;
|
|
// ìàêðîñ 'aà' çàìåíÿåò (ñ+1)-òûé èíäåêñ â âûðàæåíèè a[prev][b[0]][b[1]][b[2]][b[3]] íà èíäåêñ 'p'
|
|
#define a_change(prev,c,p) ((c==0)?a[prev][p][b[1]][b[2]][b[3]]:(c==1)?a[prev][b[0]][p][b[2]][b[3]]:(c==2)?a[prev][b[0]][b[1]][p][b[3]]:a[prev][b[0]][b[1]][b[2]][p])
|
|
#define a_b(i) a[i][b[0]][b[1]][b[2]][b[3]]
|
|
|
|
// printf("MIN %d %d %d %d\n",min(3,bsize(i,ord[0],s[ord[0]])-1),min(3,bsize(i,ord[1],s[ord[1]])-1),min(3,bsize(i,ord[2],s[ord[2]])-1),min(3,bsize(i,ord[3],s[ord[3]])-1));
|
|
// printf("Invers %d %d %d %d\n",invers[ord[0]],invers[ord[1]],invers[ord[2]],invers[ord[3]]);
|
|
// printf("ord %d %d %d %d\n",ord[0],ord[1],ord[2],ord[3]);
|
|
// ord[0]==0 && ord[1]==1 && ord[2]==2 ~~~~~~~~ z,x,y ~ 0 1 2
|
|
for_calc(b, ord[3], 0, min(3, bsize(i, ord[3], s[ord[3]]) - 1)) //b[ord[3]] ïðîáåãàåò áëîêè âäîëü ñâîåãî èçìåðåíèÿ â i-òîì ïðîöå
|
|
for_calc(b, ord[2], 0, min(3, bsize(i, ord[2], s[ord[2]]) - 1)) //b[ord[2]] ïðîáåãàåò áëîêè ...
|
|
for_calc(b, ord[0], 0, min(3, bsize(i, ord[0], s[ord[0]]) - 1)) //b[ord[0]] ïðîáåãàåò áëîêè ...
|
|
for_calc(b, ord[1], 0, min(3, bsize(i, ord[1], s[ord[1]]) - 1)) //b[ord[1]] ïðîáåãàåò áëîêè ...
|
|
{
|
|
m = cur; mwait = m;
|
|
d = 0; //÷òîáû ïðîéòè äàëüøå åñëè íåâîçìîæíî ñäåëàòü óñêîðåííûé ïðîáåã
|
|
// if(mode) printf("B=%d %d %d %d\n",b[ord[1]],b[ord[0]],b[ord[2]],b[ord[3]]);
|
|
|
|
//óñêîðåííûé ïðîáåã
|
|
if (1) //âñåãäà äåëàòü [íî âîîáùå ìîæíî åãî îòëþ÷èòü è ñðàâíèòü ðåçóëüòàòû (ñ íèì & áåç íåãî)]
|
|
for (k = 0; k < rank_mas; k++)
|
|
{
|
|
if (!invers[ord[k]] && b[ord[k]] == 3 || invers[ord[k]] && b[ord[k]] == 0 && (bsize(i, ord[k], s[ord[k]]) - 1 > 3))
|
|
{
|
|
b[ord[k]] = invers[ord[k]] ? 2 : 1;
|
|
beg = a_b(i); // âðåìÿ çàâåðøåíèÿ âû÷èñëåíèÿ áëîêà 1
|
|
b[ord[k]] = invers[ord[k]] ? 1 : 2;
|
|
step = a_b(i) - beg; // ïðîìåæóòîê âðåìåíè ìåæäó âðåìåíàìè çàâåðøåíèÿ âû÷èñëåíèé ñîñåäíèõ áëîêîâ
|
|
last = sz[b[0]][b[1]][b[2]][b[3]];
|
|
b[ord[k]] = invers[ord[k]] ? 0 : 3;
|
|
|
|
// if(mode) printf("prev=%.f sz=%.f Beg %.f step=%.f * %d\n",last,a_b(i),beg,step,(bsize(i,ord[k],s[ord[k]])-2));
|
|
|
|
sz[b[0]][b[1]][b[2]][b[3]] = a_b(i);
|
|
a_b(i) += beg + step * (bsize(i, ord[k], s[ord[k]]) - 2) - last; // ðàñ÷åò âðåìåíè ïîñëåäíåãî áëîêà â ýòîì èçìåðåíèè
|
|
cur = a_b(i);
|
|
|
|
for (j = 0; j < rank_mas; j++)
|
|
{ //ïðîãíîçèðóåì çàãðóçêó êîììóíèêàöèîííûõ êàíàëîâ
|
|
comm[i][j] += (bsize(i, ord[k], s[ord[k]]) - 3)*prev_comm[ord[k]][j];
|
|
// printf("comm=== %.f increased by %.f\n",comm[i][j],(bsize(i,ord[k],s[ord[k]])-3)*prev_comm[ord[k]][j]);
|
|
prev_comm[ord[k]][j] = 0; //÷òîáû ïîâûøàòü îäèí ðàç
|
|
}
|
|
|
|
real_comm += (bsize(i, ord[k], s[ord[k]]) - 3)*last_real_comm;
|
|
// printf("[last=%f] * %d = [real_comm=%f]\n",last_real_comm,bsize(i,ord[k],s[ord[k]])-3,real_comm);
|
|
|
|
|
|
if (mode) printf("%d-%d-%d-%d-%d==%.f\n", i, b[0], b[1], b[2], b[3], a_b(i));
|
|
d = -1;
|
|
break; //÷òîáû îäèí ðàç âû÷èñëÿë ýòîò áëîê
|
|
}
|
|
}
|
|
//åñëè áûë ñäåëàí óñêîðåííûé ïðîáåã, òî íå íàäî âû÷èñëÿòü ýòîò áëîê åùå ðàç
|
|
if (d < 0) continue;
|
|
|
|
last_real_comm = 0;
|
|
//comm - êîììóíèêàöèîíûé êàíàë íà âõîä è îòâå÷àåò çà òî êîãäà îí çàêîí÷èò ïåðåäà÷ó äàííûõ
|
|
for (k = 0; k < rank_mas; k++)
|
|
{
|
|
// if(mode) printf("Invers=%d Prev[%d]=%d b[k]=%d\n",invers[k],k,prev[k],b[k]);
|
|
if (!invers[k] && b[k] == 0 && prev[k] != -1)
|
|
{
|
|
comm[i][k] = max(comm[i][k], a_change(prev[k], k, min(3, bsize(i, ord[k], s[ord[k]]) - 1))) + (TStart + (a_b(i) / min(s[k], msize(i, k))*ShdWid[k])*TByte)*LoopSZ / call_time;
|
|
last_real_comm = max(last_real_comm, (TStart + (a_b(i) / min(s[k], msize(i, k))*ShdWid[k])*TByte));
|
|
}
|
|
|
|
if (invers[k] && b[k] == min(3, bsize(i, ord[k], s[ord[k]]) - 1) && prev[k] != -1)
|
|
{
|
|
comm[i][k] = max(comm[i][k], a_change(prev[k], k, 0)) + (TStart + (a_b(i) / min(s[k], msize(i, k))*ShdWid[k])*TByte)*LoopSZ / call_time;
|
|
last_real_comm = max(last_real_comm, (TStart + (a_b(i) / min(s[k], msize(i, k))*ShdWid[k])*TByte));
|
|
}
|
|
|
|
//÷òîáû è îòïðàâèòåëü æäàë êîíöà ïåðåäà÷è äàííûõ íàäî ñäåëàòü ñëåäóþùåå
|
|
if (invers[k] && b[k] == 0 && post[k] != -1 || !invers[k] && b[k] == min(3, bsize(i, ord[k], s[ord[k]]) - 1) && post[k] != -1)
|
|
{
|
|
last_real_comm = max(last_real_comm, (TStart + (0 * a_b(i) / min(s[k], msize(i, k))*ShdWid[k])*TByte));
|
|
}
|
|
|
|
|
|
if (mode) printf("Proc[%d] last_real_comm=%f\n", i, last_real_comm);
|
|
|
|
//ó prev_comm åñòü íàïðàâëåíèå [j] âäîëü êîòîðîãî îí èùåò áëîêè ñ íîìåðàìè 1 è 2
|
|
//äëÿ êàæäîãî íàïðàâëåíèÿ îí äîëæåí ïîñìîòðåòü, êàê èçìåíÿëèñü êîììóíèêàöèîííûå êàíàëû [k]
|
|
for (j = 0; j < max_rank; j++)
|
|
if (!invers[ord[j]] && b[ord[j]] == 1 || invers[ord[j]] && b[ord[j]] == 2)
|
|
prev_comm[ord[j]][k] = comm[i][k];
|
|
for (j = 0; j < max_rank; j++)
|
|
if (!invers[ord[j]] && b[ord[j]] == 2 || invers[ord[j]] && b[ord[j]] == 1)
|
|
prev_comm[ord[j]][k] = comm[i][k] - prev_comm[ord[j]][k];
|
|
}
|
|
|
|
|
|
for (k = 0; k < rank_mas; k++)
|
|
{
|
|
// printf("[k=%d] m=%.f comm=%.f\n",k,m,comm[i][k]);
|
|
m = max(m, comm[i][k]);
|
|
}
|
|
mwait = m - mwait;
|
|
com[i] += mwait * call_time / LoopSZ;
|
|
real_comm += last_real_comm;
|
|
|
|
//mwait è com[i] äåëàþò àñèíõðîííóþ ïåðåäà÷ó äàííûõ (ñîâìåùåííóþ ñ âû÷èñëåíèÿìè - è êàê ðåçóëüòàò ó÷èòûâàåòñÿ òîëüêî ïåðâàÿ ïåðåäà÷à à îñòàëüíûå ñîâìåùàþòñÿ)
|
|
//last_real_comm è real_comm äåëàåò ñèíõðîííóþ (íå ñîâìåùåííóþ ñ âû÷èñëåíèÿìè - è êàê ðåçóëüòàò ó÷èòûâàþòñÿ âñå ïåðåäà÷è)
|
|
|
|
// if(mwait>=0) printf("%d-%d-%d-%d wait[%d]=%.f [%f sec] last_real_comm=%f [sync=idle||insuf.sys]=%f\n",b[0],b[1],b[2],b[3],i,mwait,com[i],last_real_comm,com[i]-last_real_comm);
|
|
if (add && com[i] > last_real_comm)
|
|
{
|
|
AddTime(__CPU_time_sys, i, com[i] - last_real_comm);
|
|
AddTime(__Insuff_parall_sys, i, com[i] - last_real_comm);
|
|
com[i] = last_real_comm;
|
|
}
|
|
|
|
sz[b[0]][b[1]][b[2]][b[3]] = a_b(i);
|
|
a_b(i) += m;
|
|
cur = a_b(i);
|
|
if (mode) printf("%d-%d-%d-%d-%d=%.f\n", i, b[0], b[1], b[2], b[3], cur);
|
|
if (mode) printf("Proc[%d] last_real_comm=%f\n", i, last_real_comm);
|
|
}
|
|
|
|
// printf("proc %d-%d-%d-%d done_time=%f\n",n[0],n[1],n[2],n[3],cur*call_time/LoopSZ);
|
|
|
|
if (add)
|
|
{
|
|
// AddTime(__Synchronize,i,time_beg+cur*call_time/LoopSZ);
|
|
|
|
// printf("proc[%d] wait_shad+= %f (async) || %f (sync)\n",i,com[i],real_comm);
|
|
if (mode) printf("proc[%d] wait_shad+= %f\n", i, real_comm);
|
|
// AddTime(__Wait_shadow,i,com[i]);//async
|
|
AddTime(__Wait_shadow, i, real_comm);//sync
|
|
|
|
// printf("wait[%d]=%f sec cur_beg=%.f cur=%.f raznica=%f\n",i,com[i],cur_beg,cur,(cur-cur_beg)*call_time/LoopSZ);
|
|
// AddTime(__CPU_time_usr, i, (cur-cur_beg)*call_time/LoopSZ - com[i]); //call_time/(m*mult)
|
|
}
|
|
|
|
/*
|
|
if(mode && com[i]>0.000001)
|
|
{ printf("proc(%d) wait %2.10f sec from [ ",i,com[i]);
|
|
for(k=0;k<rank_mas;k++)
|
|
if(prev[k]!=-1) printf("%d ",prev[k]);
|
|
printf("]\n");
|
|
}
|
|
*/
|
|
}
|
|
}
|
|
|
|
time_x = cur * call_time / LoopSZ;
|
|
|
|
//printf("Total=%f Idle=%f\n",cur*call_time/LoopSZ*m,cur*call_time/LoopSZ*m-call_time);
|
|
|
|
// printf("Result %d-%d-%d=%d (time=%5.15f)\n",x,y,z,cur,cur*call_time/LoopSZ);
|
|
// printf("cur=%d call=%5.15f LoopSZ=%d\n",cur,call_time,LoopSZ);
|
|
// printf("Result %d-%d-%d=%d (time=%f)\n",x,y,z,cur,cur*call_time/LoopSZ);
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
for (n[1] = 0; n[1] < 4; n[1]++)
|
|
for (n[2] = 0; n[2] < 4; n[2]++)
|
|
free(a[i][n[0]][n[1]][n[2]]);
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
for (n[1] = 0; n[1] < 4; n[1]++)
|
|
free(a[i][n[0]][n[1]]);
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
for (n[0] = 0; n[0] < 4; n[0]++)
|
|
free(a[i][n[0]]);
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
free(a[i]);
|
|
free(a);
|
|
}
|
|
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank())
|
|
free(comm[i]);
|
|
free(comm);
|
|
//free(prev);
|
|
//free(n);
|
|
//free(b);
|
|
//mode=0;
|
|
return;
|
|
}
|
|
|
|
|
|
//==== procedure =========================================================================
|
|
void CommCost::Across(double call_timeArg, long LoopSZArg, LoopBlock** ProcBlockArg, int type_size)
|
|
{
|
|
int i, j, i0, k;
|
|
int rank, r;
|
|
vector<int> dim;
|
|
int ind[10];
|
|
vector<long> pp;
|
|
|
|
ProcBlock = ProcBlockArg;
|
|
call_time = call_timeArg;
|
|
LoopSZ = LoopSZArg;
|
|
|
|
TStart = vm->getTStart();
|
|
TByte = vm->getTByte()*type_size;
|
|
|
|
pp = vm->getSizeArray();
|
|
rank = pp.size();
|
|
|
|
// ïå÷àòü êîíôèãóðàöèè ïðîöåññîðíîé ðåøåòêè
|
|
if (mode)
|
|
{
|
|
printf("VM %d", pp[0]);
|
|
for (i = 1; i < rank; i++)
|
|
printf("x%d", pp[i]);
|
|
printf("\n");
|
|
}
|
|
|
|
//calc invers
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
for (j = 0; j < ProcBlock[i]->GetRank(); j++)
|
|
if (ProcBlock[i]->LSDim[j].Step < 0) { invers[j] = 1; ProcBlock[i]->LSDim[j].Step = -ProcBlock[i]->LSDim[j].Step; }
|
|
else invers[j] = 0;
|
|
|
|
// printf("Inverse %d %d %d\n",invers[0],invers[1],invers[2]);
|
|
|
|
|
|
//calc rank_mas
|
|
for (i = 0, rank_mas = 0; i < MPSProcCount(); i++)
|
|
{
|
|
r = ProcBlock[i]->GetRank();
|
|
if (rank_mas < r) rank_mas = r;
|
|
}
|
|
|
|
|
|
//correct rank if other dims equals 1
|
|
//xp for(i=k=rank_mas;i<rank;i++)
|
|
//xp if(pp[i]!=1) k=i;
|
|
//xp rank=k;
|
|
|
|
|
|
if (mode) printf("rank=%d rank_mas=%d\n", rank, rank_mas);
|
|
// if(rank>rank_mas) { printf("Too many dimensions %d>%d\n",rank,rank_mas); exit(1);}
|
|
if (rank_mas > max_rank) { printf("Cannot model ACROSS pipeline because of Array rank=%d>%d\n", rank_mas, max_rank); return; }
|
|
|
|
s.resize(max_rank);
|
|
p.resize(max_rank);
|
|
dmax.resize(max_rank);
|
|
dim.resize(max_rank);
|
|
conv_beg.resize(max_rank);
|
|
conv_end.resize(max_rank);
|
|
mult_is.resize(max_rank);
|
|
pip.resize(max_rank);
|
|
ord.resize(max_rank);
|
|
|
|
//s = (long *)malloc(max_rank * sizeof(long));
|
|
//p = (int *)malloc(max_rank * sizeof(int));
|
|
//dmax = (int *)malloc(max_rank * sizeof(int));
|
|
//dim = (int *)malloc(max_rank * sizeof(int));
|
|
//conv_beg = (int *)malloc(max_rank * sizeof(int));
|
|
//conv_end = (int *)malloc(max_rank * sizeof(int));
|
|
|
|
//mult_is = (int *)malloc(max_rank * sizeof(int));
|
|
//pip = (int *)malloc(max_rank * sizeof(int));
|
|
//ord = (int *)malloc(max_rank * sizeof(int));
|
|
|
|
|
|
for (i = rank; i < max_rank; i++)
|
|
ShdWid[i] = 0;
|
|
for (i = 0; i < rank; i++)
|
|
ShdWid[i] = ShdWid(i);
|
|
|
|
|
|
if (mode) printf("COMM ACROSS %5.10f ShdWid=%d %d %d %d\n", call_time, ShdWid[0], ShdWid[1], ShdWid[2], ShdWid[3]);
|
|
|
|
for (k = 0; k < rank; k++)
|
|
p[k] = pp[k];
|
|
//ïî äðóãèì èçìåðåíèÿ ðåøåòêà ïðîöîâ èìååò øèðèíó 1
|
|
for (k = rank; k < max_rank; k++)
|
|
p[k] = 1;
|
|
|
|
//ïå÷àòü áëîêîâ êàæäîãî ïðîöåññîðà
|
|
if (mode)
|
|
{
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
{
|
|
r = ProcBlock[i]->GetRank();
|
|
for (k = 0; k < rank_mas; k++)
|
|
ind[k] = i;
|
|
//only comment i=ind[0]*p[2]*p[1]+ind[1]*p[2]+ind[2]; only comment
|
|
for (k = rank_mas - 1; k >= 0; k--)
|
|
{
|
|
ind[k] = ind[k] % p[k];
|
|
for (x = 0; x < k; x++)
|
|
ind[x] = ind[x] / p[k];
|
|
}
|
|
|
|
printf("[%d", ind[0]);
|
|
for (k = 1; k < max_rank; k++)
|
|
if (k >= rank_mas)
|
|
printf(",0");
|
|
else
|
|
printf(",%d", ind[k]);
|
|
printf("]%2d. ", i);
|
|
|
|
for (j = 0; j < r; j++)
|
|
printf("%d:%d:%d ", mbeg(i, j), mend(i, j), mstep(i, j));
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < max_rank; i++) { dmax[i] = 1; dim[i] = 1; }
|
|
|
|
for (j = 0; j < MPSProcCount(); j++)
|
|
if (ProcBlock[j]->GetRank()) break;
|
|
first = j;
|
|
//first proc with data
|
|
|
|
//calc dmax(max_dim_size)
|
|
for (k = 0; k < max_rank; k++)
|
|
for (x = j, dmax[k] = msize(j, k), i = j + 1; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank() && (msize(i, k) > msize(x, k)))
|
|
{
|
|
x = i;
|
|
dmax[k] = msize(i, k);
|
|
}
|
|
|
|
//calc mult (number of parallelizing pipelines)
|
|
//ýòà ÷àñòü ïîìîãàåò âû÷èñëèòü mult â äàëüíåéøåì
|
|
if (rank_mas >= 2)
|
|
{
|
|
mult = 1;
|
|
for (k = 0; k < max_rank; k++)
|
|
mult_is[k] = 0;
|
|
|
|
//only comment i=i0*p[2]*p[1]+j0*p[2]+e0; only comment
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
if (ProcBlock[i]->GetRank()) break;
|
|
//i=first proc with data
|
|
|
|
|
|
if (mode)
|
|
{
|
|
printf("p=");
|
|
for (k = 0; k < rank_mas; k++)
|
|
printf("%d ", p[k]);
|
|
printf("\n");
|
|
}
|
|
|
|
// Íàäî (x=0 i0=p[2]*p[1]) (x=1 i0=p[2]) (x=2 i0=1)
|
|
// ýòî ñìåùåíèÿ äëÿ âû÷èñëåíèÿ ñîñåäíèõ ïðîöîâ âäîëü êàêîãî-òî èçìåðåíèÿ
|
|
for (x = 0; x < max_rank; x++)
|
|
{
|
|
for (k = x + 1, i0 = 1; k < rank_mas; k++)
|
|
i0 = i0 * p[k];
|
|
|
|
if (p[x] > 1 && i + i0 < MPSProcCount())
|
|
{
|
|
for (k = 0, r = 1; k < rank_mas; k++)
|
|
if (mbeg(i + i0, k) != mbeg(i, k) || mend(i + i0, k) != mend(i, k))
|
|
{
|
|
if (ShdWid[k] != 0) { r = 0; break; }
|
|
else r = 2;
|
|
}
|
|
// r==0 (ñîñåäíèå ïðîöû ïî îäíîìó èçìåðåíèþ èìåþò ðàçíûå áëîêè) r==1 (îäèíàêîâûå áëîêè)
|
|
if (r) { mult *= p[x]; mult_is[x] = r; }
|
|
}
|
|
}
|
|
}
|
|
|
|
if (mode) printf("MULT %d %d %d %d = %d\n", mult_is[0], mult_is[1], mult_is[2], mult_is[3], mult);
|
|
|
|
|
|
// for(i=0;i<rank_mas;i++)
|
|
// if(ShdWid[i]==0) { mult_is[i]=1;}
|
|
|
|
// if(mode) printf("MULT + ZeroWidth %d %d %d %d\n",mult_is[0],mult_is[1],mult_is[2],mult_is[3]);
|
|
|
|
j = first;
|
|
// mm[i]==0 (ïî i-òîìó èçìåðåíèþ åñòü êîíâåéåð) ==1 (êîíâåéåðà íåò)
|
|
mm.resize(max_rank);
|
|
//mm = (int *)malloc(max_rank * sizeof(int));
|
|
for (k = 0; k < max_rank; k++)
|
|
mm[k] = 1;
|
|
|
|
for (i = 0; i < MPSProcCount(); i++)
|
|
{
|
|
r = ProcBlock[i]->GetRank();
|
|
for (k = 0; k < r; k++)
|
|
if (mbeg(i, k) != mbeg(j, k) || mend(i, k) != mend(j, k)) mm[k] = 0;
|
|
}
|
|
|
|
|
|
// for(i=0;i<rank_mas;i++)
|
|
// if(ShdWid[i]==0) { mm[i]=1;}
|
|
|
|
if (mode) printf("MM = %d %d %d %d\n", mm[0], mm[1], mm[2], mm[3]);
|
|
// for(i=0;i<max_rank;i++)
|
|
// mult_is[i]=(i<rank_mas)?1:0; //çà ïðåäåëàìè ðàçìåðíîñòè ìàññèâà ñ÷èòàåì 0 (íå ðàçìíîæåííûìè) òàê ïî òåì èçìåðåíèÿì 1 ïðîöåññîð, èíà÷å ðàíüøå âûëåòàåò ñ îøèáêîé ïîëüçîâàòåëÿ
|
|
/*
|
|
for(i=0,j=0;i<rank_mas;i++)
|
|
{
|
|
if(mm[i]==0)
|
|
{ if(ShdWid[i]==0 && mult_is[j]==0) {mult*=p[j]; mult_is[j++]=2;}
|
|
else mult_is[j++]=0;
|
|
}
|
|
if(mm[i]==1)
|
|
{ mult_is[j++]=1;}
|
|
|
|
}
|
|
*/
|
|
|
|
|
|
// mult_is == 1 (ðàçìíîæåííûé ìàññèâ, => insuff.par.usr) mult_is == 2 (shdwid==0, => ïàðàëëåëüíûå êîíâåéåðû)
|
|
if (mode) printf("MULT %d %d %d %d\n", mult_is[0], mult_is[1], mult_is[2], mult_is[3]);
|
|
|
|
//âû÷èñëÿåì êàêèå ïðîöû ó÷àñòâóþò â êîíâåéåðå! (ïðîöû [îò i1 äî i2][îò j1 äî j2] )
|
|
i = first;
|
|
//conv_beg[0,1,2] ~ i
|
|
for (k = 0; k < max_rank; k++)
|
|
conv_beg[k] = i;
|
|
for (k = max_rank - 1; k >= 0; k--)
|
|
{
|
|
conv_beg[k] = conv_beg[k] % p[k];
|
|
for (x = 0; x < k; x++)
|
|
conv_beg[x] = conv_beg[x] / p[k];
|
|
}
|
|
|
|
for (k = 0; k < max_rank; k++)
|
|
conv_end[k] = conv_beg[k];
|
|
|
|
for (x = 0; x < rank_mas; x++)
|
|
{
|
|
for (k = x + 1, i0 = 1; k < rank_mas; k++)
|
|
i0 = i0 * p[k];
|
|
|
|
for (j = 1; j < p[x] && i + i0 < MPSProcCount() && ProcBlock[i + i0]->GetRank(); j++, i += i0)
|
|
{
|
|
for (k = 0, r = 1; k < rank_mas; k++)
|
|
if (mbeg(i + i0, k) != mbeg(i, k) || mend(i + i0, k) != mend(i, k)) { r = 0; break; }
|
|
// r==0 (ñîñåäíèå ïðîöû ïî îäíîìó èçìåðåíèþ èìåþò ðàçíûå áëîêè) r==1 (îäèíàêîâûå áëîêè)
|
|
if (r == 0) { conv_end[x]++; }
|
|
}
|
|
}
|
|
|
|
////âû÷èñëåíèå ïðîöåññîðîâ ïî êîòîðûì åñòü êîíâåéåð è çàíîñèòñÿ â ìàññèâ 'pip'
|
|
for (i = 0; i < max_rank; i++)
|
|
pip[i] = conv_end[i] - conv_beg[i] + 1;
|
|
if (mode) printf("Konv procs=%d %d %d %d\n", pip[0], pip[1], pip[2], pip[3]);
|
|
|
|
|
|
if (mode)
|
|
{
|
|
printf("We got %d KONV ", mult);
|
|
for (k = 0; k < max_rank; k++)
|
|
if (k >= rank_mas) printf("%d=%d ", conv_beg[k], conv_end[k]);
|
|
else printf("%d:%d ", conv_beg[k], conv_end[k]);
|
|
printf("\n");
|
|
}
|
|
|
|
//âû÷èñëÿåì ðàçìåðû ìàññèâà êîòîðûé íàäî âûïîëíèòü â êîíâåéåðå
|
|
//only comment i=ind[0]*p[2]*p[1]+ind[1]*p[2]+ind[2]; only comment
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
for (x = k + 1, y = 1; x < max_rank; x++)
|
|
y *= p[x];
|
|
dim[k] = mend(conv_end[k] * y, k) - mbeg(conv_beg[k] * y, k) + 1;
|
|
}
|
|
if (mode) printf("Dim %d %d %d %d\n", dim[0], dim[1], dim[2], dim[3]);
|
|
|
|
|
|
//!!!!!!!!!!! ïîêà åùå íå ðàññìîòðåí ñëó÷àé êîãäà rank>3
|
|
// è êîãäà åñòü >=2 ïàðàëëåëüíûõ êîíâåéåðà (îäèíàêîâûå)
|
|
|
|
//ïîäñ÷åò âðåìåíè êîììóíèêàöèé è âðåìåíè ðàáîòû ïðîöîâ
|
|
if (rank_mas == 1) //îäíîìåðíûé ìàññèâ, => íåò êîíâåéåðà
|
|
{
|
|
if (mode) printf("No pipeline\n");
|
|
double a, b, c, d;
|
|
k = 0;
|
|
a = call_time;
|
|
b = (p[0] - 1)*(TStart + ShdWid[0] * TByte);
|
|
if (mode) printf("exec=%f comm=%f\n", a, b);
|
|
for (i = 1; i < p[0]; i++) //íå ó÷òåíî ÷òî ïîðÿäîê ìîæåò áûòü îáðàòíûì
|
|
{
|
|
if (i != 1) c = CurrInterval->GetProcPred(i - 1, _Execution_time);
|
|
else c = 0.0;
|
|
d = CurrInterval->GetProcPred(i, _Execution_time);
|
|
|
|
// AddTime(__Insuff_parall_usr, currentVM->map(i), i*call_time/p[0]); //óáðàë, ïîòîìó ÷òî íåò äóáëèðîâàíèé, åñòü ðàçáàëàíñèðîâêà
|
|
AddTime(__Wait_shadow, currentVM->map(i), (TStart + ShdWid[0] * TByte) + max(0, c - d));
|
|
}
|
|
for (i = 0; i < p[0]; i++)
|
|
{
|
|
AddTime(__CPU_time_usr, currentVM->map(i), call_time / p[0]);
|
|
}
|
|
}
|
|
else
|
|
{ //pipeline in old dvm
|
|
long M, N, P, W, Q;
|
|
|
|
double DD, shad;
|
|
|
|
// M-êîíâåéåðèçóåìûå N-ÍÅêîíâ.(êâàíòóåìûå) P-êîë-âî ïðîöîâ äëÿ êîíâåéåðà
|
|
for (i = 0, M = 1, N = 1, P = 1; i < rank_mas; i++)
|
|
{
|
|
if (mm[i] || ShdWid[i] == 0)
|
|
N *= dim[i];
|
|
else
|
|
{
|
|
M *= dim[i];
|
|
P *= pip[rank - 1 - i];
|
|
}
|
|
}
|
|
|
|
for (i = 0, DD = 1; i < rank_mas; i++)
|
|
DD *= dmax[i];
|
|
for (i = 0, W = 0, shad = 0; i < rank_mas; i++)
|
|
if (mm[i] == 0) shad += DD / dmax[i] * ShdWid[i]; // åñòü êîíâåéåð ïî ýòîìó èçìåðåíèþ
|
|
W = ceil(shad / N);
|
|
|
|
if (mode) printf("M=%d N=%d W=%d P=%d\n", M, N, W, P);
|
|
if (mode) printf("W=%d (Shad=%5.0f)\n", W, shad);
|
|
|
|
|
|
double Tc = call_time / LoopSZ;
|
|
// double Tm=0.000023/500;
|
|
// double T0wrecv=0.000001;
|
|
// double Tirecv=0.000001;
|
|
// double Tisend=0.000001;
|
|
double Tm = TStart + TByte;
|
|
double T0wrecv = TStart;
|
|
double Tirecv = TStart;
|
|
double Tisend = TStart;
|
|
double tt1, tt2;
|
|
|
|
if (mode)
|
|
{ //printf("pip=%d %d %d \n",pip[0],pip[1],pip[2]);
|
|
if (W*Tm > 0.0000000001) printf("Greater? P=%d or %7.0f\n", P, (M*Tc) / (W*Tm));
|
|
}
|
|
|
|
if (W*Tm > 0.0000000001 && P > (M*Tc) / (W*Tm))
|
|
{
|
|
if (TStart > 0)
|
|
{
|
|
int tmp1 = N*(P*(Tc*M + P * Tm*W) - 2 * Tc*M);
|
|
int tmp2 = (P*(Tirecv + T0wrecv));
|
|
Q = sqrt(N*(P*(Tc*M + P * Tm*W) - 2 * Tc*M) / (P*(Tirecv + T0wrecv)));
|
|
}
|
|
else
|
|
Q = -1;
|
|
if (Q > N)
|
|
Q = N;
|
|
if (Q < 1)
|
|
Q = 1;
|
|
// tt1 = (Tirecv + T0wrecv)*Q + (N*( P*(Tc*M + P*Tm*W) - 2*Tc*M ) / P )/Q;
|
|
// tt2 = Tirecv*P + Tisend*(P - 1) + T0wrecv + N*Tm*W;
|
|
tt1 = (N * (P - 2) * Tc * M) / (P * Q);
|
|
tt2 = Tirecv * P + Tisend * (P - 1) + T0wrecv + (Tirecv + T0wrecv) * Q + N * Tm * W + N * P * Tm * W / Q;
|
|
}
|
|
else
|
|
{
|
|
if (TStart > 0)
|
|
Q = sqrt(N*(P - 1)*(M*Tc + P * Tm*W) / (P*(Tirecv + T0wrecv)));
|
|
else
|
|
Q = -1;
|
|
if (Q > N)
|
|
Q = N;
|
|
if (Q < 1)
|
|
Q = 1;
|
|
// tt1 = (Tirecv + T0wrecv)*Q + (N*(P - 1)*(Tc*M + P*Tm*W)/P)/Q;
|
|
// tt2 = Tirecv*P + Tisend*(P - 1) + T0wrecv + Tc*M*N/P;
|
|
tt1 = (N*(P - 1)*Tc*M) / (P*Q) + Tc * M*N / P;
|
|
tt2 = Tirecv * P + Tisend * (P - 1) + T0wrecv + (Tirecv + T0wrecv)*Q + N * (P - 1)*Tm*W / Q;
|
|
}
|
|
|
|
|
|
if (mode) printf("Q=%d time=%5.10f+%5.10f=%5.10f\n", Q, tt1, tt2, tt1 + tt2);
|
|
|
|
// {
|
|
// long *kk;
|
|
// kk=(long*)malloc(max_rank*sizeof(long));
|
|
for (i = 0; i < rank_mas; i++)
|
|
s[i] = dmax[i];
|
|
|
|
for (i = rank_mas; i < max_rank; i++)
|
|
s[i] = 1;
|
|
|
|
|
|
// Êâàíòóåì ïî ìíîãèì èçìåðåíèÿì
|
|
for (i = 0, N = min(Q, N); i < rank_mas; i++)
|
|
if (mm[i] || ShdWid[i] == 0)
|
|
{
|
|
if (N >= dim[i])
|
|
{
|
|
s[i] = 1;
|
|
if (mode) printf("QQ=%d ", dim[i]);
|
|
}
|
|
else
|
|
{
|
|
s[i] = dim[i] / N;
|
|
if (mode) printf("QQ=%d ", N);
|
|
break;
|
|
}
|
|
N /= dim[i];
|
|
}
|
|
|
|
|
|
// x=kk[0]; y=kk[1]; z=kk[2];
|
|
// free(kk);
|
|
// }
|
|
|
|
// printf("Shd=%d %d %d\n",ShdWid[0],ShdWid[1],ShdWid[2]);
|
|
|
|
if (mode) printf("Konv Steps [%d,%d,%d,%d]\n", s[0], s[1], s[2], s[3]);
|
|
// x=dmax[0];y=1;z=242;
|
|
for (k = 0; k < max_rank; k++) // standart order of doing cycles
|
|
ord[k] = k;
|
|
|
|
if (full_mode == 0) add = 1; //çàïèñûâàåò âðåìåíà â èíòåðâàë
|
|
calculate_all_pipes();
|
|
|
|
if (mode) printf("Time_c=%5.10f time_x=%5.10f\n", time_c, time_x);
|
|
//end of pipeline in old dvm
|
|
|
|
if (full_mode) // full_mode - searching better than pipe in old dvm
|
|
{
|
|
double ttt = 1000000000;
|
|
long *best, *ord_best, count, *st;//st=step in full_search
|
|
|
|
best = (long *)malloc(max_rank * sizeof(long));
|
|
ord_best = (long *)malloc(max_rank * sizeof(long));
|
|
st = (long *)malloc(max_rank * sizeof(long));
|
|
add = 0; // íå çàïèñûâàåò â èíòåðâàë, à òîëüêî èùåò íàèëó÷øèé
|
|
|
|
// if(full_mode==2) for(i=1;i<max_rank;i++) k*=i;
|
|
// else k=1;
|
|
//k=ñêîëüêî íàäî ðàçíûõ ïîðÿäêîâ îáðàáîòàòü
|
|
// printf("Orders=%d\n",k);
|
|
|
|
//ëó÷øèé ðåçóëüòàò è ïîðÿäîê ïî ñõåìå ñòàðîãî dvm êîíâåéåðà
|
|
for (i = 0; i < max_rank; i++)
|
|
{
|
|
best[i] = s[i];
|
|
ord_best[i] = i;
|
|
}
|
|
|
|
ttt = time_x;
|
|
count = 0;
|
|
for (ord[0] = 0; ord[0] < max_rank; ord[0]++)
|
|
for (ord[1] = 0; ord[1] < max_rank; ord[1]++)
|
|
for (ord[2] = 0; ord[2] < max_rank; ord[2]++)
|
|
for (ord[3] = 0; ord[3] < max_rank; ord[3]++)
|
|
{
|
|
if (ord[0] + ord[1] + ord[2] + ord[3] != 6) continue;
|
|
i = ord[0] * ord[1] + ord[2] * ord[3];
|
|
if (i != 6 && i != 3 && i != 2) continue;
|
|
//çäåñü îíè óæå âñå ðàçíûå è çàíèìàþò âñå ÷èñëà [îò 0 äî max_rank-1]
|
|
//need to check if you increase max_rank!!!
|
|
for (k = 0, i = 0; k < max_rank; k++)
|
|
if (dmax[ord[k]] == 1 && ord[k] != k) i = 1;
|
|
if (i) continue; //îñòàâèòü òîëüêî îäèí ïîðÿäîê ïî ýòîìó èçìåðåíèþ
|
|
|
|
if (mode)
|
|
printf("Ord %d %d %d %d\n", ord[0], ord[1], ord[2], ord[3]);
|
|
|
|
//âû÷èñëåíèå øàãà ñåòêè
|
|
for (i = 0; i < max_rank; i++)
|
|
st[i] = dmax[ord[i]] / 10 + 1; //ðàçìåð ñåòêè == 10 ïî êàæäîìó èçìåðåíèþ
|
|
|
|
if (rank_mas >= 2) //full_search
|
|
{
|
|
// printf("Step %d %d %d %d\n",st[0],st[1],st[2],st[3]);
|
|
for (s[0] = 1; s[0] < dmax[ord[0]] || s[0] < dmax[ord[0]] + st[0] && (s[0] = dmax[ord[0]]); s[0] += st[0])
|
|
for (s[1] = 1; s[1] < dmax[ord[1]] || s[1] < dmax[ord[1]] + st[1] && (s[1] = dmax[ord[1]]); s[1] += st[1])
|
|
for (s[2] = 1; s[2] < dmax[ord[2]] || s[2] < dmax[ord[2]] + st[2] && (s[2] = dmax[ord[2]]); s[2] += st[2])
|
|
for (s[3] = 1; s[3] < dmax[ord[3]] || s[3] < dmax[ord[3]] + st[3] && (s[3] = dmax[ord[3]]); s[3] += st[3])
|
|
{
|
|
calculate_all_pipes();
|
|
if (time_x < ttt)
|
|
{
|
|
ttt = time_x;
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
best[ord[k]] = s[k];
|
|
ord_best[k] = ord[k];
|
|
}
|
|
if (mode) printf("BETTER %d %d %d %d [comm=%.10f] time=%.10f\n", s[ord[0]], s[ord[1]], s[ord[2]], s[ord[3]], time_c, time_x);
|
|
}
|
|
count++;
|
|
}
|
|
}
|
|
if (full_mode == 1) { ord[0] = ord[1] = ord[2] = ord[3] = max_rank; }
|
|
}
|
|
|
|
for (k = 0; k < max_rank; k++)
|
|
{
|
|
ord[k] = ord_best[k];
|
|
s[k] = best[ord[k]];
|
|
}
|
|
if (mode) printf("BEST[%d %d %d %d] %d %d %d %d = %5.10f [tried %d variants]\n", ord[0], ord[1], ord[2], ord[3], s[0], s[1], s[2], s[3], ttt, count);
|
|
add = 1; //çàïèñü â èíòåðâàë
|
|
calculate_all_pipes();
|
|
}
|
|
}
|
|
|
|
//free(pip);
|
|
//free(mult_is);
|
|
//free(dmax);
|
|
//free(conv_beg);
|
|
//free(conv_end);
|
|
//free(s);
|
|
return;
|
|
}
|
|
//=**************************************************************************************
|
|
|
|
// Accumulates into 'transfer' the bytes each processor of 'vm' must exchange
// with its grid neighbours to update the boundaries of distributed array daPtr.
//
// Parameters:
//   daPtr    - the distributed array whose boundaries are updated.
//   dimInfo  - one DimBound per array dimension that has a boundary: arrDim,
//              vmDim, dir and the LeftBSize/RightBSize widths are read.
//   IsCorner - when true, "corner" elements are transferred as well. Corners are
//              handled for the two-dimensional case only (dimInfo[0] and
//              dimInfo[1]); more than two dimensions are not taken into account.
//
// Every neighbour index returned by vm->GetSpecLI() is range-checked before it is
// used, for corner transfers in the same way as for edge transfers.
void CommCost::BoundUpdate(DArray *daPtr, vector<DimBound> & dimInfo, bool IsCorner)
{
    long p1,
         p2;
    long bound_size,
         coner_size;
    Block b;
    long sizeNoConerDim;

    for (p1 = 0; p1 < vm->GetLSize(); p1++) {

        // block of array daPtr that is distributed onto processor p1
        b = Block(daPtr, p1,1);

        if (b.empty())
            continue;

        // determine the transfers required for the right and the left boundary
        for (vector<DimBound>::iterator it = dimInfo.begin(); it != dimInfo.end(); ++it) {
            if (b.IsLeft(it->arrDim, 0)) {
                // there is a neighbour on the left
                if (it->RightBSize > 0) {
                    bound_size = b.GetBlockSizeMult(it->arrDim) * it->RightBSize;
                    p2 = vm->GetSpecLI(p1, it->vmDim, - it->dir);

                    // += because this is an update
                    if (p2 >= 0 && p2 < vm->GetLSize())
                        transfer[vm->map(p1)][vm->map(p2)] += bound_size * daPtr->TypeSize;
                }
            }

            if (b.IsRight(it->arrDim, daPtr->GetSize(it->arrDim) - 1)) {
                // there is a neighbour on the right
                if (it->LeftBSize > 0) {
                    bound_size = b.GetBlockSizeMult(it->arrDim) * it->LeftBSize;
                    p2 = vm->GetSpecLI(p1, it->vmDim, it->dir);

                    if (p2 >= 0 && p2 < vm->GetLSize())
                        transfer[vm->map(p1)][vm->map(p2)] += bound_size * daPtr->TypeSize;
                }
            }
        }

        // determine the transfers required for the "corner" elements
        // Attention: two-dimensional case only; the size check keeps the
        // dimInfo[0]/dimInfo[1] accesses inside the vector.
        if (IsCorner && dimInfo.size() >= 2) {

            sizeNoConerDim = b.GetBlockSizeMult2(dimInfo[0].arrDim, dimInfo[1].arrDim);

            // left neighbour in dimension 0, left neighbour in dimension 1
            if (b.IsLeft(dimInfo[0].arrDim, 0) && b.IsLeft(dimInfo[1].arrDim, 0)) {
                if (dimInfo[0].RightBSize > 0 && dimInfo[1].RightBSize > 0) {
                    coner_size = sizeNoConerDim * dimInfo[0].RightBSize * dimInfo[1].RightBSize;
                    p2 = vm->GetSpecLI(p1, dimInfo[0].vmDim, - dimInfo[0].dir);
                    if (p2 >= 0 && p2 < vm->GetLSize()) {
                        p2 = vm->GetSpecLI(p2, dimInfo[1].vmDim, - dimInfo[1].dir);
                        if (p2 >= 0 && p2 < vm->GetLSize())
                            transfer[vm->map(p1)][vm->map(p2)] += coner_size * daPtr->TypeSize;
                    }
                }
            }

            // left neighbour in dimension 0, right neighbour in dimension 1
            if (b.IsLeft(dimInfo[0].arrDim, 0) && b.IsRight(dimInfo[1].arrDim, daPtr->GetSize(dimInfo[1].arrDim) - 1)) {
                if (dimInfo[0].RightBSize > 0 && dimInfo[1].LeftBSize > 0) {
                    coner_size = sizeNoConerDim * dimInfo[0].RightBSize * dimInfo[1].LeftBSize;
                    p2 = vm->GetSpecLI(p1, dimInfo[0].vmDim, - dimInfo[0].dir);
                    if (p2 >= 0 && p2 < vm->GetLSize()) {
                        p2 = vm->GetSpecLI(p2, dimInfo[1].vmDim, dimInfo[1].dir);
                        if (p2 >= 0 && p2 < vm->GetLSize())
                            transfer[vm->map(p1)][vm->map(p2)] += coner_size * daPtr->TypeSize;
                    }
                }
            }

            // right neighbour in dimension 0, left neighbour in dimension 1
            if (b.IsRight(dimInfo[0].arrDim, daPtr->GetSize(dimInfo[0].arrDim) - 1) && b.IsLeft(dimInfo[1].arrDim, 0)) {
                if (dimInfo[0].LeftBSize > 0 && dimInfo[1].RightBSize > 0) {
                    coner_size = sizeNoConerDim * dimInfo[0].LeftBSize * dimInfo[1].RightBSize;
                    p2 = vm->GetSpecLI(p1, dimInfo[0].vmDim, dimInfo[0].dir);
                    if (p2 >= 0 && p2 < vm->GetLSize()) {
                        p2 = vm->GetSpecLI(p2, dimInfo[1].vmDim, - dimInfo[1].dir);
                        if (p2 >= 0 && p2 < vm->GetLSize())
                            transfer[vm->map(p1)][vm->map(p2)] += coner_size * daPtr->TypeSize;
                    }
                }
            }

            // right neighbour in dimension 0, right neighbour in dimension 1
            if (b.IsRight(dimInfo[0].arrDim, daPtr->GetSize(dimInfo[0].arrDim) - 1) && b.IsRight(dimInfo[1].arrDim, daPtr->GetSize(dimInfo[1].arrDim) - 1)) {
                if (dimInfo[0].LeftBSize > 0 && dimInfo[1].LeftBSize > 0) {
                    coner_size = sizeNoConerDim * dimInfo[0].LeftBSize * dimInfo[1].LeftBSize;
                    p2 = vm->GetSpecLI(p1, dimInfo[0].vmDim, dimInfo[0].dir);
                    if (p2 >= 0 && p2 < vm->GetLSize()) {
                        p2 = vm->GetSpecLI(p2, dimInfo[1].vmDim, dimInfo[1].dir);
                        if (p2 >= 0 && p2 < vm->GetLSize())
                            transfer[vm->map(p1)][vm->map(p2)] += coner_size * daPtr->TypeSize;
                    }
                }
            }
        }
    }

#ifdef _TIME_TRACE_
    // debug dump of the non-zero entries of 'transfer' (to be removed later)
    prot << "CommCost::BoundUpdate: transfer" << endl;
    long pp1,
         pp2;
    for (pp1 = 0; pp1 < rootVM->GetLSize(); pp1++) {
        for (pp2 = 0; pp2 < rootVM->GetLSize(); pp2++) {
            if (transfer[pp1][pp2] != 0)
                prot << '[' << pp1 << ',' << pp2 << "] = " << transfer[pp1][pp2] << "; ";
        }
        prot << endl;
    }
#endif
}
|
|
|
|
|
|
// Accumulates into 'transfer' the bytes needed to make the part 'readBlock' of
// array FromArray available on the processors that do not hold it.
//
// For every processor p1 whose local block intersects readBlock, the size of the
// intersection (times FromArray->TypeSize) is added to transfer[p1][p2] for each
// other processor p2 whose local block is empty or does not intersect that part.
void CommCost::CopyUpdate(DArray * FromArray, Block & readBlock)
{
    long p1,
         p2,
         size;

    if(mode) printf("My CopyUpdate\n");

    for (p1 = 0; p1 < vm->GetLSize(); p1++) {

        Block locBlock(FromArray, p1,1);
        Block bi = locBlock ^ readBlock;

        // the local part of the array being read is not empty
        // (the second condition alone would be sufficient)
        if (locBlock.empty() || bi.empty())
            continue;

        size = bi.GetBlockSize();

        for (p2 = 0; p2 < vm->GetLSize(); p2++) {
            if (p1 == p2)
                continue;

            Block locBlock1(FromArray, p2,1);

            // this part of the array being read is absent on processor p2
            if (locBlock1.empty() || (locBlock1 ^ bi).empty()) {
                transfer[p1][p2] += size * FromArray->TypeSize; // update
                // arguments are cast so that they match the %ld conversions
                if(mode) printf("Transfer[%ld][%ld]+=%ld*%ld\n", p1, p2, static_cast<long>(FromArray->TypeSize), size);
            }
        }
    }
}
|
|
|
|
//====
|
|
// Accumulates into transfer[p1][p2], for every processor p2 other than p1, the
// size in bytes of the part of 'readBlock' held in p2's local block of FromArray.
//
// Parameters:
//   FromArray - the array being read; FromArray->TypeSize scales the element count.
//   readBlock - the block of the array that is read.
//   p1        - first index into 'transfer'; it is not range-checked here.
void CommCost::CopyUpdateDistr(DArray * FromArray, Block &readBlock, long p1)
{
    long size, p2;

    if(mode) printf("***My CopyUpdateDistr***\n");

    for (p2 = 0; p2 < vm->GetLSize(); p2++)
    {
        if (p1 == p2)
            continue;

        Block locBlock1(FromArray, p2,1);
        Block bi2 = locBlock1 ^ readBlock;

        if (!bi2.empty()) {
            // the size is only needed (and only queried) for a non-empty intersection
            size = bi2.GetBlockSize();
            transfer[p1][p2] += size* FromArray->TypeSize;
            // arguments are cast so that they match the %ld conversions
            if(mode) printf("Transfer[%ld][%ld]+=%ld*%ld\n", p1, p2, static_cast<long>(FromArray->TypeSize), size);
        }
    }
}
|
|
|
|
long CommCost::GetLSize()
|
|
{ return vm->GetLSize();
|
|
}
|
|
|
|
|
|
//=***
|