Files
SAPFOR/dvm/tools/pppa/trunk/src/statfile.cpp
2023-09-15 08:30:58 +03:00

1093 lines
49 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "bool.h"
#define _STATFILE_
#include "statread.h"
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include <exception>
#include <new>
#ifdef __SPF_BUILT_IN_PPPA
#define VERS "406"
#define PLATFORM "ntmpich"
#else
#include "dvmvers.h"
#endif
#include "statprintf.h"
using namespace std;
/**
* Человекопонятный формат размера
*
* @param bytes размер в байтах
* @param precision количество знаков после запятой
*
* @return отформатированная строка размера
*/
static char *humanizeSize(const unsigned long long bytes, const unsigned short precision);
/**
* Подготовить значение dvmh статистики к печати
*
* @param value сырое значение
* @param isPositive значение строго положительно?
* @param isSize значение это размер?
* @param dashOnZero выводить вместо нуля? "-"
* @param isTime значение это время (не работает, при установленном isSize)?
*
* @return отформатированное значение в виде строки
*/
static char *dvmhPrepareValue(
const double value,
const unsigned short isPositive,
const unsigned short isSize,
const unsigned short dashOnZero,
const unsigned short isTime
);
/**
* Маски определяющие уровень вывода информации
*/
enum VerbosityLevel {
VERBOSITY_LVL_NONE = 0,
VERBOSITY_LVL_CPU = 1,
VERBOSITY_LVL_GPU = 2
};
static void printHelp()
{
printf("Performance analyser parameters: <file _name1> <file_name2> [additional_opt] \n");
printf(" <file_name1> Statistics file name.\n");
printf(" <file_name2> Output file name.\n");
printf("--------------------------------------------------------------------------\n");
printf("Additional options:\n");
printf(" -l <level> Interval <level> number.\n");
printf(" -n <list> List of processor numbers,\n");
printf(" <list> = 0 - without processor characteristics.\n");
printf(" -t <list> List of threads numbers,\n");
printf(" <list> = 0 - all threads characteristics.\n");
printf(" -v <verbosity> Verbosity level, set that detailed information whould be\n");
printf(" displayed. <verbosity>:\n");
printf(" gpu - display only GPU characteristics.\n");
printf(" cpu - display only CPU characteristics.\n");
printf(" none - do not display detailed characteristics.\n");
printf(" gpu,cpu - (default) display CPU and GPU characteristics.\n");
printf(" -c <compr_level> [0..9],-1: compression level of output file,-1-no compression\n");
printf(" -m <matrix> <i[:<groupname>]|j[:<groupname>]\n");
printf(" i|j - sum of row or column elements of matrix of\n");
printf(" interval characteristics.\n");
printf(" <groupname> Name of group, set to output row or column per elements.\n");
printf(" <groupname>:\n");
printf(" UserGrp|MsgPasGrp|StartRedGrp|WaitRedGrp|RedGrp|\n");
printf(" StartShdGrp|WaitShdGrp|ShdGrp|DistrGrp|ReDistrGrp|\n");
printf(" MapPLGrp|DoPLGrp|ProgBlockGrp|IOGrp|RemAccessGrp|\n");
printf(" UserDebGrp|StatistGrp|SystemGrp.\n");
}
#ifdef __SPF_BUILT_IN_PPPA
int pppa_analyzer(int argv, char** argc)
#else
int main(int argv, char **argc)
#endif
{
//sfn outfn -l d>=0 -c d=-1..9 -n d-d>=0 -m text
if (argv < 3 || strcmp(argc[1], "help") == 0)
{
printHelp();
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
return 0;
#endif
}
if (argv > 14) {
printf("Incorrect number of parameters\n");
printHelp();
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
BOOL proc = TRUE, comp = TRUE, gen = TRUE;
BOOL dvmh_gpu = TRUE, dvmh_threads = FALSE, dvmh_threads_full = FALSE;
int verbosity = VERBOSITY_LVL_CPU | VERBOSITY_LVL_GPU;
char compr[3], mode[5];
strcpy(mode, "wb0");
int nlevel = 9999, qnumb = 1, qnumthreads = 1;
unsigned long *pnumb = NULL;
unsigned long *pnumthreads = NULL;
char * token;
int iIM = 0, jIM = 0, nparin = 0, nparout = 0;
short sore = 0;
size_t st = 0;
//new_handler set_new_handler(NULL);
nparin = 1;// statistics file
nparout = 2;// output file
for (int npar = 3; npar < argv; npar++) { // key parameters
int i, cc;
char let;
char arrs[24] = " "; // strlen nameGen[i]
if (argc[npar][2] != 0) {
printf("Incorrect parameter %s\n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
if (argv == npar + 1) {
printf("Parameter for %s not set\n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
switch (argc[npar][1]) {
case 'l':
//interval level
npar++;
nlevel = atoi(argc[npar]);
if (nlevel < 0 || (nlevel == 0 && strcmp(argc[npar], "0") != 0)) {
printf("Incorrect number of level %s \n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
break;
case 'c':
// compression level
npar++;
mode[0] = 0; compr[0] = 0;
cc = atoi(argc[npar]);
if (cc < -1 || cc>9) {
printf("Incorrect compression level of file %s \n", argc[3]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
if (cc == -1) {
strcpy(compr, "0");
}
else {
int comprd;
if (cc == 0) strcpy(compr, "-1");
else {
comprd = cc;
sprintf(compr, "%d", comprd);
}
}
strcpy(mode, "wb");
strcat(mode, compr);
break;
case 'n':
// list of processor numbers
npar++;
qnumb = 1;
i = 0;
while (argc[npar][i] != 0) {
if (argc[npar][i] == ',') qnumb++;
i++;
}
pnumb = new unsigned long(qnumb * 2);
if (pnumb == NULL) throw("Out of memory\n");
token = strtok(argc[npar], ",");
i = 0;
while (token != NULL) {
char *tdiap = strchr(token, '-');
if (tdiap == NULL) {
pnumb[i] = atoi(token);
pnumb[i + 1] = pnumb[i];
}
else {
pnumb[i + 1] = atoi(tdiap + 1);
tdiap[0] = '\0';
pnumb[i] = atoi(token);
}
token = strtok(NULL, ",\n");
i = i + 2;
} // end while
// -n 0 - not print processor characteristics
if (qnumb == 1 && pnumb[0] == 0 && pnumb[1] == 0)
{
proc = FALSE;
dvmh_threads = FALSE;
dvmh_gpu = FALSE;
}
break;
case 't':
//list of threads to show
npar++;
qnumthreads = 1;
i = 0;
while (argc[npar][i] != 0) {
if (argc[npar][i] == ',') qnumthreads++;
i++;
}
pnumthreads = new unsigned long(qnumthreads * 2);
if (pnumthreads == NULL) throw("Out of memory\n");
token = strtok(argc[npar], ",");
i = 0;
while (token != NULL) {
char *tdiap = strchr(token, '-');
if (tdiap == NULL) {
pnumthreads[i] = atoi(token);
pnumthreads[i + 1] = pnumthreads[i];
}
else {
pnumthreads[i + 1] = atoi(tdiap + 1);
tdiap[0] = '\0';
pnumthreads[i] = atoi(token);
}
token = strtok(NULL, ",\n");
i = i + 2;
} // end while
// -t 0 - print all threads characteristics
if (qnumthreads == 1 && pnumthreads[0] == 0 && pnumthreads[1] == 0)
{
dvmh_threads = TRUE;
dvmh_threads_full = TRUE;
}
break;
case 'm':
// interval matrix
npar++;
let = argc[npar][0];
if (let != 'i' && let != 'j') {
printf("Incorrect %d parameter %c, must be i/j\n", npar, let);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
sore = 1;
if (let == 'i') iIM = 1; else jIM = 1;
st = strlen(argc[npar]);
if (st == 1) break;
sore = 0; // element
if (argc[npar][1] != ':') {
printf("Incorrect %d parameter %s, must be i=<groupname>/j=<groupname>\n", npar, argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
strncpy(arrs, &(argc[npar][2]), st - 2);
iIM = 0; jIM = 0;
for (i = 1; i <= StatGrpCount; i++) {
if (strcmp(nameGenMT[i - 1], arrs) == 0) {
if (let == 'i') iIM = i;
else jIM = i;
break;
}
} // end for
if (iIM == 0 && jIM == 0) {
printf("Incorrect group name %s \n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
break;
case 'v':
// verbosity level
npar++;
verbosity = VERBOSITY_LVL_NONE;
{
char *buf = argc[npar];
char *flag = buf;
char cond = 1;
do {
if (*buf == '\0') cond = 0;
if (*buf == '\0' || *buf == ',') {
*buf = '\0';
if (strcmp(flag, "cpu") == 0) {
verbosity = verbosity | VERBOSITY_LVL_CPU;
}
else
if (strcmp(flag, "gpu") == 0) {
verbosity = verbosity | VERBOSITY_LVL_GPU;
}
else
if (strcmp(flag, "none") != 0) {
printf("Incorrect verbosity level %s \n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
flag = ++buf;
}
else ++buf;
} while (cond);
}
break;
default:
printf("Incorrect parameter %s\n", argc[npar]);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
break;
} // end switch
} // end for key parameters
// read time characteristics and syn times
try {
CStatRead stat(argc[nparin], iIM, jIM, sore);
int warn;
if (stat.Valid(&warn) != TRUE) {
char t[80];
stat.TextErr(t);
printf("%s", t);
printHelp();
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
// Возвращает количество процессоров, на которых считалась задача.
unsigned long qproc = stat.QProc();
if (qproc == 0)
{
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
// string for processor characteristics - max
// printf for compressed and not compressed out file
CStatPrintf statpr(argc[nparout], 1024, mode);
if (statpr.Valid() != TRUE) {
char t[80];
statpr.TextErr(t);
printf("%s", t);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
double min[ITER + 1];
double max[ITER + 1];
double sum[ITER + 1];
// communication
double minc[RED + 1];
double maxc[RED + 1];
double sumc[RED + 1];
// real communication
double minrc[RED + 1];
double maxrc[RED + 1];
double sumrc[RED + 1];
// synchronization
double mins[RED + 1];
double maxs[RED + 1];
double sums[RED + 1];
// variation
double minv[RED + 1];
double maxv[RED + 1];
double sumv[RED + 1];
// overlap
double minov[RED + 1];
double maxov[RED + 1];
double sumov[RED + 1];
// number of processor
unsigned long nprocmin[ITER + 1], nprocmax[ITER + 1];
unsigned long nprocminc[RED + 1], nprocmaxc[RED + 1];
unsigned long nprocminrc[RED + 1], nprocmaxrc[RED + 1];
unsigned long nprocmins[RED + 1], nprocmaxs[RED + 1];
unsigned long nprocminv[RED + 1], nprocmaxv[RED + 1];
unsigned long nprocminov[RED + 1], nprocmaxov[RED + 1];
const char *namecomp[3] = { "Tmin","Tmax","Tmid" };
int ltxt = strlen(nameCom[0]) + 1;
char p_heading[80];
int lenstr = 0;
char *poutstr = NULL;
int i;
stat.VMSSize(p_heading);
statpr.StatPrintf("Processor system=%s\n", p_heading);
char * pvers = stat.ReadVers();
char *pplat = stat.ReadPlatform();
statpr.StatPrintf("Statistics has been accumulated on DVM-system version %s, platform %s\n", pvers, pplat);
statpr.StatPrintf("Analyzer is executing on DVM-system version %s, platform %s\n", VERS, PLATFORM);
for (i = 0; i < warn; i++) { // warning message
stat.WasErrAccum(p_heading);
statpr.StatPrintf("!! %s", p_heading);
}
short dig_time = 0;
unsigned long n = stat.BeginTreeWalk();
while (n != 0) {
short nlev = stat.ReadTitle(p_heading); // информация о заголовке, идентифицирующем интервал (видимо уровень)
if (nlev <= nlevel) {
statpr.StatPrintf("%s", "-------------------------------------------------------------------------\n");
statpr.StatPrintf("%s", p_heading);
// calculate min,max,sum values for all times
stat.MinMaxSum(PRGEN, min, nprocmin, max, nprocmax, sum);
stat.MinMaxSum(PRCOM, minc, nprocminc, maxc, nprocmaxc, sumc);
stat.MinMaxSum(PRRCOM, minrc, nprocminrc, maxrc, nprocmaxrc, sumrc);
stat.MinMaxSum(PRSYN, mins, nprocmins, maxs, nprocmaxs, sums);
stat.MinMaxSum(PRVAR, minv, nprocminv, maxv, nprocmaxv, sumv);
stat.MinMaxSum(PROVER, minov, nprocminov, maxov, nprocmaxov, sumov);
if (dig_time == 0) { // format for print
double max_val = 0.0;
for (i = 0; i <= RED; i++) {
if (max_val < sumc[i]) max_val = sumc[i];
if (max_val < sumrc[i]) max_val = sumrc[i];
if (max_val < sums[i]) max_val = sums[i];
if (max_val < sumv[i]) max_val = sumv[i];
if (max_val < sumov[i]) max_val = sumov[i];
} // end for
char tval[80];
sprintf(tval, "%*.*lf", DIGTIME, PREC, max_val);
dig_time = (short)strlen(tval);
lenstr = (dig_time + 1)*qproc + strlen(nameGen[0]) + 1;
if (lenstr <= 1024) lenstr = 1024; else statpr.ChangeLenStr(lenstr);
poutstr = new char[lenstr];
if (poutstr == NULL) throw("Out of memory\n");
}
if (gen == TRUE) {
statpr.StatPrintf("--- The main characteristics --- \n");
// double time1,prodcpu,timef,prod;
double prod_cpu, prod_sys, prod_io, prod;
prod_cpu = sum[CPUUSR];
prod_sys = sum[CPU];
prod_io = sum[IOTIME];
prod = prod_cpu + prod_sys + prod_io;
double exec_time, sys_time, efficiency;
exec_time = max[EXEC];
sys_time = n * max[EXEC];
efficiency = sys_time ? prod / sys_time : 0.0;
unsigned long threadsOfAllProcs = 0;
for (unsigned int not_i = 1; not_i <= n; ++not_i)
threadsOfAllProcs += stat.getThreadsAmountByProcess(not_i);
statpr.StatPrintf("%s %*.*lf \n", "Parallelization efficiency ", dig_time, PREC, efficiency);
statpr.StatPrintf("%s %*.*lf \n", "Execution time ", dig_time, PREC, exec_time);
statpr.StatPrintf("%s %*d\n", "Processors ", dig_time, n);
statpr.StatPrintf("%s %*d\n", "Threads amount ", dig_time, threadsOfAllProcs);
statpr.StatPrintf("%s %*.*lf\n", "Total time ", dig_time, PREC, sys_time);
if (prod > 0.0) {
statpr.StatPrintf("%s %*.*lf %s %.*lf %s %.*lf %s %.*lf %c\n",
"Productive time ", dig_time, PREC, prod,
"( CPU=", PREC, prod_cpu,
"Sys=", PREC, prod_sys,
"I/O=", PREC, prod_io,
')'
);
}
if (sum[LOST] > 0.0)statpr.StatPrintf("%s %*.*lf \n", "Lost time ", dig_time, PREC, sum[LOST]);
if (sum[INSUFUSR] + sum[INSUF] != 0.0)
statpr.StatPrintf("%s %*.*lf %s %.*lf %s %.*lf %c\n", " Insufficient parallelism", dig_time, PREC, sum[INSUFUSR] + sum[INSUF],
"( User=", PREC, sum[INSUFUSR], "Sys=", PREC, sum[INSUF], ')');
if (sum[SUMCOM] != 0.0)
statpr.StatPrintf("%s %*.*lf %s %.*lf %s %.*lf %c\n", " Communication ", dig_time, PREC, sum[SUMCOM],
"( Real_sync=", PREC, sum[SUMRCOM], "Starts=", PREC, sum[START], ')');
if (sum[IDLE] != 0.0) statpr.StatPrintf("%s %*.*lf\n", " Idle time ", dig_time, PREC, sum[IDLE]);
if (sum[IMB] != 0.0)statpr.StatPrintf("%s %*.*lf\n", "Load imbalance ",
dig_time, PREC, sum[IMB]);
if (sum[SUMSYN] != 0.0) statpr.StatPrintf("%s %*.*lf\n", "Synchronization ", dig_time, PREC, sum[SUMSYN]);
if (sum[SUMVAR] != 0.0) statpr.StatPrintf("%s %*.*lf\n", "Time variation ", dig_time, PREC, sum[SUMVAR]);
if (sum[SUMOVERLAP] > 0.0)statpr.StatPrintf("%s %*.*lf\n", "Overlap ", dig_time, PREC,
sum[SUMOVERLAP]);
if (sum[DVMH_THREADS_USER_TIME] > 0.0)
statpr.StatPrintf("%s %*.*lf\n", "Threads user time ", dig_time, PREC, sum[DVMH_THREADS_USER_TIME]);
if (sum[DVMH_THREADS_SYSTEM_TIME] > 0.0)
statpr.StatPrintf("%s %*.*lf\n", "Threads system time ", dig_time, PREC, sum[DVMH_THREADS_SYSTEM_TIME]);
if (sum[DVMH_GPU_TIME_PRODUCTIVE] > 0.0)
statpr.StatPrintf("%s %*.*lf\n", "Productive time GPU ", dig_time, PREC, sum[DVMH_GPU_TIME_PRODUCTIVE]);
if (sum[DVMH_GPU_TIME_LOST] > 0.0)
statpr.StatPrintf("%s %*.*lf\n", "Lost time GPU ", dig_time, PREC, sum[DVMH_GPU_TIME_LOST]);
long ncall = 0;
int dig_stat = DIGSTAT;
for (i = 0; i <= RED; i++) ncall = ncall + stat.ReadCall(typecom(i));
if (ncall > 0) {
statpr.StatPrintf("%*c", ltxt, ' ');
char tval[20];
sprintf(tval, "%ld", ncall);
int l = strlen(tval);
if (l > DIGSTAT)dig_stat = l;
statpr.StatPrintf("%*s", dig_stat, " Nop ");
for (int j = SUMCOM; j <= SUMOVERLAP; j++) {
if (sum[j] > 0.0)
statpr.StatPrintf("%*s ", dig_time, nameOper[j - SUMCOM]);
} // end for
statpr.StatPrintf("\n");
}// end if
for (i = 0; i <= RED; i++) {
ncall = stat.ReadCall(typecom(i));
if (ncall > 0) {
statpr.StatPrintf("%s", nameCom[i]);
statpr.StatPrintf("%*d ", dig_stat, ncall);
if (sum[SUMCOM] > 0.0)statpr.StatPrintf("%*.*lf ", dig_time, PREC, sumc[i]);
if (sum[SUMRCOM] > 0.0)statpr.StatPrintf("%*.*lf ", dig_time, PREC, sumrc[i]);
if (sum[SUMSYN] > 0.0)statpr.StatPrintf("%*.*lf ", dig_time, PREC, sums[i]);
if (sum[SUMVAR] > 0.0)statpr.StatPrintf("%*.*lf ", dig_time, PREC, sumv[i]);
if (sum[SUMOVERLAP] > 0.0)statpr.StatPrintf("%*.*lf ", dig_time, PREC, sumov[i]);
statpr.StatPrintf("\n");
} // end if ncall>0
} // end for
if (iIM > 0 || jIM > 0) { // statistics matrix
for (unsigned long np = 1; np <= qproc; np++) {
double prod[StatGrpCount], lost[StatGrpCount], sumprod = 0.0, sumlost = 0.0;
double calls[StatGrpCount], sumcalls = 0.0;
stat.GrpTimes(prod, lost, calls, np);
sprintf(p_heading, "%ld", np);
int ll = strlen(nameGen[0]) - strlen(p_heading) - strlen(" Nproc=") - 2;
statpr.StatPrintf("%s %d %*c %*s %*s %*s\n", " Nproc=", np, ll, ' ',
dig_time, "CALL COUNT", dig_time, "PRODUCT TIME", dig_time,
"LOST TIME");
for (i = 0; i < StatGrpCount; i++) {
sumprod = sumprod + prod[i];
sumlost = sumlost + lost[i];
sumcalls = sumcalls + calls[i];
//if (calls[i]>0 || prod[i]!=0.0 || lost[i]!=0.0 ) {
statpr.StatPrintf("%s %*.*lf %*.*lf %*.*lf \n", nameGenMT[i],
dig_time, PREC, calls[i],
dig_time, PREC, prod[i],
dig_time, PREC, lost[i]);
//}
} // end for
statpr.StatPrintf("%s %*.*lf %*.*lf %*.*lf \n", " Total: ",
dig_time, PREC, sumcalls, dig_time, PREC, sumprod, dig_time, PREC, sumlost);
} // end for qproc
} // end statistics matrix
} // end main characteristics
comp = comp && ((verbosity & VERBOSITY_LVL_CPU) > 0);
if (comp == TRUE) {
// comparative characteristics
statpr.StatPrintf("--- The comparative characteristics --- \n");
poutstr[0] = 0;
statpr.StatPrintf("%*c", strlen(nameGen[0]) + 1, ' ');
int i;
for (i = 0; i < 3; i++) {
if (i == 2)statpr.StatPrintf("%*s\n", dig_time, namecomp[i]);
else statpr.StatPrintf("%*s %*s", dig_time, namecomp[i],
DIGSTAT, "N proc");
}
// general characteristics
for (i = 0; i <= ITER; i++) {
if (sum[i] > 0.0) {
int prec;
double tt = sum[i] / n;
if ((typetime)(i) == PROC || (typetime)(i) == ITER)
prec = 0; else prec = PREC;
statpr.StatPrintf("%s %*.*lf %*d %*.*lf %*d %*.*lf \n",
nameGen[i], dig_time, prec,
min[i], DIGSTAT, nprocmin[i], dig_time, prec, max[i], DIGSTAT,
nprocmax[i], dig_time, prec, tt);
}
}
long ncall = 0;
// characteristics of collective operations
for (i = 0; i <= RED; i++) ncall = ncall + stat.ReadCall(typecom(i));
if (ncall > 0) {
statpr.StatPrintf("%*c", ltxt - 2, ' ');
for (int j = SUMCOM; j <= SUMOVERLAP; j++) {
if (sum[j] > 0.0)
statpr.StatPrintf("%*s ", dig_time + DIGSTAT, nameOper[j - SUMCOM]);
}
statpr.StatPrintf("\n");
for (i = 0; i <= RED; i++) {
for (int k = 0; k < 3; k++) {
if (sumc[i] == 0.0 && sumrc[i] == 0.0 && sums[i] == 0.0 &&
sumv[i] == 0.0 &&sumov[i] == 0.0) break;
double t[CALL];//com,realcom,syn,var,overlap
unsigned long pnp[CALL];
// 0 - min; 1 - max; 2 - sum
switch (k) {
case 0:
t[0] = minc[i]; t[1] = minrc[i];
t[2] = mins[i]; t[3] = minv[i];
t[4] = minov[i];
pnp[0] = nprocminc[i];
pnp[1] = nprocminrc[i];
pnp[2] = nprocmins[i];
pnp[3] = nprocminv[i];
pnp[4] = nprocminov[i];
break;
case 1:
t[0] = maxc[i]; t[1] = maxrc[i];
t[2] = maxs[i]; t[3] = maxv[i];
t[4] = maxov[i];
pnp[0] = nprocmaxc[i];
pnp[1] = nprocmaxrc[i];
pnp[2] = nprocmaxs[i];
pnp[3] = nprocmaxv[i];
pnp[4] = nprocmaxov[i];
break;
case 2:
t[0] = sumc[i] / n; t[1] = sumrc[i] / n;
t[2] = sums[i] / n; t[3] = sumv[i] / n;
t[4] = sumov[i] / n;
pnp[0] = 0;
pnp[1] = 0;
pnp[2] = 0;
pnp[3] = 0;
pnp[4] = 0;
break;
default:
statpr.StatPrintf("Unknown type=%d\n", k);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}// end switch
statpr.StatPrintf("%s%s", nameCom[i], namecomp[k]);
for (int j = SUMCOM; j <= SUMOVERLAP; j++) {
if (sum[j] > 0.0) {
if (pnp[0] == 0) {
statpr.StatPrintf("%*.*lf ",
dig_time, PREC, t[j - SUMCOM]);
statpr.StatPrintf("%*c", DIGSTAT + 1, ' ');
}
else
statpr.StatPrintf("%*.*lf %*d ",
dig_time, PREC, t[j - SUMCOM],
DIGSTAT, pnp[j - SUMCOM]);
}
}
statpr.StatPrintf("\n");
} //end for
}//end for
}
}
proc = proc && ((verbosity & VERBOSITY_LVL_CPU) > 0);
if (proc == TRUE) {
// execution characteristics
statpr.StatPrintf("--- The execution characteristics --- \n");
statpr.StatPrintf("%s", " ");
unsigned long i;
// print numbers of processor
for (i = 0; i < n; i++) {//!!! qproc
int pr = FALSE;
if (pnumb == NULL) pr = TRUE;
else {
for (int j = 0; j < qnumb; j++) {
if (i + 1 >= pnumb[j] && i + 1 <= pnumb[j + 1]) pr = TRUE;
}
}
if (pr == TRUE) {
statpr.StatPrintf("%*d ", dig_time, i + 1); // probel
}
}
statpr.StatPrintf("\n");
// general characteristics
for (i = 0; i <= ITER; i++) {
stat.ReadProc(PRGEN, pnumb, qnumb, dig_time, sum[i], poutstr);
if (poutstr[0] != '\0')statpr.StatPrintf("%s\n", poutstr);
//statpr.StatPrintf("\n");
}
//statpr.StatPrintf("\n");
// characteristics of collective operatios
long ncall = 0;
for (int k = 0; k <= RED; k++) ncall = ncall + stat.ReadCall(typecom(k));
for (int j = SUMCOM; j <= SUMOVERLAP; j++) {
double *ps;
switch (j) {
case SUMCOM: ps = sumc; break;
case SUMRCOM: ps = sumrc; break;
case SUMSYN: ps = sums; break;
case SUMVAR: ps = sumv; break;
case SUMOVERLAP: ps = sumov; break;
default:statpr.StatPrintf("Unknown type=%d\n", j);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}// end for j
int i = 0;
//if (j==SUMOVERLAP) ncall=0; //for pc sum[j]=0.0
if (sum[j] > 0.0 && ncall > 0) {
statpr.StatPrintf(" %s\n", nameGen[j]);
for (i = 0; i <= RED; i++) {
stat.ReadProc((typeprint)(j - SUMCOM + 1), pnumb, qnumb, dig_time, ps[i], poutstr);
if (poutstr[0] != '\0')statpr.StatPrintf("%s\n", poutstr);
}
}
} // end for k
}//exec characteristics
//dvmh_threads = dvmh_threads && ((verbosity & VERBOSITY_LVL_CPU) > 0);
//if(dvmh_threads == TRUE)
if ((verbosity & VERBOSITY_LVL_CPU) > 0);
{
short isThreadsHeaderPrinted = 0;
// Выводим информацию по процессам (нити)
for (unsigned long np = 1; np <= qproc; ++np)
{
int show_this_proc = FALSE;
if (pnumb == NULL) show_this_proc = TRUE;
else {
for (int j = 0; j < qnumb * 2; j += 2) {
if (np >= pnumb[j] && np <= pnumb[j + 1]) show_this_proc = TRUE;
}
}
if (show_this_proc)
{
dvmh_stat_interval *dvmhStatInterval = stat.getDvmhStatIntervalByProcess(np);
dvmh_stat_header_gpu_info *dvmhStatGpuInfo;
if (!dvmhStatInterval) {
continue;
}
if (!dvmhStatInterval->threadsUsed)
continue;
if (!isThreadsHeaderPrinted)
{
statpr.StatPrintf("\n--- Threads characteristics --- \n");
isThreadsHeaderPrinted = 1;
}
double maxs_t[2];
double mins_t[2];
double meds_t[2];
unsigned long n_t[4];
double max_val = 0;
stat.getMaxsAndMins(maxs_t, mins_t, meds_t, n_t, np);
for (int i = 0; i < 2; ++i)
{
if (maxs_t[i] > max_val) max_val = maxs_t[i];
if (mins_t[i] > max_val) max_val = mins_t[i];
if (meds_t[i] > max_val) max_val = meds_t[i];
}
char tval[40];
sprintf(tval, "%*.*lf", DIGTIME, PREC, max_val);
short tvlen = strlen(tval);
short tnlen = 0;
unsigned long tnumval = stat.getThreadsAmountByProcess(np);
while (tnumval > 0) { tnumval /= 10; tnlen++; }
statpr.StatPrintf("\nProcessor# %ld:\n", np);
statpr.StatPrintf(" ");
for (int i = 0; i < 3; i++) {
if (i == 2)statpr.StatPrintf("%*s\n", dig_time, namecomp[i]);
else statpr.StatPrintf("%*s %*s ", dig_time, namecomp[i], tnlen, "N");
}
statpr.StatPrintf("User time %*.*f %.*ld %*.*f %.*ld %*.*f\n", tvlen, PREC, mins_t[0], tnlen, n_t[1]
, tvlen, PREC, maxs_t[0], tnlen, n_t[0]
, tvlen, PREC, meds_t[0]);
statpr.StatPrintf("System time %*.*f %.*ld %*.*f %.*ld %*.*f\n", tvlen, PREC, mins_t[1], tnlen, n_t[3]
, tvlen, PREC, maxs_t[1], tnlen, n_t[2]
, tvlen, PREC, meds_t[1]);
if (dvmh_threads)
for (unsigned long i = 0; i < stat.getThreadsAmountByProcess(np)/*dvmhStatInterval->max_threads*/; ++i)
{
int show_this_thread = FALSE;
if (pnumthreads == NULL) show_this_thread = TRUE;
else {
for (int j = 0; j < qnumthreads * 2; j += 2) {
if (i + 1 >= pnumthreads[j] && i + 1 <= pnumthreads[j + 1]) show_this_thread = TRUE;
}
}
if (show_this_thread || dvmh_threads_full)
{
statpr.StatPrintf(" Thread[%.4ld]\n", i + 1);
statpr.StatPrintf(" User time %10.*f\n", PREC, dvmhStatInterval->threads[i].user_time);
statpr.StatPrintf(" System time %10.*f\n", PREC, dvmhStatInterval->threads[i].system_time);
}
}
}
}
statpr.StatPrintf("\n");
}
dvmh_gpu = dvmh_gpu && ((verbosity & VERBOSITY_LVL_GPU) > 0);
if (dvmh_gpu == TRUE) {
short isGPUHeaderPrinted = 0;
// Выводим информацию по процессам (GPU)
for (unsigned long np = 1; np <= qproc; ++np)
{
short isProcHeaderPrinted = 0;
dvmh_stat_interval *dvmhStatInterval = stat.getDvmhStatIntervalByProcess(np);
dvmh_stat_header_gpu_info *dvmhStatGpuInfo;
if (!dvmhStatInterval) {
continue;
}
short havePrevGpu = 0;
// Выводи информацию по каждому GPU на процессе
for (int gpu = 0; gpu < DVMH_STAT_MAX_GPU_CNT; ++gpu) {
if (((dvmhStatInterval->mask >> gpu) & 1) == 0) continue;
dvmhStatGpuInfo = stat.getDvmhStatGpuInfoByProcess(np, gpu);
dvmh_stat_interval_gpu *dvmhStatGpu = &dvmhStatInterval->gpu[gpu];
havePrevGpu = 1;
if (!isGPUHeaderPrinted) {
statpr.StatPrintf("\n--- The GPU characteristics --- \n");
isGPUHeaderPrinted = 1;
}
if (!isProcHeaderPrinted) {
statpr.StatPrintf(" ┌───────────────┐\n");
statpr.StatPrintf(" │ Proc: #%-6d │\n", np);
statpr.StatPrintf(" └─────┬─────────┘\n");
statpr.StatPrintf("\n");
}
int lenName = strlen((char *)dvmhStatGpuInfo->name);
if (lenName == 0) {
strcpy((char *)dvmhStatGpuInfo->name, DVMH_STAT_GPU_UNKNOWN);
lenName = strlen(DVMH_STAT_GPU_UNKNOWN);
}
int nameSpace = 9;
if (dvmhStatGpuInfo->id == -1) nameSpace = -1;
statpr.StatPrintf("");
for (int i = 0; i < lenName + 12 + nameSpace; ++i) {
if (i == 3 && (havePrevGpu || !isProcHeaderPrinted)) statpr.StatPrintf("");
else statpr.StatPrintf("");
}
isProcHeaderPrinted = 1;
statpr.StatPrintf("\n");
if (dvmhStatGpuInfo->id != -1)
statpr.StatPrintf(" │ GPU #%-d[%08X] (%*s) │\n",
gpu + 1, dvmhStatGpuInfo->id, lenName, dvmhStatGpuInfo->name);
else
statpr.StatPrintf(" │ GPU #%-d (%*s) │\n",
gpu + 1, lenName, dvmhStatGpuInfo->name);
statpr.StatPrintf("");
#if DVMH_EXTENDED_STAT == 1
int hr_len = 170;
#else
int hr_len = 128;
#endif
for (int i = 0; i < hr_len; ++i) {
if (lenName + 12 + nameSpace == i) statpr.StatPrintf("");
else statpr.StatPrintf("");
}
statpr.StatPrintf("\n");
#if DVMH_EXTENDED_STAT == 1
statpr.StatPrintf(" │ %35s %6s %13s %13s %13s %13s %13s %13s %13s %13s %13s │\n",
"", "#", "Min", "Max", "Sum", "Average",
"Q1", "Median", "Q3", "Productive", "Lost");
#else
statpr.StatPrintf(" │ %35s %6s %13s %13s %13s %13s %13s %13s │\n",
"", "#", "Min", "Max", "Sum", "Average",
"Productive", "Lost");
#endif
statpr.StatPrintf("");
for (int i = 0; i < hr_len; ++i) {
statpr.StatPrintf("");
}
statpr.StatPrintf("\n");
// Выводим информацию по метрикам
for (int metric = 0; metric < DVMH_STAT_METRIC_CNT; ++metric) {
dvmh_stat_interval_gpu_metric *dvmhStatMetric = &dvmhStatGpu->metrics[metric];
if (dvmhStatMetric->countMeasures <= 0) continue;
short isSize = metric >= DVMH_STAT_METRIC_CPY_DTOH &&
metric <= DVMH_STAT_METRIC_CPY_GET_ACTUAL ||
metric == DVMH_STAT_METRIC_UTIL_ARRAY_TRANSFORMATION;
short isPositive = isSize;
short isZeroDash = 0;
short isZeroDashEx = dvmhStatMetric->hasOwnMeasures <= 0;
char *sMin = dvmhPrepareValue(dvmhStatMetric->min, isPositive, isSize, isZeroDash, 0);
char *sMax = dvmhPrepareValue(dvmhStatMetric->max, isPositive, isSize, isZeroDash, 0);
char *sSum = dvmhPrepareValue(dvmhStatMetric->sum, isPositive, isSize, isZeroDash, 0);
char *sMean = dvmhPrepareValue(dvmhStatMetric->mean, isPositive, isSize, isZeroDash, 0);
#if DVMH_EXTENDED_STAT == 1
char *sQ1 = dvmhPrepareValue(dvmhStatMetric->q1, isPositive, isSize, isZeroDashEx, 0);
char *sMedian = dvmhPrepareValue(dvmhStatMetric->median, isPositive, isSize, isZeroDashEx, 0);
char *sQ3 = dvmhPrepareValue(dvmhStatMetric->q3, isPositive, isSize, isZeroDashEx, 0);
#endif
char *timeProductive = dvmhPrepareValue(dvmhStatMetric->timeProductive, 1, 0, 1, 1);
char *timeLost = dvmhPrepareValue(dvmhStatMetric->timeLost, 1, 0, 1, 1);
#if DVMH_EXTENDED_STAT == 1
statpr.StatPrintf(" │ %-35s %6d %13s %13s %13s %13s %13s %13s %13s %13s %13s │\n",
dvmhStatMetricsTitles[metric],
dvmhStatMetric->countMeasures,
sMin,
sMax,
sSum,
sMean,
sQ1,
sMedian,
sQ3,
timeProductive,
timeLost);
#else
statpr.StatPrintf(" │ %-35s %6d %13s %13s %13s %13s %13s %13s │\n",
dvmhStatMetricsTitles[metric],
dvmhStatMetric->countMeasures,
sMin,
sMax,
sSum,
sMean,
timeProductive,
timeLost);
#endif
free(sMin);
free(sMax);
free(sSum);
free(sMean);
#if DVMH_EXTENDED_STAT == 1
free(sQ1);
free(sMedian);
free(sQ3);
#endif
free(timeProductive);
free(timeLost);
}
statpr.StatPrintf("");
for (int i = 0; i < hr_len; ++i) {
statpr.StatPrintf("");
}
statpr.StatPrintf("\n");
#if DVMH_EXTENDED_STAT == 1
statpr.StatPrintf(" │ Productive time: %10.4fs%141s│\n", dvmhStatGpu->timeProductive, "");
statpr.StatPrintf(" │ Lost time : %10.4fs%141s│\n", dvmhStatGpu->timeLost, "");
#else
statpr.StatPrintf(" │ Productive time: %10.4fs%99s│\n", dvmhStatGpu->timeProductive, "");
statpr.StatPrintf(" │ Lost time : %10.4fs%99s│\n", dvmhStatGpu->timeLost, "");
#endif
statpr.StatPrintf("");
for (int i = 0; i < hr_len; ++i) {
if ((dvmhStatInterval->mask >> (gpu + 1)) > 0 && i == 3)
statpr.StatPrintf("");
else
statpr.StatPrintf("");
}
statpr.StatPrintf("\n");
}
if (isProcHeaderPrinted && np < qproc)
statpr.StatPrintf("\n");
}
statpr.StatPrintf("\n");
}
} // if nlev<=nlevel
n = stat.TreeWalk();
}
//names and times of processors
char *pname = NULL, *pnamemin = NULL, *pnamemax = NULL;
double time, mintime = DBL_MAX, maxtime = 0.0, sumtime = 0.0;
stat.NameTimeProc(0, &pname, &time);
if (pname == NULL)
{
//not MPI
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
unsigned long minn = 0, maxn = 0;
statpr.StatPrintf("%s", "-------------------------------------------------------------------------\n");
statpr.StatPrintf("Name (number) and performance time of processors\n");
for (unsigned long i1 = 0; i1 < qproc; i1++) {
stat.NameTimeProc(i1, &pname, &time);
sumtime = sumtime + time;
if (time < mintime) { mintime = time; minn = i1 + 1; pnamemin = pname; }
if (time > maxtime) { maxtime = time; maxn = i1 + 1; pnamemax = pname; }
statpr.StatPrintf("%s(%d) %lf\n", pname, i1 + 1, time);
}
statpr.StatPrintf("min - %s(%d) %lf; max - %s(%d) %lf; mid - %lf\n",
pnamemin, minn, mintime, pnamemax, maxn, maxtime, sumtime / qproc);
} // end try
catch (bad_alloc ex) {
printf("Out of memory\n");
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
catch (exception ex) {
printf("Exception in standart library %s\n", ex.what());
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
catch (char *str) {
printf("%s\n", str);
#ifdef __SPF_BUILT_IN_PPPA
return 1;
#else
exit(1);
#endif
}
return 0;
}
static char *humanizeSize(const unsigned long long bytes, const unsigned short precision) {
char *buf;
buf = (char *)malloc(16);
if (!buf) printf("-- humanizeSize No memeory! \n"); fflush(stdout);
if (bytes < 0) strcpy(buf, "-");
else if (bytes >= (1ll << 30)) sprintf(buf, "%.*lfG", precision, (double)bytes / (1 << 30));
else if (bytes >= (1ll << 20)) sprintf(buf, "%.*lfM", precision, (double)bytes / (1 << 20));
else if (bytes >= (1ll << 10)) sprintf(buf, "%.*lfK", precision, (double)bytes / (1 << 10));
else if (bytes >= 0) sprintf(buf, "%lluB", bytes);
else strcpy(buf, "?");
return buf;
}
static char *dvmhPrepareValue(
const double value,
const unsigned short isPositive,
const unsigned short isSize,
const unsigned short dashOnZero,
const unsigned short isTime )
{
char *buf;
//printf("dvmhPrepareValue: %.4lf isPositive=%d isSize=%d dashOnZero=%d\n", value, isPositive, isSize, dashOnZero);
buf = (char *) malloc(16);
if (!buf) printf("-- dvmhPrepareValue No memeory! \n"); fflush(stdout);
if (isPositive && value < 0) {
strcpy(buf, "-");
} else if (value == 0 && dashOnZero) {
strcpy(buf, "-");
} else if (isSize) {
free(buf);
buf = humanizeSize((unsigned long long) value, 3);
} else if (isTime) {
sprintf(buf, "%12.4lfs", value);
} else {
sprintf(buf, "%13.4lf", value);
}
return buf;
}