/* File is constructed in perfrun.hoc by printing the tperf for rank 0
using p_perf where the vector tperf is retrieved after a psolve via
tdat_.x[5] = pnm.pc.send_time(4, chist, tperf)
The first item in the file after the header is the size of tperf
See bgpdma.cpp nrn_bgp_receive_time for the meaning of the first arg. In
particular, 4 returns the number of extra conservation checks, fills chist
with the histogram of number of intervals using 0-11 conservation checks,
and fills tperf with itbuf items with the itbuf+1 item being the DCMF_Tick().
The items are in sets of DCMF_Timebase() as indicated below.
It seems itbuf is initialized only at launch and not in bgp_dma_init (which
would allow multiple runs per launch).
All the TBUF fills take place in bgp_dma_receive
Given the several multisend methods we are using and the variation in
network parameters, it is a good idea to prepend run information to the file
*/
// allgather 201 (usually) sets of
// 0 enter nrn_spike_exchange_compressed
// 1 after barrier
// 2 after nrnmpi_spike_exchange_compressed (allgather, allgatherv)
// 3 #sent (cells that spiked)
// 4 #received
// 5 exit nrn_spike_exchange_compressed (after find and enqueue)
// multisend 401 (usually) sets of
// 0 enter bgp_dma_receive
// 1 after DCMF_Messager_advance (or bgp_advance)
// 2 after barrier
// 3 after conserve
// 4 ncons
// 5 #cells_that_fired
// 6 #sent (cell spiked * fanout)
// 7 #received
// 8 # cumulative spike send time (BGP_DMASend::send
// if ENQUEUE == 1 then 9 incoming spikes are saved in buffer, TBUF, then to queue
// if ENQUEUE == 2 then 9 and 10 are find and enqueue times for the interval
// but do not include time during barrier and conserve
// 9 + ENQUEUE exit bgp_dma_receive
{load_file("nrngui.hoc")}
objref td[1], vmore[1]
strdef fname
proc rd() {local i, j, k, n, size, toff, tm, methodinfo \
localobj f, indices, s
f = new File()
f.chooser("r", "Read", "*.*")
if (numarg() == 1) {
fname = $s1
}else{
if (!f.chooser()){
quit()
}
fname = f.getname()
}
f.ropen(fname)
// read header
s = new String()
f.gets(s.s) printf("%s", s.s)
nver=0
sscanf(s.s, "%*[^(](%d)", &nver)
f.gets(s.s) printf("%s", s.s)
if (nver >= 534) { f.gets(s.s) printf("%s", s.s) }
methodinfo = f.scanvar()
method = methodinfo%4
enqueue=int(methodinfo/32)
phase2=int(methodinfo/8)%2
if (method == 0) { enqueue = 0 }
n_bgp_interval = (int(methodinfo/4)%2) + 1
printf("method=%d n_bgp_interval=%g enqueue=%g phase2=%g\n", method, n_bgp_interval,enqueue,phase2)
nhost = f.scanvar()
printf("nhost=%d\n", nhost)
f.gets(s.s) printf("%s", s.s)
f.gets(s.s) printf("%s", s.s)
f.gets(s.s) printf("%s", s.s)
while(1) {
f.gets(s.s) printf("%s", s.s)
if (strcmp(s.s,"@@@end header@@@\n") == 0) { break }
sscanf(s.s, "%[^\n]", s.s)
execute(s.s)
}
indices = new Vector()
if (nver < 519) {
nd = 8 + (enqueue == 1)
indices.append(0,1,2,3)
}else if (nver < 523){
nd = 9 + (enqueue == 1)
indices.append(0,1,2,3)
}else{
if (method == 0) {
nd = 6
indices.append(0,1,2,5)
}else{
if (nver < 544) {
nd = 10 + enqueue
}else{
nd = 10 + enqueue + 2*phase2
}
indices.append(0,1,2,3,nd-1)
if (enqueue == 1) {
indices.append(nd-2)
}
}
}
objref td[nd]
// read data
size = (f.scanvar()-1)/nd
print size
for i=0, nd-1 {td[i] = new Vector(size)}
for i = 0, size-1 {
for j=0,nd-1 {
td[j].x[i] = f.scanvar()
}
}
ts = f.scanvar()
nmore = f.scanvar()
if (nmore > 0) objref vmore[nmore]
for i=0, nmore-1 {
n = f.scanvar()
vmore[i] = new Vector(n)
vmore[i].scanf(f, n)
}
tm = 2^32
toff = td[0].x[0]
for k=0, indices.size-1 {
j = indices.x[k]
// all vectors start at 0
td[j].sub(toff)
// take wrap around into account so vectors increase monotonically
for i=1, size-1 {
while (td[j].x[i] < td[j].x[i-1]) {
td[j].x[i] += tm
}
}
//td[j].mul(ts)
}
print ts
f.close
}
//rd(1)
obfunc combine() { local i localobj x
x = new Vector()
printf("combine size=%d\n", $o1.size)
for (i=0; i < $o1.size; i += 2) {
x.append($o1.x[i] + $o1.x[i+1])
}
return x
}
objref g, g1, g2, yy[7], name[7]
proc anal() {local i, j, a, b, n, c localobj tt, xx, s1
// for multisend
// yy[0] is from after conserve (3) to before barrier (1). i.e
// overlap between computation and spike exchange and enqueing
// yy[1] is from after conserve to next after conserve, i.e.
// the total interval time.
// for allgather, yy[0] is from after exchange(2) to enter exchange(0)
// yy[1] is from enter to enter
for i=0, 1 yy[i] = new Vector()
if (method == 0) {
for i=0, td[2].size-2 yy[0].append(td[0].x[i+1] - td[2].x[i])
for i=0, td[0].size-2 yy[1].append(td[0].x[i+1] - td[0].x[i])
}else{
for i=0, td[3].size-2 yy[0].append(td[1].x[i+1] - td[3].x[i])
for i=0, td[1].size-2 yy[1].append(td[3].x[i+1] - td[3].x[i])
}
for i=0, 1 yy[i].div(1e6)
name[0] = new String("interval compute")
name[1] = new String("total interval")
}
proc gr() {local i, j, a, b, n, c localobj tt, xx, s1
g1 = new Graph(0)
g1.view(0,0,200,12, 100, 100, 500, 250)
//g2 = new Graph()
g1.size(0,200,0,15)
//g2.size(0,40,0,6e6)
g1.label(.3, .9, fname, 2)
//g2.label(.5, .9, fname, 2)
g = g1
anal()
n = yy[0].size
if (n > 300) {
g1.size(0,400,0,6)
}else{
g1.size(0,200,0,12)
}
for case (&i, 0, 1) {
labl(yy[i], i)
c = i+1
if (nmore > 0) {
if (i == 0) j = 1 else j = 0
vmore[j*2].c.div(1e6).div(nhost).line(g, c, 3)
vmore[j*2+1].c.div(1e6).line(g, c, 6)
}
yy[i].line(g, c, 1)
}
}
proc labl() {local x localobj s
s = new String()
x = $o1.sum*ts*1e6
printf("%3.2fs %s\n", x, name[$2].s)
sprint(s.s, "%3.2fs %s", x, name[$2].s)
g1.label($o1.size*1.02, $o1.c($o1.size-101, $o1.size-1).mean,s.s,1,1,0,.5,1)
}
//gr()
objref list
list = new List()
proc again() {
list.append(g1) list.append(g2)
rd()
gr()
}
proc ag() {
list.append(g1) list.append(g2)
rd($s1)
gr()
}
objref flist
flist = new List()
proc fchoose() {local i, n localobj s, po
s = new String()
system("cd dat ; ls t*", s.s)
po = new PythonObject()
po.s = s.s
nrnpython("fnames = s.splitlines()")
n = po.len(po.fnames)
for i=0, n-1 {
flist.append(new String(po.fnames._[i]))
}
flist.browser("Choose File", "s")
flist.select_action("select(hoc_ac_)")
}
proc select() {localobj s
if ($1 < 0) { return }
s = new String()
sprint(s.s, "dat/%s", flist.o($1).s)
ag(s.s)
}
fchoose()
/*
ag("nrn-results/1kconn/results_256_8/t0perf18.1k.8K.dat")
ag("nrn-results/1kconn/results_256_8/t2perf18.1k.8K.dat")
ag("nrn-results/1kconn/results_256_8/t6perf18.1k.8K.dat")
ag("nrn-results/1kconn/results_256_16/t0perf18.0.16384.dat")
ag("nrn-results/1kconn/results_256_16/t2perf18.0.16384.dat")
ag("nrn-results/1kconn/results_256_16/t6perf18.0.16384.dat")
ag("nrn-results/1kconn/results_256_32/t0perf18.0.32768.dat")
ag("nrn-results/1kconn/results_256_32/t2perf18.0.32768.dat")
ag("nrn-results/1kconn/results_256_32/t6perf18.0.32768.dat")
*/
proc perf_t2_1k() {
ag("dat/t2perf22.1k.16K.86.550.0.0")
ag("dat/t2perf22.1k.16K.94.550.0.0")
ag("dat/t2perf22.1k.16K.82.551+.0.0")
ag("dat/t2perf22.1k.16K.90.552.0.0")
}
proc perf_t2_10k() {
ag("dat/t2perf21.10k.64K.86.549+.7.0")
ag("dat/t2perf21.10k.64K.94.549+.10.0")
ag("dat/t2perf21.10k.64K.82.552.0.0")
ag("dat/t2perf21.10k.64K.90.549+.31.0")
}
proc perf_t3() {
ag("dat/t3perf24.1k.64K.83.549+.24.0")
ag("dat/t3perf24.1k.64K.87.542.0")
ag("dat/t3perf21.10k.64K.83.549+.32.0")
ag("dat/t3perf21.10k.64K.87.549+.6.0")
}
proc perf_t2_greedy_enqueue() {
ag("debug/t1perf17.1k.512.93.542.1.0")
}
proc table() {local i localobj fnl, sl, r, m, f
sl = new StringFunctions()
r = new String()
fnl = new List()
for i=0, flist.count-1 {
if (sl.head(flist.o(i).s, "54[789].*\\.0$", r.s) != -1) {
fnl.append(flist.o(i))
}else if (sl.head(flist.o(i).s, "551.*\\.0$", r.s) != -1) {
fnl.append(flist.o(i))
}
}
//ncell ncon nhost meth invl phase comp total
m = new Matrix(fnl.count, 9)
chdir("dat")
for i=0, fnl.count-1 {
print fnl.o(i).s
rd(fnl.o(i).s)
anal()
m.x[i][0] = 2^ncellpow
m.x[i][1] = ncon
m.x[i][2] = nhost
m.x[i][3] = method
m.x[i][4] = n_bgp_interval
m.x[i][5] = phase2
m.x[i][6] = yy[0].sum*ts*1e6
m.x[i][7] = yy[1].sum*ts*1e6
m.x[i][8] = set_maxstep_time
}
chdir("..")
m.printf(" %4.2f")
f = new File()
f.wopen("table.dat")
m.fprint(f, " %.20g")
f.close()
}
//table()