// each cell has a gid and each piece has a special spgid which will be
// equal to gid on the piece containing the output port. The idea is that
// from a spgid, one can derive the gid.
// Also the set of sid for each cell is encoded to make one cells set distinct
// from any other cell.
{load_file("stdlib.hoc")}
if (name_declared("localloadfile")) {
execute("localloadfile(\"msdiv.hoc\")")
}else{
load_file("msdiv.hoc")
}
begintemplate CellBalancePiece
public sid, idvec, cx, host, merge
objref idvec
proc init() {local i, n
sid = $2
host = -1
cx = $o1.scanvar
n = $o1.scanvar
idvec = new Vector()
for i=0, n-1 {
idvec.append($o1.scanvar)
}
}
// all subtrees of the idvec connected to 0 of the first idvec
// $o1 is the cell SectionRef list
proc merge() {local i localobj sr
if(0) for i=0,idvec.size-1 {
sr = $o1.object(idvec.x[i])
sr.parent printf("original %s connect ", secname())
sr.sec printf("%s(%g), (%g) : %d\n", secname(), section_orientation(), parent_connection(), idvec.x[i])
}
for i=1, idvec.size-1 {
sr = $o1.object(idvec.x[i])
//sr.sec {printf("disconnect %d %s\n", i, secname())}
//$o1.object(idvec.x[0]).sec { printf("reconnect to %s(0)\n", secname())}
sr.sec { disconnect() }
$o1.object(idvec.x[0]).sec { connect sr.sec(0), 0 }
}
}
endtemplate CellBalancePiece
begintemplate CellBalanceInfo
public gid, cx, subtrees, pcx, host, distinct_hosts, nsid, spgid
public multisplit, tmphost
external multisplit_divide
objref subtrees
proc init() {local isid, nsub, isub
tmphost = -1 // prevent multiple pieces on one host for trial
host = -1
subtrees = new List()
gid = $o1.scanvar
spgid = gid
cx = $o1.scanvar
nsid = $o1.scanvar
for isid = 0, nsid-1 {
nsub = $o1.scanvar
for isub = 0, nsub-1 {
subtrees.append(new CellBalancePiece($o1, isid))
}
}
if (subtrees.count == 1) {
// printf("%d only has 1 piece. Do not split\n", gid)
nsid = 0
subtrees.remove_all
}
}
func pcx() {local i, c
c = 0
if (subtrees.count == 0 || $1 == 0) {
c = cx
}else for i = 0, subtrees.count-1 {
c += subtrees.object(i).cx
}
return c
}
func distinct_hosts() {local i, j, h
for i=0, subtrees.count-2 {
h = subtrees.object(i).host
for j=i+1, subtrees.count-1 {
if (h == subtrees.object(j).host) {
return 0
}
}
}
return 1
}
// $o1 NetCon with cell output source
// $2 is the binfo msgid.
// $o3 is the ParallelContext
// on entry the entire cell exists, on exit only the subtrees
// for this host. spgid will be gid for the subtree left containing
// the output port and otherwise will be derived from $2 and the subtree index
// The spgid will be registered for this machine and associated with the
// root of the subtree (but without it being an output port). The exception
// is that the subtree with the output port will have gid registered and
// be associated with that output port.
proc multisplit() {localobj srout, cell
$o1.preloc() srout = new SectionRef() pop_section()
cell = $o1.precell
// spgid = gid + $2*(subtree_index + 1) except the subtree
// with the output port has spgid = gid
msdiv(cell, srout, $2, $o3)
if (srout.exists()) {
$o3.cell(gid, $o1, 1)
//srout.sec printf("%d gid=%d for %s\n", $o3.id, gid, secname())
}
}
proc msdiv() {local i, sid, spgid, msgid \
localobj cell, srout, pc, sl, allsr, srlist, sidvec, sr, vsav, hosts, nil
cell = $o1
srout = $o2
msgid = $3
pc = $o4
allsr = new List()
sl = new SectionList()
// if top level cell use currently accessed section
if (object_id(cell) == 0) {
sl.wholetree
}else{
if (section_exists("soma", cell)) {
$o1.soma { sl.wholetree }
}else{ // at least there should be an all SectionList
i = 0
forsec cell.all {
if (i == 0) {
sl.wholetree
}
i += 1
}
}
}
forsec sl { allsr.append(new SectionRef()) }
vsav = new Vector(allsr.count)
for i=0, subtrees.count-1 {
subtrees.object(i).merge(allsr)
}
for i=0, allsr.count-1 allsr.object(i).sec {
vsav.x[i] = v(.0001)
v(.0001) = -1
}
srlist = new List()
sidvec = new Vector()
hosts = new Vector()
sid = 0
for i=1, subtrees.count-1 {
sr = allsr.object(subtrees.object(i).idvec.x[0])
sr.parent if (v(.0001) == -1) {v(.0001) = sid sid += 1 }
}
for (i = subtrees.count-1; i > 0; i -= 1) {
sr = allsr.object(subtrees.object(i).idvec.x[0])
srlist.append(sr)
sr.parent { sidvec.append(gid*100 + v(.0001)) }
hosts.append(subtrees.object(i).host)
}
sidvec.append(-1)
allsr.object(0).root srlist.append(new SectionRef())
hosts.append(subtrees.object(0).host)
if (0 && pc.id == 0) for i=0, sidvec.size-1 srlist.object(i).sec {
printf("%d %ld %d %s\n", i, sidvec.x[i], hosts.x[i], secname())
}
multisplit_divide(sidvec, srlist, hosts)
for i=0, allsr.count -1 {
sr = allsr.object(i)
if (sr.exists) sr.sec {
v(.0001) = vsav.x[i]
}
}
for i=0, srlist.count-1 {
sr = srlist.object(i)
if (sr.exists()) {
spgid = gid + msgid*(i + 1)
if (srout.exists()) srout.root if (sr.is_cas()) {
spgid = gid
}
pc.set_gid2node(spgid, pc.id)
if (spgid != gid) sr.sec {
pc.cell(spgid, new NetCon(&v(.5), nil), 0)
//printf("%d spgid=%d for %s\n", pc.id, spgid, secname())
} // else pc.cell will be called on outport after return
}
}
}
endtemplate CellBalanceInfo
begintemplate BalanceInfo
public bilist, cx, npiece, metis_weight, run_metis, metis_hosts, distinct_hosts
public nhost, stat, basename, write_balhost, msgid, write_hostcontext
public items, gids, sindices, mymetis, mymetis2, mybal
public base_gid, thishost_gid, gid2cx, write_colgid
objref bilist
strdef basename
objref items, gids, sindices // when only a few are read
proc init() {
msgid = -1
nhost = -1
ihost = -1
bilist = new List()
if (numarg() == 0) {
return
}
basename = $s1
if (numarg() == 1) {
read_all()
}else if (numarg() == 3) {
read_host($2, $3)
}
}
proc read_all() {local i1, i2, n1, n2 localobj f, s
f = new File()
s = new String()
sprint(s.s, "%s.dat", basename)
f.ropen(s.s)
n1 = f.scanvar
for i1=0, n1 - 1 {
n2 = f.scanvar
for i2=0, n2 -1 {
bilist.append(new CellBalanceInfo(f))
}
}
f.close()
}
proc read_host() {local i, n, k, i1, i2, n1, n2 localobj f, s, cb
items = new Vector()
gids = new Vector()
sindices = new Vector()
f = new File()
s = new String()
sprint(s.s, "%s.%d.dat", basename, $2)
f.ropen(s.s)
msgid = f.scanvar
nhost = f.scanvar
ihost = $1
for i=0, $1-1 {f.gets(s.s)}
if (f.scanvar != $1) { execerror("read_host() format error", "") }
n = f.scanvar // number of triples
for i=0, n-1 {
items.append(f.scanvar)
gids.append(f.scanvar)
sindices.append(f.scanvar)
}
f.close
if (0) {
printf("msgid=%d nhost=%d ihost=%d\n", msgid, nhost, ihost)
print "items" items.printf
print "gids" gids.printf
print "sindices" sindices.printf
}
if (items.size == 0) { return }
i = 0
k = 0
sprint(s.s, "%s.dat", basename)
f.ropen(s.s)
n1 = f.scanvar
for i1=0, n1 - 1 {
n2 = f.scanvar
for i2=0, n2 - 1 {
if (k == items.x[i]) {
cb = new CellBalanceInfo(f)
bilist.append(cb)
//printf("k=%d items.x[%d]=%d gid=%d %d\n", k, i, items.x[i], gids.x[i], \
//bilist.object(i).gid)
while(1) {
if (gids.x[i] != cb.gid) {
execerror("gid inconsistency in read_host ", "")
}
cb.host = ihost
cb.subtrees.object(sindices.x[i]).host = ihost
i += 1
if ( i >= items.size) {
f.close
return
}
if (k != items.x[i]) { break }
}
}else{
s = new CellBalanceInfo(f) // skip
}
k += 1
}
}
f.close()
}
proc metis_weight() {local i, j, c localobj cb, f, s
f = new File()
s = new String()
sprint(s.s, "%s.wt.dat", basename)
f.wopen(s.s)
f.printf("%d 0 10\n", npiece())
for i = 0, bilist.count - 1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
f.printf("%g\n", cb.cx)
} else for j=0, cb.subtrees.count-1 {
f.printf("%g\n", cb.subtrees.object(j).cx)
}
}
f.close
}
proc run_metis() {localobj s
s = new String()
sprint(s.s, "pmetis %s.wt.dat %d", basename, $1)
system(s.s)
}
proc metis_hosts() {local i, j localobj f, s, cb
s = new String()
f = new File()
sprint(s.s, "%s.wt.dat.part.%d", basename, $1)
if (f.ropen(s.s) == 0) {
run_metis($1)
f.ropen(s.s)
}
for i = 0, bilist.count - 1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
cb.host = f.scanvar
}else for j=0, cb.subtrees.count - 1 {
cb.subtrees.object(j).host = f.scanvar
}
}
nhost = $1
}
// Metis seems to be quite order dependent. In fact ordering from greatest
// to least can improve things considerably. I wonder if we can do this
// "simple" problem just as well with a trivial algorithm
proc mymetis() {local i, j, k, ncpu, th localobj wt, cb, cpu, wcpu, icell
wt = new Vector()
icell = new Vector()
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
wt.append(cb.cx)
icell.append(i)
} else for j=0, cb.subtrees.count-1 {
wt.append(cb.subtrees.object(j).cx)
icell.append(i)
}
}
cpu = new Vector(wt.size)
wcpu = new Vector($1)
ncpu = 1e9
// do some trials with increasing threshold til it fits into $1
for (th = wt.sum/$1; ncpu > $1; th += th/100) {
ncpu = trial(th, wt, cpu, wcpu, icell)
//printf("wt.sum=%g wcpu.sum=%g\n", wt.sum, wcpu.sum)
//printf("balance = %d %% ncpu=%d\n", (wcpu.max/(wt.sum/$1) - 1)*100, ncpu)
}
k = -1
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
cb.host = cpu.x[k += 1]
} else for j=0, cb.subtrees.count-1 {
cb.subtrees.object(j).host = cpu.x[k += 1]
}
}
nhost = $1
}
proc mymetis2() {local i, j, k, ncpu, th, nsrt \
localobj wt, cb, cpu, wcpu, icell, srt, wsrt, pcnt, cells, pindex
wt = new Vector()
icell = new Vector()
pindex = new Vector()
for i=0, bilist.count-1 {
cb = bilist.object(i)
cb.host = -1
if (cb.subtrees.count == 0) {
wt.append(cb.cx)
icell.append(i)
pindex.append(-1)
} else for j=0, cb.subtrees.count-1 {
cb.subtrees.object(j).host = -1
wt.append(cb.subtrees.object(j).cx)
icell.append(i)
pindex.append(j)
}
}
srt = wt.sortindex.reverse
printf("%d pieces size max=%g min=%g\n", wt.size, wt.x[srt.x[0]], wt.x[srt.x[wt.size-1]])
// always put the next piece in the least filled cpu
ncpu = $1
cpu = new Vector(wt.size)
cpu.fill(-1)
wcpu = new Vector($1)
pcnt = new Vector($1)
for i=0, wt.size-1 { // i is cell index
j = wcpu.min_ind
//if j already has a piece from this cell, choose another
// No. allow multiple pieces on same cpu
if (0) j = distinct_piece(j, wcpu, bilist.object(icell.x[srt.x[i]]))
wcpu.x[j] += wt.x[srt.x[i]]
cpu.x[srt.x[i]] = j
pcnt.x[j] += 1
if (pindex.x[srt.x[i]] == -1) {
bilist.object(icell.x[srt.x[i]]).host = j
}else{
bilist.object(icell.x[srt.x[i]]).subtrees.object(pindex.x[srt.x[i]]).host = j
}
}
printf("ncell = %d nhost = %d\n", i, $1)
printf("max and min complexity %g %g avg = %g\n", wcpu.max, wcpu.min, wcpu.mean)
printf("load balance %g\n", wcpu.max/wcpu.mean)
printf("# max and min # cells on cpu %d %d\n", pcnt.max, pcnt.min)
k = -1
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
cb.host = cpu.x[k += 1]
} else for j=0, cb.subtrees.count-1 {
cb.subtrees.object(j).host = cpu.x[k += 1]
}
}
nhost = $1
}
func distinct_piece() {local i, j, savx localobj piece
j = $1
for i=0, $o3.subtrees.count-1 {
piece = $o3.subtrees.object(i)
if (piece.host == j) { // choose another
printf("not distinct on %d, choose another\n", $1)
savx = $o2.x[$1]
$o2.x[$1] = 1e9
j = distinct_piece($o2.min_ind, $o2, $o3)
$o2.x[$1] = savx
return j
}
}
return j
}
// read a weight file in metis format and do our own balance
// $s1 weight file name, $2 fits into that size nhost
proc mybal() { local i, ncpu, th localobj wt, cb, cpu, wcpu, f
execerror("no longer works. implement icell", "")
f = new File()
f.ropen($s1)
wt = new Vector()
wt.resize(f.scanvar()) f.scanvar() f.scanvar()
for i = 0, wt.size-1 {
wt.x[i] = f.scanvar()
}
cpu = new Vector(wt.size)
wcpu = new Vector($2)
ncpu = 1e9
// do some trials with increasing threshold til it fits into $2
for (th = wt.max; ncpu > $2; th += th/100) {
ncpu = trial(th, wt, cpu, wcpu)
}
printf("wt.sum=%g wcpu.sum=%g\n", wt.sum, wcpu.sum)
printf("balance = %.2f ncpu=%d\n", wcpu.max/(wt.sum/$2), ncpu)
}
// return number of cpus filled based on first threshold arg
// $o2 is the sorted wt vector
// $o3 is returned as the vector of cpu#
// $o4 is returned as the weight on each cpu
// $o5 is parallel to wt and gives the index into bilist
// for purposes of avoiding more than one cell piece on a cpu
func trial() {local iwt, icpu, w, wcpu, i \
localobj tobj, filled, wt, srt, cpu, cand, wcand, icell
th = $1
wt = $o2.c
srt = wt.sortindex.reverse
cpu = $o3
$o4.resize(0)
icell = $o5
for i=0, bilist.count-1 { bilist.object(i).tmphost = -1 }
// minimum threshold is the largest weight
if (th < wt.x[srt.x[0]]) { th = wt.x[srt.x[0]] }
cpu.resize(wt.size)
filled = wt.c.fill(0)
icpu = 0
iwt = 0
wcpu = 0 // weight on icpu
while (iwt < wt.size) {
if (filled.x[srt.x[iwt]]) { iwt += 1 continue }
w = wt.x[srt.x[iwt]]
if (trial_large(wcpu + w <= th, icpu, bilist.object(icell.x[srt.x[iwt]]).tmphost)) {
//printf("fill iwt=%d wcpu=%g w=%g icpu=%d\n", iwt, wcpu, w, icpu)
tobj = bilist.object(icell.x[srt.x[iwt]])
if (tobj.tmphost == icpu) {
printf("can fill problem with %d\n", icell.x[srt.x[iwt]])
}
tobj.tmphost = icpu
wcpu += w
filled.x[srt.x[iwt]] = 1
wt.x[srt.x[iwt]] = 1e9 // do not use again
cpu.x[srt.x[iwt]] = icpu
iwt += 1
}else{ // does not fit
// is a smaller one available
cand = wt.c.indvwhere("<=", th - wcpu)
if (trial_small(wt, cand, icpu, &i, tobj, icell)) {
tobj = bilist.object(icell.x[i])
if (tobj.tmphost == icpu) {
printf("smaller problem with %d\n", icell.x[i])
}
tobj.tmphost = icpu
//printf("max i=%d size=%d wcpu=%g max=%g\n", i, cand.size, wcpu, wcand.max)
wcpu += wt.x[i]
wt.x[i] = 1e9
cpu.x[i] = icpu
filled.x[i] = 1
}else{
// guess that cpu is filled as much as feasible
//printf("filled iwt=%d wcpu=%g icpu=%d\n", iwt, wcpu, icpu)
if(wcpu == 0) { execerror("error in trial", "") }
$o4.append(wcpu)
wcpu = 0
icpu += 1
}
}
}
// may be a last one that did not get into wcpu
if (wcpu) {
$o4.append(wcpu)
icpu += 1
}
// printf("trial $1=%g th=%g ncpu=%d\n", $1, th, icpu)
return icpu
}
func trial_large() {
if ($1 == 0) { return 0 }
if ($2 == $3) {
// printf("trial_large problem fixed\n")
return 0
}
return 1
}
func trial_small() {local i, icpu localobj wt, cand, tobj, wcand, icell
wt = $o1
cand = $o2
icpu = $3
tobj = $o5
icell = $o6
while (cand.size) {
wcand = cand.c.index(wt, cand)
i = cand.x[wcand.max_ind]
tobj = bilist.object(icell.x[i])
if (tobj.tmphost == icpu) {
// printf("trial_small problem with %d, fixed\n", icell.x[i])
cand.remove(wcand.max_ind)
continue
}
$&4 = i
return 1
}
return 0
}
proc distinct_hosts() { local i
for i=0, bilist.count-1 {
if (bilist.object(i).distinct_hosts == 0) {
printf("hosts for bilist[%d] not distinct\n", i)
}
}
}
func cx() {local i, c, j
j = 0
if (numarg() == 1) {
j = $1
}
c = 0
for i = 0, bilist.count-1 {
c += bilist.object(i).pcx(j)
}
return c
}
func npiece() {local i, n
n = 0
for i=0, bilist.count-1 {
if ( bilist.object(i).subtrees.count == 0) {
n += 1
}else{
n += bilist.object(i).subtrees.count
}
}
return n
}
proc stat() {local i, j, c, cmax, mcp, mxs, mcpi, mxsi localobj cxvec, npvec, cb, p
if (nhost == -1) return
mcp = 1
mxs = 0
cxvec = new Vector(nhost)
npvec = new Vector(nhost)
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.nsid > mxs) {
mxs = cb.nsid
mxsi = i
}
if (cb.subtrees.count > mcp) {
mcp = cb.subtrees.count
mcpi = i
}
if (cb.subtrees.count == 0) {
cxvec.x[cb.host] += cb.cx
npvec.x[cb.host] += 1
}else for j=0, cb.subtrees.count-1 {
p = cb.subtrees.object(j)
cxvec.x[p.host] += p.cx
npvec.x[p.host] += 1
}
}
c = cx()
cmax = cxvec.max
printf("total complexity is %g\n", c)
printf("%d cells\n", bilist.count)
printf("%d pieces\n", npiece())
printf("maximum complexity is %g for host %g\n", cmax, cxvec.max_ind)
printf("load imbalance is %.0f%%\n", 100*cmax*nhost/c - 100)
printf("maximum of %d pieces on host %g\n", npvec.max, npvec.max_ind)
if (mcp > 1) {
printf("at least one cell is broken into %d pieces (bilist[%d], gid %d)\n", mcp, mcpi, bilist.object(mcpi).gid)
printf("at least one cell has %d sids (bilist[%d], gid %d)\n", mxs, mxsi, bilist.object(mxsi).gid)
}else{
printf("no broken cells\n")
}
}
// write a host file with the format
// max_gid
// nhost
// ihost ngid for that host followed by a list of (index, gid, subtreeindex) triples
// the indices will be in order and the gid is just for error checking
// the idea is to allow the parallel run to not have to save all the info
proc write_balhost() {local i, j, mg localobj hostgids, cb, p, f, s
if (nhost == -1) return
hostgids = new List()
for i=0, nhost-1 { hostgids.append(new Vector()) }
mg = -1
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.gid > mg) {
mg = cb.gid
}
}
for (msgid=10; msgid <= mg; msgid *= 10) {}// encode gid in spgid along with subtree index
if (msgid < 1e7) {
msgid = 1e7 // there may be unused gids in BlueBrain
}
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
hostgids.object(cb.host).append(i)
hostgids.object(cb.host).append(0)
}else for j=0, cb.subtrees.count-1 {
p = cb.subtrees.object(j)
hostgids.object(p.host).append(i)
hostgids.object(p.host).append(j)
}
}
s = new String()
sprint(s.s, "%s.%d.dat", basename, nhost)
f = new File()
f.wopen(s.s)
f.printf("msgid %d\n", msgid)
f.printf("nhost %d\n", hostgids.count)
for i=0, hostgids.count-1 {
p = hostgids.object(i)
f.printf("%d %d", i, p.size/2)
for (j = 0; j < p.size; j += 2) {
f.printf(" %d %d %d", p.x[j], bilist.object(p.x[j]).gid, p.x[j+1])
}
f.printf("\n")
}
f.close
}
// derived from write_balhost but output format is suitable for
// the fast_create.hoc file
proc write_colgid() {local i, j, mg localobj hostgids, cb, p, f, s
if (nhost == -1) return
hostgids = new List()
for i=0, nhost-1 { hostgids.append(new Vector()) }
mg = -1
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.gid > mg) {
mg = cb.gid
}
}
for (msgid=10; msgid <= mg; msgid *= 10) {}// encode gid in spgid along with subtree index
if (msgid < 1e7) {
msgid = 1e7 // there may be unused gids in BlueBrain
}
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.subtrees.count == 0) {
hostgids.object(cb.host).append(i)
hostgids.object(cb.host).append(0)
}else for j=0, cb.subtrees.count-1 {
p = cb.subtrees.object(j)
hostgids.object(p.host).append(i)
hostgids.object(p.host).append(j)
}
}
s = new String()
sprint(s.s, "%s.%d.dat", basename, nhost)
f = new File()
f.wopen(s.s)
f.printf("ncell %d\nnhost %d\n", bilist.count, hostgids.count)
for i=0, hostgids.count-1 {
p = hostgids.object(i)
for (j = 0; j < p.size; j += 2) {
f.printf("%d %d\n", bilist.object(p.x[j]).gid, i)
}
}
f.close
}
// just enough basename.ncpu.dat to get complete host communication for
// specified host arg. 2nd arg is the nhost for the original basename.nhost.dat
// file
// $3 is the level
proc write_hostcontext() {local i, j, il, ngid, gid, host \
localobj f, s, cb, hostgids, gvec, mark, marked, gidvec, hostvec, gi
//first, read the complete basename.$2.dat file
f = new File()
s = new String()
sprint(s.s, "%s.%d.dat", basename, $2)
f.ropen(s.s)
msgid = f.scanvar
nhost = f.scanvar
gidvec = new Vector()
hostvec = new Vector()
hostgids = new List()
mark = new Vector(nhost)
for i=0, nhost-1 {
if (i != f.scanvar) { execerror("bad format for ", s.s) }
gvec = new Vector()
hostgids.append(gvec)
ngid = f.scanvar
for j=0, ngid-1 {
gvec.append(f.scanvar)
gid = f.scanvar
gvec.append(gid)
gvec.append(f.scanvar)
gidvec.append(gid)
hostvec.append(i)
}
}
// now mark the hosts we need, ie. every host that has a gid
// referred to by $1
mark.x[$1] = 1
gvec = hostgids.object($1)
domark(gvec, gidvec, hostvec, mark)
for il=1, $3-1 {
// try the second level
gi = mark.c.indvwhere("==", 1)
for i=0, gi.size-1 {
domark(hostgids.object(gi.x[i]), gidvec, hostvec, mark)
}
}
// print the reduced basename.ncpu.dat file
marked = mark.c.indvwhere("==", 1)
sprint(s.s, "%s.%d.dat", basename, marked.size)
f.wopen(s.s)
f.printf("msgid %d\n", msgid)
f.printf("nhost %d\n", marked.size)
for i=0, marked.size-1 {
j = marked.x[i]
gvec = hostgids.object(j)
f.printf("%d %d", i, gvec.size/3)
// f.printf("%d %d", marked.x[i], gvec.size/3)
for (j=0; j < gvec.size; j += 3) {
f.printf(" %d %d %d", gvec.x[j], gvec.x[j+1], gvec.x[j+2])
}
f.printf("\n")
}
print "wrote ", s.s
}
proc domark() {local i, j, gid localobj gvec, gidvec, hostvec, mark, gi
gvec = $o1 gidvec = $o2 hostvec=$o3 mark=$o4
for (i=0; i < gvec.size-1; i += 3) {
gid = gvec.x[i+1]
gi = gidvec.c.indvwhere("==", gid)
for j=0, gi.size-1 {
mark.x[hostvec.x[gi.x[j]]] = 1
}
}
}
func base_gid() {
return $1 % msgid
}
func thishost_gid() {local i
if ($1 >= msgid) { return $1 }
i = gids.indwhere("==", $1)
if (i < 0) { return -1 }
return bilist.object(i).spgid
}
func gid2cx() {local i localobj cb
for i=0, bilist.count-1 {
cb = bilist.object(i)
if (cb.spgid == $1) {
return cb.cx
}
}
return 0
}
endtemplate BalanceInfo
proc mkbalinfo() { localobj bi
bi = new BalanceInfo($s1)
if (0) {
bi.metis_weight()
bi.run_metis($2)
bi.metis_hosts($2)
}else{
bi.mymetis($2)
}
bi.distinct_hosts()
bi.stat()
bi.write_balhost()
}
proc mkcolgid() { localobj bi
bi = new BalanceInfo($s1)
bi.mymetis($2)
bi.distinct_hosts()
bi.stat()
bi.write_colgid()
}
proc mymetis2() { localobj bi
bi = new BalanceInfo($s1)
bi.mymetis2($2)
bi.stat()
bi.write_colgid()
}
proc mymetis3() { localobj bi
bi = new BalanceInfo($s1)
bi.mymetis2($2)
bi.stat()
bi.write_balhost()
}
objref bi
if (0) {
bi = new BalanceInfo("balinfo")
print "total cell complexity = ", bi.cx()
print "total piece complexity = ", bi.cx(1)
print bi.npiece(), " pieces"
print bi.bilist.count, " cells"
bi.metis_weight()
//bi.run_metis(1024)
bi.metis_hosts(1024)
bi.distinct_hosts()
bi.stat()
bi.write_balhost()
}
if (0) {
objref bi
bi = new BalanceInfo("balinfo", 21, 1024)
}
if (0) {
objref bi
bi = new BalanceInfo("balinfo")
bi.write_hostcontext(860, 1024, 5)
}
if (0) {
bi = new BalanceInfo("balbb520")
bi.metis_weight()
bi.run_metis(1024)
bi.metis_hosts(1024)
bi.distinct_hosts()
bi.stat()
}
if (0) {
bi = new BalanceInfo("bi")
bi.mymetis(8192)
bi.distinct_hosts()
bi.stat()
bi.write_balhost()
}
if (0) {
bi = new BalanceInfo("cx")
bi.mymetis2(8192)
}