Skip to content

Commit

Permalink
Version 1.55.
Browse files Browse the repository at this point in the history
Modified bootci.py and changed its user interface. It now takes only a single filename
argument: a flat file produced either by flatfile.py or bma/booma. Output is the
same, apart from comments.
  • Loading branch information
alanrogers committed Aug 21, 2018
1 parent f878cb6 commit a92feb0
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 76 deletions.
140 changes: 65 additions & 75 deletions src/bootci.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@
###
#@file bootci.py
#@page bootci
#@brief Calculate confidence interval from legofit output files.
#@brief Calculate confidence interval from a flat file.
#
#bootci.py: calculate confidence interval from legofit output
#============================================================
#bootci.py: calculate confidence interval from a flat file.
#==========================================================
#
# usage: bootci.py [options] <central_tendency> <boot1> <boot2> ...
# usage: bootci.py [options] <foo.flat>
#
# where <foo.flat> is a file with parameters in columns and data sets
# in rows. Such files are produced by flatfile.py and booma. The first
# row should contain a column header listing parameter names. The second
# should contain parameter estimates for the real data. Each succeeding
# row should contain a parameter estimate for a bootstrap replicate.
#
# where <central_tendency> is the legofit output for the real data
# and the <boot*> files are legofit output for bootstrap replicates.
# Options may include:
#
# -c <x> or --conf <x> Set confidence level.
Expand All @@ -19,14 +23,6 @@
#
# The program writes to standard output.
#
#For example, suppose that we have run legofit against the real data
#to produce a file called lancre.legofit, and we've run it against 50
#bootstrap replicates to produce files boot1.legofit, boot2.legofit,
#and so on. Then the command
#
# bootci.py lancre.legofit boot*.legofit
#
#would read these files and print output like this
#
# # bootci.py run at: 2017-01-06 22:32:58.916983
# # real data: lancre.legofit
Expand Down Expand Up @@ -86,10 +82,14 @@ def usage(msg1):
print >> sys.stderr, msg1
msg = \
"""
usage: bootci.py [options] <central_tendency> <boot1> <boot2> ...
usage: bootci.py [options] <foo.flat>
where <foo.flat> is a file with parameters in columns and data sets
in rows. Such files are produced by flatfile.py and booma. The first
row should contain a column header listing parameter names. The second
should contain parameter estimates for the real data. Each succeeding
row should contain a parameter estimate for a bootstrap replicate.
where <central_tendency> is the legofit output for the real data
and the <boot*> files are legofit output for bootstrap replicates.
Options may include:
-c <x> or --conf <x> Set confidence level.
Expand All @@ -101,39 +101,50 @@ def usage(msg1):
print >> sys.stderr, msg
exit(1)

# Parse legofit output file. Return a tuple containing two lists:
# first a list of parameter names; second a list of parameter
# estimates.
def parselegofit(fname):
# Parse a flat file. Return (parnames, est, boot), where
# parnames is a list of parameter names, est is a list of parameter
# estimates, and boot is an array whose ij'th entry is the estimate
# of the i'th parameter in the j'th bootstrap replicate.
def parseflat(fname):
ifile = open(fname, "r")
parmap = {}
estmap = {}

parnames = None
estimates = None

for line in ifile:
# ignore comments and blank lines
if line[0] == "#":
continue
line = line.split("=")
if len(line) != 2:
line = line.strip().split("#")
line = line[0].strip()

line = line.split()
if len(line) == 0:
continue

# header (first non-comment line) contains parameter names
if parnames == None:
parnames = line
npar = len(parnames)
boot = [[] for i in range(npar)] # an array of empty lists
continue

key = line[0].strip()
if "Gaussian" in line[1]:
value = 1.0
else:
value = float(line[1].strip())
if len(line) != npar:
print "npar=%d linelen=%d" % (npar, len(line))
print line
assert len(line) == npar

if key in parmap:
estmap[key] = value
else:
parmap[key] = value
# line after header contains parameter estimates for real data
if estimates == None:
estimates = [float(x) for x in line]

# remaining lines contain bootstrap replicates
for i in range(npar):
boot[i].append(float(line[i]))

ifile.close()

parnames = sorted(estmap.keys())
estimates = len(parnames)*[0.0]
for i in range(len(parnames)):
estimates[i] = estmap[parnames[i]]
return (parnames, estimates)
return (parnames, estimates, boot)

# Interpolate in order to approximate the value v[p*(len-1)]. Raise
# exception if len(v)==0.
Expand All @@ -150,8 +161,7 @@ def interpolate(p, v):
return (1.0-w)*v[i] + w*v[j]

conf = 0.95
realdata = None
bootnames = []
fname = None
lbl = None

# Loop over command line arguments, ignoring the 0th.
Expand All @@ -175,45 +185,25 @@ def interpolate(p, v):
elif sys.argv[i][0] == "-":
usage("Unknown argument: %s" % sys.argv[i])
else:
if realdata == None:
realdata = sys.argv[i]
else:
bootnames.append(sys.argv[i])
if fname != None:
usage("Only one input file is allowed")
fname = sys.argv[i]
i += 1

if len(bootnames) < 2:
usage("Command line must list at least 3 input files")
if fname == None:
usage("Missing input file")

print "# bootci.py run at: %s" % datetime.datetime.now()
print "# real data:", realdata
print "# bootstrap replicates:",
for i in range(len(bootnames)):
print bootnames[i],
print
print "# input:", fname
print "# %s: %0.3f" % ("confidence", conf)

parnames, realEst = parselegofit(realdata)
mat = []
mat.append(realEst)

for name in bootnames:
parnames2, estimates = parselegofit(name)

if parnames != parnames2:
print >> sys.stderr, "Input files estimate different parameters"
print >> sys.stderr, " 1:", parnames
print >> sys.stderr, " 2:", parnames2
exit(1)

mat.append(estimates)

# transpose matrix, so that mat[i] is a list of estimates for a single
# parameter.
mat = zip(*mat)
assert len(mat) == len(parnames)
parnames, estimates, boot = parseflat(fname)

# number of parameters
npar = len(mat)
npar = len(parnames)

assert npar == len(estimates)
assert npar == len(boot)

tailProb = (1.0 - conf)/2.0

Expand All @@ -222,12 +212,12 @@ def interpolate(p, v):
else:
print "%10s %15s %15s %15s" % ("par", "est", "low", "high")
for i in range(npar):
v = sorted(mat[i])
v = sorted(boot[i])
lowBnd = interpolate(tailProb, v)
highBnd = interpolate(1.0-tailProb, v)
if lbl:
print "%10s %15.8f %15.8f %15.8f %s" % \
(parnames[i], realEst[i], lowBnd, highBnd, lbl)
(parnames[i], estimates[i], lowBnd, highBnd, lbl)
else:
print "%10s %15.8f %15.8f %15.8f" % \
(parnames[i], realEst[i], lowBnd, highBnd)
(parnames[i], estimates[i], lowBnd, highBnd)
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#ifndef VERSION
#define VERSION "1.54"
#define VERSION "1.55"
#endif

0 comments on commit a92feb0

Please sign in to comment.