def monte_carlo(df, n=1000, verbose=False):
    """Run a Monte Carlo simulation over a table of outcome probabilities.

    Each row of ``df`` is a case and each column is a possible outcome; the
    cell holds the probability (in the range 0..1) of that outcome for that
    case. For every case, ``n`` outcomes are drawn at random according to
    the row's probabilities, using the global ``np.random`` state (so calls
    are reproducible after ``np.random.seed``).

    Args:
        df: DataFrame of probabilities; every row must sum to 1.0 (within
            floating-point tolerance) and contain no negative values.
        n: Number of simulations to run.
        verbose: If true, print each case label as it is simulated.

    Returns:
        A tuple ``(simulation, tally, summary)`` of three DataFrames:

        * ``simulation`` - raw results: one row per simulation, one column
          per case, each cell holding the drawn outcome label.
        * ``tally`` - one row per simulation, one column per outcome, each
          cell counting the cases that produced that outcome.
        * ``summary`` - binned tally: indexed by count value, one column per
          outcome, each cell counting the simulations in which the outcome
          reached exactly that count.

    Raises:
        ValueError: If any probability is negative or any row does not sum
            to 1.0.
    """
    # contract - compare with a tolerance, because sums of floats such as
    # ten 0.1s are rarely exactly 1.0
    if (df < 0.0).values.any():
        raise ValueError('probabilities must not be negative')
    if not np.isclose(df.sum(axis=1), 1.0).all():
        raise ValueError('each row of probabilities must sum to 1.0')

    # monte carlo - we do this by rows to use the pandas.cut() function
    print('Doing the MC simulation ...')
    simulation = {}
    for name, series in df.iterrows():
        # for each case MC simulate n outcomes ...
        if verbose:
            print(name)

        # set up this simulation
        votes = np.random.rand(n)
        # zero-probability outcomes are dropped so bin edges stay unique
        s = series[series > 0.0]
        labels = s.index.tolist()
        cuts = [0.0] + s.cumsum().tolist()
        # pin the top edge: a cumulative sum of 0.9999999... would otherwise
        # leave high draws outside every bin (NaN)
        cuts[-1] = 1.0

        # and simulate; include_lowest covers a draw of exactly 0.0
        simulation[name] = pd.cut(votes, bins=cuts, labels=labels,
                                  precision=7, include_lowest=True)

    # Take the dictionary of case outcomes above,
    # Put them into a DataFrame as columns.
    # So that we can subsequently tally rows of simulation outcomes below.
    simulation = pd.DataFrame(simulation)

    # tally the results - for each simulation
    print('Tallying the results of the MC simulation ...')
    tally = {name: series.value_counts()
             for name, series in simulation.iterrows()}
    tally = pd.DataFrame.from_dict(tally, orient='index')
    # an outcome absent from a simulation has a count of zero, not NaN
    tally = tally.fillna(0).astype(int)

    # - summarise into value_counts
    print('Summarising the tally of the MC simulation ...')
    # .items() replaces .iteritems(), which was removed in pandas 2.0
    summary = {name: series.value_counts()
               for name, series in tally.items()}
    summary = pd.DataFrame(summary).fillna(0).astype(int)

    # - return simulation summary
    return (simulation, tally, summary)
Sunday, 26 June 2016
Monte Carlo simulation
Yesterday I coded a quick and dirty Monte-Carlo simulation function. Here it is.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment