# 利用python绘制数据的瀑布图的教程

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Row labels for the transactions and the signed amount of each one.
index = ['sales', 'returns', 'credit fees', 'rebates', 'late charges', 'shipping']
data = {'amount': [350000, -30000, -7500, -25000, 95000, -7000]}
# One row per transaction type, with a single "amount" column.
trans = pd.DataFrame(data=data, index=index)

# The package name is case-sensitive: it is ``IPython``, not ``ipython``.
from IPython.display import display
display(trans)

# Running total of the amounts after each transaction.
display(trans.amount.cumsum())
sales 350000
returns 320000
credit fees 312500
rebates 287500
late charges 382500
shipping 375500
Name: amount, dtype: int64

# Shifting the running total down one row gives, for every transaction,
# the total that had accumulated *before* it; the first row has no
# predecessor, so its missing value is filled with 0.
running_total = trans["amount"].cumsum()
blank = running_total.shift(1).fillna(0)
display(blank)
sales 0
returns 350000
credit fees 320000
rebates 312500
late charges 287500
shipping 382500
Name: amount, dtype: float64

# Sum of every transaction amount.
total = trans.sum().amount
# Append a "net" row carrying that sum to both the data and the blank series.
trans.loc["net"] = total
blank.loc["net"] = total
display(trans)
display(blank)

sales 0
returns 350000
credit fees 320000
rebates 312500
late charges 287500
shipping 382500
net 375500
Name: amount, dtype: float64

# Repeat every level three times and shift up by one so that consecutive
# points describe: start of a level, a gap, then the next level.
step = blank.reset_index(drop=True).repeat(3).shift(-1)
# Blank out every third point (positions 1, 4, 7, ...) so the line breaks
# there. ``.iloc`` makes the positional intent explicit, since the index
# now holds repeated integer labels.
step.iloc[1::3] = np.nan
display(step)
0 0
0 NaN
0 350000
1 350000
1 NaN
1 320000
2 320000
2 NaN
2 312500
3 312500
3 NaN
3 287500
4 287500
4 NaN
4 382500
5 382500
5 NaN
5 375500
6 375500
6 NaN
6 NaN
Name: amount, dtype: float64

# The "net" bar is drawn from the axis, so its bottom offset is reset to 0.
blank.loc["net"] = 0

# Bars are drawn starting at ``blank``; the black line overlays the levels
# held in ``step``.
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank, legend=None, title="2014 sales waterfall")
my_plot.plot(step.index, step.values, 'k')

def money(x, pos):
    """Format a tick value as whole dollars with thousands separators.

    Args:
        x: The numeric tick value.
        pos: The tick position; accepted for the formatter callback
            signature but not used.

    Returns:
        The value rounded to zero decimals, comma-grouped and prefixed
        with a dollar sign, e.g. ``"$350,000"``.
    """
    return "${:,.0f}".format(x)
# The class name is case-sensitive: ``FuncFormatter``.
from matplotlib.ticker import FuncFormatter
# Wrap ``money`` so it can be installed as an axis tick formatter.
formatter = FuncFormatter(money)

# Redraw the waterfall, this time with an x-axis label and the y-axis
# ticks rendered through ``formatter``.
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank, legend=None, title="2014 sales waterfall")
my_plot.plot(step.index, step.values, 'k')
my_plot.set_xlabel("transaction types")
my_plot.yaxis.set_major_formatter(formatter)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


# Use Python 2.7+ format syntax (the "," thousands separator) for currency.
def money(x, pos):
    """Format a tick value as whole dollars with thousands separators.

    Args:
        x: The numeric tick value.
        pos: The tick position; accepted for the formatter callback
            signature but not used.

    Returns:
        The value rounded to zero decimals, comma-grouped and prefixed
        with a dollar sign, e.g. ``"$350,000"``.
    """
    return "${:,.0f}".format(x)


# Wrap ``money`` so it can be installed as an axis tick formatter.
formatter = FuncFormatter(money)
# Data to plot. Do not include a total; it is calculated below.
index = ['sales', 'returns', 'credit fees', 'rebates', 'late charges', 'shipping']
data = {'amount': [350000, -30000, -7500, -25000, 95000, -7000]}

# Store the data and create a "blank" series for the waterfall: for each
# row, the running total accumulated before it (0 for the first row).
trans = pd.DataFrame(data=data, index=index)
blank = trans.amount.cumsum().shift(1).fillna(0)

# Get the net total for the final element in the waterfall.
total = trans.sum().amount
trans.loc["net"] = total
blank.loc["net"] = total

# The steps graphically show the levels and are also used for label
# placement: each level is repeated three times and shifted up by one,
# then every third point (positions 1, 4, 7, ...) is blanked so the line
# breaks there. ``.iloc`` makes the positional intent explicit, since the
# index now holds repeated integer labels.
step = blank.reset_index(drop=True).repeat(3).shift(-1)
step.iloc[1::3] = np.nan

# When plotting the last element we want to show the full bar,
# so its blank offset is set to 0.
blank.loc["net"] = 0
# Plot and label: bars start at ``blank``; the black line overlays the
# levels held in ``step``.
my_plot = trans.plot(kind='bar', stacked=True, bottom=blank, legend=None, figsize=(10, 5), title="2014 sales waterfall")
my_plot.plot(step.index, step.values, 'k')
my_plot.set_xlabel("transaction types")
# Format the y-axis ticks as dollars.
my_plot.yaxis.set_major_formatter(formatter)
# Get the y-axis position for the labels: the running total before each row.
y_height = trans.amount.cumsum().shift(1).fillna(0)

# Get an offset so labels don't sit right on top of the bar. The largest
# amount is taken as a scalar (and not stored under the builtin name
# ``max``) so the offsets and ``int()`` below operate on plain numbers.
max_amount = trans["amount"].max()
neg_offset = max_amount / 25
pos_offset = max_amount / 50
plot_offset = int(max_amount / 15)

# Label every bar with its amount.
last_position = len(trans) - 1
for position, (label, row) in enumerate(trans.iterrows()):
    amount = row['amount']
    # ``y_height`` is labelled by transaction name, so look it up by position.
    base = y_height.iloc[position]
    # The last row is the net bar, whose preceding running total already
    # equals its amount; adding the amount again would double count.
    if position == last_position:
        y = base
    else:
        y = base + amount
    # Nudge the label above positive amounts and below the rest.
    if amount > 0:
        y += pos_offset
    else:
        y -= neg_offset
    my_plot.annotate("{:,.0f}".format(amount), (position, y), ha="center")

# Scale up the y-axis so there is room for the labels.
my_plot.set_ylim(0, blank.max() + int(plot_offset))
# Rotate the tick labels so they read horizontally.
my_plot.set_xticklabels(trans.index, rotation=0)
my_plot.get_figure().savefig("waterfall.png", dpi=200, bbox_inches='tight')

Posted in 未分类