This notebook is a summary of Python plots. Its purpose is to make it quick to find plotting examples for future use.
The sources come from multiple places, as listed below.
kesic.com is the original source of matplotlib examples:

从零开始学Python【1】—matplotlib(条形图)

从零开始学Python【2】—matplotlib(饼图)

从零开始学Python【3】—matplotlib(箱线图)

从零开始学Python【4】—matplotlib(直方图)

从零开始学Python【5】—matplotlib(折线图)

从零开始学Python【15】—matplotlib(散点图)

从零开始学Python【7】—matplotlib(雷达图)

jupyter widget example is coming from:

Interactive Python with Widgets

The data set in this blog can be found in github page:
https://github.com/supersheepbear/notebooks/tree/master/python

python

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
import pandas as pd
import scipy.stats as scs

plot styles

python

1	plt.style.use('ggplot')

python

1	plt.style.available

['bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark-palette',
 'seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'seaborn',
 'Solarize_Light2',
 'tableau-colorblind10',
 '_classic_test']

bar plot

vertical

python

# Vertical bar chart comparing the GDP of four municipalities.
# Uses `plt` and `np` from the notebook's import cell.

# data: one bar position per city
x = range(4)
GDP = [12406.8,13908.57,9386.87,9143.64]

# pick a font that contains CJK glyphs so the Chinese labels render, and keep
# the minus sign drawable with that font
plt.rcParams['font.sans-serif'] =['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# bars
plt.bar(x, GDP, align = 'center',color='steelblue', alpha = 0.8, width=0.6)
# y-axis label
plt.ylabel('GDP')
# title
plt.title('四个直辖市GDP大比拼')
# x ticks and their labels
plt.xticks(x,['北京市','上海市','天津市','重庆市'])
# y limits
plt.ylim([5000,15000])
# y ticks
plt.yticks(np.linspace(5000,15000,5))
# horizontal grid lines only
plt.grid(alpha=0.5, linestyle="--", axis="y")

# value label just above each bar; the loop uses its own names so the data
# variable `x` is not rebound to the last bar position
for bar_pos, gdp in zip(x, GDP):
    plt.text(bar_pos, gdp+100, '%s' %round(gdp,1), ha='center')

# render the figure
plt.show()

horizontal

python

# Horizontal bar chart comparing the lowest book price across platforms.
# Uses `plt` from the notebook's import cell.

# data: one bar position per platform
x = range(5)
price = [39.5,39.9,45.4,38.9,33.34]

# pick a font that contains CJK glyphs so the Chinese labels render, and keep
# the minus sign drawable with that font
plt.rcParams['font.sans-serif'] =['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# bars
plt.barh(x, price, align = 'center',color='steelblue', alpha = 0.8, height=0.5)
# x-axis label
plt.xlabel('价格')
# title
plt.title('不同平台书的最低价比较')
# y ticks and their labels
plt.yticks(x,['亚马逊','当当网','中国图书网','京东','天猫'])
# x limits
plt.xlim([32,47])
# vertical grid lines only
plt.grid(alpha=0.5, linestyle="--", axis="x")

# value label just right of each bar; the loop uses its own names so the data
# variable `x` is not rebound to the last bar position
for bar_pos, value in zip(x, price):
    plt.text(value+0.1, bar_pos, '%s' %value, va='center')

# render the figure
plt.show()

compare plot

lateral stack

python

# Grouped (side-by-side) bar chart: 2016 vs 2017 values for the top-5 cities.
import matplotlib.pyplot as plt
import numpy as np

# data: the 2017 bars sit one bar width to the right of the 2016 bars
bar_width = 0.35
x1 = np.arange(5)
Y2016 = [15600,12700,11300,4270,3620]
x2 = np.arange(5)+bar_width
Y2017 = [17400,14800,12000,5200,4020]
labels = ['北京','上海','香港','深圳','广州']

# pick a font that contains CJK glyphs so the Chinese labels render, and keep
# the minus sign drawable with that font
plt.rcParams['font.sans-serif'] =['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# bars
plt.bar(x1, Y2016, label = '2016', color = 'steelblue', alpha = 0.8, width = bar_width)
plt.bar(x2, Y2017, label = '2017', color = 'indianred', alpha = 0.8, width = bar_width)
# axis labels
plt.xlabel('Top5城市')
plt.ylabel('家庭数量')
# title
plt.title('亿万财富家庭数Top5城市分布')
# city ticks go midway between the two bars of each pair: the bars are
# centred on x1 and x1 + bar_width, so the pair's middle is x1 + bar_width/2
plt.xticks(x1+bar_width/2,labels)
# y limits
plt.ylim([2500, 19000])
# horizontal grid lines only
plt.grid(alpha=0.5, linestyle="--", axis="y")

# value labels, centred over their own bar
for x2016,y2016 in zip(x1, Y2016):
    plt.text(x2016, y2016+100, '%s' %y2016, ha='center')

for x2017,y2017 in zip(x2, Y2017):
    plt.text(x2017, y2017+100, '%s' %y2017, ha='center')
# legend
plt.legend(loc='best')
# render the figure
plt.show()

vertical stack

python

# Stacked bar chart: the 2017 values are drawn on top of the 2016 values.
import matplotlib.pyplot as plt
import numpy as np

# data
bar_width = 0.35
x = np.arange(5)
Y2016 = [15600,12700,11300,4270,3620]
Y2017 = [17400,14800,12000,5200,4020]
labels = ['北京','上海','香港','深圳','广州']

# pick a font that contains CJK glyphs so the Chinese labels render, and keep
# the minus sign drawable with that font
plt.rcParams['font.sans-serif'] =['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# bars: each 2017 bar starts at the top of the matching 2016 bar, so `bottom`
# must be the per-city Y2016 list (not a single leftover scalar)
plt.bar(x, Y2017, label = '2017', color = 'red', alpha = 0.8, width = bar_width, bottom=Y2016)
plt.bar(x, Y2016, label = '2016', color = 'blue', alpha = 0.8, width = bar_width)
# axis labels
plt.xlabel('Top5城市')
plt.ylabel('家庭数量')
# title
plt.title('亿万财富家庭数Top5城市分布')
# x ticks
plt.xticks(x,labels)

# horizontal grid lines only
plt.grid(alpha=0.5, linestyle="--", axis="y")
# y limits; NOTE(review): the tallest stack is 15600 + 17400 = 33000, which is
# clipped by this upper limit — raise it if the full stacks should be visible
plt.ylim([0, 22500])
# legend
plt.legend(loc='best')
# render the figure
plt.show()

top down stack

python

# Mirrored bar chart: 2016 values point up, 2017 values point down.
import matplotlib.pyplot as plt
import numpy as np

# data; the 2017 series is negated so its bars grow downwards from zero
bar_width = 0.35
x = np.arange(5)
Y2016 = [15600, 12700, 11300, 4270, 3620]
Y2017 = -1 * np.array([17400, 14800, 12000, 5200, 4020])
labels = ['北京', '上海', '香港', '深圳', '广州']

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# bars (2017 first, then 2016, matching the legend order)
plt.bar(x, Y2017, width=bar_width, alpha=0.8, color='red', label='2017')
plt.bar(x, Y2016, width=bar_width, alpha=0.8, color='blue', label='2016')

# title and axis labels
plt.title('亿万财富家庭数Top5城市分布')
plt.xlabel('Top5城市')
plt.ylabel('家庭数量')

# ticks, grid, limits, legend
plt.xticks(np.arange(5), labels)
plt.grid(alpha=0.5, linestyle="--", axis="y")
plt.ylim([-20000, 20000])
plt.legend(loc='best')

# value labels: above the upward bars ...
for x2016, y2016 in zip(x, Y2016):
    label_x = x2016 - bar_width / 2
    plt.text(label_x, y2016 + 100, '%s' % y2016)

# ... and below the downward bars, printed as positive numbers
for x2017, y2017 in zip(x, Y2017):
    label_x = x2017 - bar_width / 2
    plt.text(label_x, y2017 - 1500, '%s' % -y2017)

# render the figure
plt.show()

pie plot

pie函数参数解读
plt.pie(x, explode=None, labels=None, colors=None, autopct=None, pctdistance=0.6, shadow=False, labeldistance=1.1, startangle=None, radius=None, counterclock=True, wedgeprops=None, textprops=None, center=(0, 0), frame=False)

x：指定绘图的数据；

explode：指定饼图某些部分的突出显示，即呈现爆炸式；

labels：为饼图添加标签说明，类似于图例说明；

colors：指定饼图的填充色；

autopct：自动添加百分比显示，可以采用格式化的方法显示；

pctdistance：设置百分比标签与圆心的距离；

shadow：是否添加饼图的阴影效果；

labeldistance：设置各扇形标签（图例）与圆心的距离；

startangle：设置饼图的初始摆放角度；

radius：设置饼图的半径大小；

counterclock：是否让饼图按逆时针顺序呈现；

wedgeprops：设置饼图内外边界的属性，如边界线的粗细、颜色等；

textprops：设置饼图中文本的属性，如字体大小、颜色等；

center：指定饼图的中心点位置，默认为原点

frame：是否要显示饼图背后的图框，如果设置为True的话，需要同时控制图框x轴、y轴的范围和饼图的中心位置；

python

# Pie chart of education levels, drawn inside a visible axes frame.
# import the plotting module
import matplotlib.pyplot as plt

# style
plt.style.use('ggplot')

# data: one share per education level (same order as `labels`)
edu = [0.2515,0.3724,0.3336,0.0368,0.0057]
labels = ['中专','大专','本科','硕士','其他']

# pull the second wedge out of the pie; the others stay in place
explode = [0,0.1,0,0,0]
colors=['#9999ff','#ff9999','#7777aa','#2442aa','#dd5555'] # custom wedge colours

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# equal aspect ratio so the pie is a circle instead of an oval
plt.axes(aspect='equal')

# axis limits; the pie below is centred at (1.8, 1.8) inside this window
plt.xlim(0,4)
plt.ylim(0,4)

# draw the pie
plt.pie(x = edu, # data
        explode=explode, # per-wedge offset from the centre
        labels=labels, # wedge labels
        colors=colors, # wedge colours
        autopct='%.1f%%', # percentage format (one decimal place)
        pctdistance=0.8,  # distance of the percentage text from the centre
        labeldistance = 1.15, # distance of the labels from the centre
        startangle = 180, # start angle
        radius = 1.5, # radius
        counterclock = False, # lay the wedges out clockwise
        wedgeprops = {'linewidth': 1.5, 'edgecolor':'green'},# wedge edge style
        textprops = {'fontsize':12, 'color':'k'}, # text style
        center = (1.8,1.8), # centre of the pie
        frame = 1)# draw the axes frame behind the pie
# remove the x and y tick marks
plt.xticks(())
plt.yticks(())
# title
plt.title('芝麻信用失信用户教育水平分布')

# render the figure
plt.show()

histogram

hist函数的参数解读
绘图之前，我们先来讲解一下matplotlib包中hist函数的参数含义及使用方法：
plt.hist(x, bins=10, range=None, density=False, weights=None, cumulative=False, bottom=None, histtype='bar', align='mid', orientation='vertical', rwidth=None, log=False, color=None, label=None, stacked=False)

x：指定要绘制直方图的数据；

bins：指定直方图条形的个数；

range：指定直方图数据的上下界，默认包含绘图数据的最大值和最小值；

density：是否将直方图的频数转换成频率（旧版本matplotlib中该参数名为normed）；

weights：该参数可为每一个数据点设置权重；

cumulative：是否需要计算累计频数或频率；

bottom：可以为直方图的每个条形添加基准线，默认为0；

histtype：指定直方图的类型，默认为bar，除此还有'barstacked', 'step', 'stepfilled'；

align：设置条形边界值的对齐方式，默认为mid，除此还有'left'和'right'；

orientation：设置直方图的摆放方向，默认为垂直方向；

rwidth：设置直方图条形宽度的百分比；

log：是否需要对绘图数据进行log变换；

color：设置直方图的填充色；

label：设置直方图的标签，可通过legend展示其图例；

stacked：当有多个数据时，是否需要将直方图呈堆叠摆放，默认水平摆放；

data cleaning

python

1	titanic = pd.read_csv('train.csv')

python

1	titanic.head()

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S

python

1	titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB

python

1	titanic.describe()

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
count	891.000000	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	446.000000	0.383838	2.308642	29.699118	0.523008	0.381594	32.204208
std	257.353842	0.486592	0.836071	14.526497	1.102743	0.806057	49.693429
min	1.000000	0.000000	1.000000	0.420000	0.000000	0.000000	0.000000
25%	223.500000	0.000000	2.000000	20.125000	0.000000	0.000000	7.910400
50%	446.000000	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	668.500000	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	891.000000	1.000000	3.000000	80.000000	8.000000	6.000000	512.329200

We want to plot age data, therefore check null values

python

1	titanic.isnull().any()

PassengerId    False
Survived       False
Pclass         False
Name           False
Sex            False
Age            False
SibSp          False
Parch          False
Ticket         False
Fare           False
Cabin           True
Embarked        True
dtype: bool

python

1	titanic.dropna(subset=['Age'], inplace=True)

python

1	titanic.isnull().any()

PassengerId    False
Survived       False
Pclass         False
Name           False
Sex            False
Age            False
SibSp          False
Parch          False
Ticket         False
Fare           False
Cabin           True
Embarked        True
dtype: bool

typical plot

python

# Histogram of passenger ages with the count printed on top of every bar.
# Expects the cleaned `titanic` DataFrame from the data-cleaning cells.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False

# style
plt.style.use('ggplot')
# histogram; arr[0] holds the bar heights and arr[1] the bin edges
arr=plt.hist(titanic.Age, # data
            bins = 20, # number of bins
            color = 'steelblue', # fill colour
            edgecolor = 'k', # edge colour
            label = '直方图' )# legend label
# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top=False, right=False)
# title
plt.title("age distribution")
# axis labels
plt.xlabel("ages")
plt.ylabel("number")

# legend
plt.legend()

# print each count at the left edge of its bar; zip stops after the last bar,
# so the label count always follows the number of bins actually drawn
for left_edge, count in zip(arr[1], arr[0]):
    plt.text(left_edge, count, str(int(count)))

# render the figure
plt.show()

accumulative plot

python

# Cumulative relative-frequency histogram of passenger ages (5-year bins).
# Expects `titanic`, `np` and `plt` from earlier cells.
bin_width = 5
# extend the stop by one bin width: np.arange excludes its stop value, so
# without this the last edge falls below the maximum age and the oldest
# passengers are silently left out of the histogram
bins = np.arange(titanic.Age.min(), titanic.Age.max() + bin_width, bin_width)
arr = plt.hist(titanic.Age, # data
        bins = bins, # bin edges
        density = True, # normalise instead of plotting raw counts
        cumulative = True, # accumulate from left to right
        color = 'steelblue', # fill colour
        edgecolor = 'k', # edge colour
        label = 'histogram' )# legend label

# title and axis labels
plt.title('乘客年龄的频率累计直方图')
plt.xlabel('年龄')
plt.ylabel('累计频率')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top=False, right=False)

# legend
plt.legend(loc = 'best')

# print each cumulative value (2 decimals) at the left edge of its bar
for left_edge, height in zip(arr[1], arr[0]):
    plt.text(left_edge, height, str(round(height,2)))

# render the figure
plt.show()

plot with normal distribution

python

# Density histogram of passenger ages overlaid with a fitted normal curve and
# a Gaussian kernel-density estimate.
# Expects `titanic`, `np`, `plt`, `scs` and `mlab` from earlier cells.
bin_width = 5
# extend the stop by one bin width: np.arange excludes its stop value, so
# without this the oldest passengers fall outside the last bin
bins = np.arange(titanic.Age.min(), titanic.Age.max() + bin_width, bin_width)
arr = plt.hist(titanic.Age, # data
               bins = bins, # bin edges
               density = True, # normalise to a density
               color = 'steelblue', # fill colour
               edgecolor = 'k') # edge colour

# title and axis labels
plt.title('乘客年龄直方图')
plt.xlabel('年龄')
plt.ylabel('频率')

# normal curve using the sample mean and standard deviation of Age
x1 = np.linspace(titanic.Age.min(), titanic.Age.max(), 1000)
normal = scs.norm.pdf(x1, titanic.Age.mean(), titanic.Age.std())
line1, = plt.plot(x1,normal,'r-', linewidth = 2)

# Gaussian kernel-density estimate evaluated on the same age range
kde = mlab.GaussianKDE(titanic.Age)
x2 = np.linspace(titanic.Age.min(), titanic.Age.max(), 1000)
line2, = plt.plot(x2,kde(x2),'g-', linewidth = 2)

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top=False, right=False)

# print each bar height (2 decimals) at the left edge of its bar
for left_edge, height in zip(arr[1], arr[0]):
    plt.text(left_edge, height, str(round(height,2)))

# legend for the two curves only
plt.legend([line1, line2],['正态分布曲线','核密度曲线'],loc='best')
# render the figure
plt.show()

stack plot

python

# Overlaid age histograms for male and female passengers (2-year bins).
# Expects `titanic`, `np` and `plt` from earlier cells.

# data: ages split by sex
age_female = titanic.Age[titanic.Sex == 'female']
age_male = titanic.Age[titanic.Sex == 'male']

# shared bin edges; the stop is extended by one bin width because np.arange
# excludes its stop value and would otherwise drop the oldest passengers
bin_width = 2
bins = np.arange(titanic.Age.min(), titanic.Age.max() + bin_width, bin_width)
# male
arr1 = plt.hist(age_male,
         bins = bins,
         label = '男性',
         color = 'steelblue',
         alpha = 0.7,
         edgecolor = 'k') # edge colour
# female (semi-transparent so the male bars stay visible underneath)
arr2 = plt.hist(age_female,
         bins = bins,
         label = '女性',
         alpha = 0.6,
         edgecolor = 'k') # edge colour

# title and axis labels
plt.title('乘客年龄直方图')
plt.xlabel('年龄')
plt.ylabel('人数')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top=False, right=False)

# print the count on top of every bar, for both series
for hist_result in (arr1, arr2):
    for left_edge, count in zip(hist_result[1], hist_result[0]):
        plt.text(left_edge, count, str(int(count)))

# legend
plt.legend()
# render the figure
plt.show()

box plot

boxplot函数的参数解读
绘图之前，我们先来讲解一下matplotlib包中boxplot函数的参数含义及使用方法：
plt.boxplot(x, notch=None, sym=None, vert=None, whis=None, positions=None, widths=None, patch_artist=None, meanline=None, showmeans=None, showcaps=None, showbox=None, showfliers=None, boxprops=None, labels=None, flierprops=None, medianprops=None, meanprops=None, capprops=None, whiskerprops=None)

x：指定要绘制箱线图的数据；

notch：是否是凹口的形式展现箱线图，默认非凹口；

sym：指定异常点的形状，默认为+号显示；

vert：是否需要将箱线图垂直摆放，默认垂直摆放；

whis：指定上下须与上下四分位的距离，默认为1.5倍的四分位差；

positions：指定箱线图的位置，默认为[0,1,2…]；

widths：指定箱线图的宽度，默认为0.5；

patch_artist：是否填充箱体的颜色；

meanline：是否用线的形式表示均值，默认用点来表示；

showmeans：是否显示均值，默认不显示；

showcaps：是否显示箱线图顶端和末端的两条线，默认显示；

showbox：是否显示箱线图的箱体，默认显示；

showfliers：是否显示异常值，默认显示；

boxprops：设置箱体的属性，如边框色，填充色等；

labels：为箱线图添加标签，类似于图例的作用；

flierprops：设置异常值的属性，如异常点的形状、大小、填充色等；

medianprops：设置中位数的属性，如线的类型、粗细等；

meanprops：设置均值的属性，如点的大小、颜色等；

capprops：设置箱线图顶端和末端线条的属性，如颜色、粗细等；

whiskerprops：设置须的属性，如颜色、粗细、线的类型等；

data preparation

see histogram data preparation for details

single box plot

python

# Single box plot of passenger ages.
# Expects `titanic` and `plt` from earlier cells.

# font with CJK glyphs for Chinese text; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# box plot; the returned dict gives access to the drawn artists
arr = plt.boxplot(x = titanic.Age, # data
            patch_artist=True, # draw the box as a fillable patch
            showmeans=True, # also mark the mean
            boxprops = {'color':'black','facecolor':'#9999ff'}, # box edge / fill
            flierprops = {'marker':'o','markerfacecolor':'red','color':'black'}, # outlier markers
            meanprops = {'marker':'D','markerfacecolor':'indianred'}, # mean marker
            medianprops = {'linestyle':'--','color':'orange'}) # median line
# y limits
plt.ylim(0,85)
# legend built from the box patch itself
plt.legend([arr["boxes"][0]], ['A'], loc='upper right')
# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top=False, right=False)
# render the figure
plt.show()

multiple boxes plot

python

# One box per passenger class, all drawn by a single boxplot call.
# Expects `titanic` and `plt` from earlier cells.

# sort by class first so unique() returns the classes in ascending order
titanic.sort_values(by='Pclass', inplace=True)
levels = titanic.Pclass.unique()

# one Age series per class, in the same order as `levels`
age = [titanic.loc[titanic.Pclass == pclass, 'Age'] for pclass in levels]

cabin_names = ['一等舱', '二等舱', '三等舱']
arr = plt.boxplot(
    x=age,
    patch_artist=True,
    labels=cabin_names,
    showmeans=True,
    boxprops={'color': 'black', 'facecolor': '#9999ff'},
    flierprops={'marker': 'o', 'markerfacecolor': 'red', 'color': 'black'},
    meanprops={'marker': 'D', 'markerfacecolor': 'indianred'},
    medianprops={'linestyle': '--', 'color': 'orange'},
)

# legend entries come from the first three box patches
box_patches = [arr["boxes"][k] for k in range(3)]
plt.legend(box_patches, ['一等舱', '二等舱', '三等舱'], loc='upper left')

# x limits
plt.xlim(-0.5, 4)
plt.show()

python

# One separately coloured box per passenger class.
# Expects `titanic` and `plt` from earlier cells.

# sort by class first so unique() returns the classes in ascending order
titanic.sort_values(by = 'Pclass', inplace=True)


age = []
levels = titanic.Pclass.unique()
for pclass in levels:
    age.append(titanic.loc[titanic.Pclass==pclass,'Age'])

# each class gets its own boxplot call so it can have its own fill colour;
# everything except label, colour and position is shared
cabin_labels = ['一等舱','二等舱','三等舱']
face_colors = ['green', 'blue', 'orange']
box_handles = []
for position, (cabin, face_color) in enumerate(zip(cabin_labels, face_colors)):
    result = plt.boxplot(x = age[position],
                patch_artist=True,
                labels = [cabin],
                showmeans=True,
                boxprops = {'color':'black','facecolor':face_color},
                flierprops = {'marker':'o','markerfacecolor':'red','color':'black'},
                meanprops = {'marker':'D','markerfacecolor':'indianred'},
                medianprops = {'linestyle':'--','color':'orange'},
                positions = [position])
    # keep the box patch so the legend can show its colour
    box_handles.append(result["boxes"][0])

plt.legend(box_handles, cabin_labels, loc='upper left')
# x limits
plt.xlim(-1,2.5)
plt.show()

line chart

matplotlib模块中plot函数语法及参数含义：
plt.plot(x,y,linestyle,
linewidth,color,marker,
markersize,markeredgecolor,
markerfacecolor,label,alpha)

x：指定折线图的x轴数据；

y：指定折线图的y轴数据；

linestyle：指定折线的类型，可以是实线、虚线、点虚线、点点线等，默认为实线；

linewidth：指定折线的宽度

marker：可以为折线图添加点，该参数是设置点的形状；

markersize：设置点的大小；

markeredgecolor：设置点的边框色；

markerfacecolor：设置点的填充色；

label：为折线图添加标签，类似于图例的作用；

one dimension plot

python

# Load the article-reading statistics, parse the date column, and keep the
# rows dated 2017-08-01 or later.
article_reading = pd.read_csv('wechart.csv')
article_reading['date'] = pd.to_datetime(article_reading['date'])
from_august = article_reading['date'] >= '2017-08-01'
sub_data = article_reading.loc[from_august, :]
sub_data.head()

	date	article_reading_cnts	article_reading_times	collect_times
212	2017-08-01	116	313	11
213	2017-08-02	91	248	15
214	2017-08-03	62	220	7
215	2017-08-04	52	162	2
216	2017-08-05	45	134	8

python

# Line chart of daily readers.
# Expects `sub_data` from the data cell above.
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# let matplotlib handle pandas datetime values on the axis
pd.plotting.register_matplotlib_converters()

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# figure size
fig = plt.figure(figsize=(10,6))
# line
plt.plot(sub_data.date, # x data
         sub_data.article_reading_cnts, # y data
         linestyle = '-', # line style
         linewidth = 2, # line width
         color = 'steelblue', # line colour
         marker = 'o', # marker shape
         markersize = 6, # marker size
         markeredgecolor='black', # marker edge colour
         markerfacecolor='brown') # marker fill colour

# title and axis labels
plt.title('公众号每天阅读人数趋势图')
plt.xlabel('日期')
plt.ylabel('人数')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)

# rotate the date labels by 45 degrees
fig.autofmt_xdate(rotation = 45)



# render the figure
plt.show()

optimized one dimension plot

python

# Line chart of daily readers with explicit date formatting and tick spacing.
# Expects `sub_data` from the data cell above.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.style.use('ggplot')
# let matplotlib handle pandas datetime values on the axis
pd.plotting.register_matplotlib_converters()

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# figure size
fig = plt.figure(figsize=(10,6))
# line
plt.plot(sub_data.date, # x data
         sub_data.article_reading_cnts, # y data
         linestyle = '-', # line style
         linewidth = 2, # line width
         color = 'steelblue', # line colour
         marker = 'o', # marker shape
         markersize = 6, # marker size
         markeredgecolor='black', # marker edge colour
         markerfacecolor='brown') # marker fill colour

# title and axis labels
plt.title('公众号每天阅读人数趋势图')
plt.xlabel('日期')
plt.ylabel('人数')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)

# rotate the date labels by 45 degrees
fig.autofmt_xdate(rotation = 45)

# current axes
ax = plt.gca()
# show the dates as YYYY-MM-DD
date_format = mpl.dates.DateFormatter("%Y-%m-%d")
ax.xaxis.set_major_formatter(date_format)

# one major tick every 5 days
# (alternative: a fixed number of ticks via mpl.ticker.LinearLocator(10))
xlocator = mpl.ticker.MultipleLocator(5)
ax.xaxis.set_major_locator(xlocator)

# render the figure
plt.show()

multiple dimension plot

python

# Two line series on one chart: daily readers and daily reads.
# Expects `sub_data` from the data cell above.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
plt.style.use('ggplot')
# let matplotlib handle pandas datetime values on the axis
pd.plotting.register_matplotlib_converters()

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# figure size
fig = plt.figure(figsize=(10,6))
# first series
plt.plot(sub_data.date, # x data
         sub_data.article_reading_cnts, # y data
         linestyle = '-', # line style
         linewidth = 2, # line width
         color = 'steelblue', # line colour
         marker = 'o', # marker shape
         markersize = 6, # marker size
         markeredgecolor='black', # marker edge colour
         markerfacecolor='brown', # marker fill colour
         label = '阅读人数') # legend label

# second series
plt.plot(sub_data.date, # x data
         sub_data.article_reading_times, # y data
         linestyle = '-', # line style
         linewidth = 2, # line width
         color = '#ff9999', # line colour
         marker = 'o', # marker shape
         markersize = 6, # marker size
         markeredgecolor='black', # marker edge colour
         markerfacecolor='#ff9999', # marker fill colour
         label = '阅读人次') # legend label

# title and axis labels
plt.title('公众号每天阅读人数趋势图')
plt.xlabel('日期')
plt.ylabel('人数')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)

# rotate the date labels by 45 degrees
fig.autofmt_xdate(rotation = 45)

# current axes
ax = plt.gca()
# show the dates as YYYY-MM-DD
date_format = mpl.dates.DateFormatter("%Y-%m-%d")
ax.xaxis.set_major_formatter(date_format)

# one major tick every 5 days
# (alternative: a fixed number of ticks via mpl.ticker.LinearLocator(10))
xlocator = mpl.ticker.MultipleLocator(5)
ax.xaxis.set_major_locator(xlocator)

# legend
plt.legend()

# render the figure
plt.show()

scatter plot

matplotlib模块中scatter函数语法及参数含义：

plt.scatter(x, y, s=20,
c=None, marker='o',
cmap=None, norm=None,
vmin=None, vmax=None,
alpha=None, linewidths=None,
edgecolors=None)
x：指定散点图的x轴数据；

y：指定散点图的y轴数据；

s：指定散点图点的大小，默认为20，通过传入新的变量，实现气泡图的绘制；

c：指定散点图点的颜色，默认为蓝色；

marker：指定散点图点的形状，默认为圆形；

cmap：指定色图，只有当c参数是一个浮点型的数组的时候才起作用；

norm：设置数据亮度，标准化到0~1之间，使用该参数仍需要c为浮点型的数组；

vmin、vmax：亮度设置，与norm类似，如果使用了norm则该参数无效；

alpha：设置散点的透明度；

linewidths：设置散点边界线的宽度；

edgecolors：设置散点边界线的颜色；

one dimension scatter plot

python

1
2
3

# data
cars = pd.read_csv('cars.csv')
cars.head()

	speed	dist
0	4	2
1	4	10
2	7	4
3	7	22
4	8	16

python

# Scatter plot of car speed against stopping distance.
# Expects the `cars` DataFrame from the data cell above.
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('ggplot')
# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

plt.scatter(
    x=cars["speed"],
    y=cars["dist"],
    c="steelblue",
    marker="s", # square markers
    alpha=0.9, # opacity
    linewidths = 0.3, # marker edge width
    edgecolors = 'red') # marker edge colour

# title and axis labels
plt.title('汽车速度与刹车距离的关系')
plt.xlabel('汽车速度')
plt.ylabel('刹车距离')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)
# render the figure
plt.show()

one dimension plot with linear regression

linear regression

python

1
2
3

# Fit dist ~ speed with ordinary least squares and predict on the same speeds.
from sklearn.linear_model import LinearRegression
# reshape both columns to (n_samples, 1) column vectors before fitting
speed_column = cars.speed.values.reshape(-1, 1)
dist_column = cars.dist.values.reshape(-1, 1)
reg = LinearRegression().fit(speed_column, dist_column)
pred = reg.predict(speed_column)

intercept

python

1	reg.intercept_

array([-17.57909489])

slope

python

reg.coef_

array([[3.93240876]])

python

1	reg.coef_[0][0], reg.intercept_[0]

(3.9324087591240873, -17.57909489051095)

python

# Scatter plot of speed against stopping distance with the fitted regression
# line and its equation. Expects `cars`, `reg` and `pred` from earlier cells.
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('ggplot')
# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

plt.scatter(
    x=cars["speed"],
    y=cars["dist"],
    c="steelblue",
    marker="s", # square markers
    alpha=0.9, # opacity
    linewidths = 0.3, # marker edge width
    edgecolors = 'red') # marker edge colour

# regression line
plt.plot(cars.speed,
         pred,
         linewidth = 2,
         label = '回归线')

# equation text; '{:+.2f}' prints the intercept with its own sign, so a
# negative intercept reads "y=3.93x -17.58" rather than "+ -17.58"
plt.text(5,100,"y={:.2f}x {:+.2f}".format(reg.coef_[0][0], reg.intercept_[0]))

# title and axis labels
plt.title('汽车速度与刹车距离的关系')
plt.xlabel('汽车速度')
plt.ylabel('刹车距离')

# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)
# render the figure
plt.show()

multiple dimensions plot

python

# Load the header-less iris CSV, then give its five columns readable names.
iris = pd.read_csv("iris.csv", header=None)
iris_column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
iris.columns = iris_column_names
iris.head()

	sepal_length	sepal_width	petal_length	petal_width	class
0	5.1	3.5	1.4	0.2	Iris-setosa
1	4.9	3.0	1.4	0.2	Iris-setosa
2	4.7	3.2	1.3	0.2	Iris-setosa
3	4.6	3.1	1.5	0.2	Iris-setosa
4	5.0	3.6	1.4	0.2	Iris-setosa

python

# distinct class labels, in order of first appearance
classes = iris["class"].unique()
classes

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

python

# Petal length against petal width, one colour per iris class.
# Expects `iris`, `classes` and `plt` from earlier cells.

# one colour per class, in the same order as `classes`
colors = ['steelblue', '#9999ff', '#ff9999']

# one scatter call per class so each gets its own colour and legend entry
for class_name, class_color in zip(classes, colors):
    in_class = iris["class"] == class_name
    plt.scatter(iris.loc[in_class, "petal_length"],
                iris.loc[in_class, 'petal_width'],
                label=class_name,
                color=class_color)
# title and axis labels
plt.title('花瓣长度与宽度的关系')
plt.xlabel('花瓣长度')
plt.ylabel('花瓣宽度')
# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)
# legend
plt.legend(loc = 'upper left')
# render the figure
plt.show()

bubble plot

Show another dimension by size of scatter marker.

python

1	import numpy as np

python

# Bubble plot: petal length against petal width, coloured by class, with the
# marker size driven by sepal width.
# Expects `iris`, `classes` and `plt` from earlier cells.

# one colour per class, in the same order as `classes`
colors = ['steelblue', '#9999ff', '#ff9999']

# standardise sepal_width over the whole data set (z-score) ...
sepal_width = iris['sepal_width']
sepal_width_scaled = (sepal_width - sepal_width.mean()) / sepal_width.std()
# ... then shift so the smallest value is 0, because marker sizes cannot be
# negative
sepal_width_scaled_positive = sepal_width_scaled - sepal_width_scaled.min()

# one scatter call per class; the size values are selected with the same mask
# as the coordinates so every point gets its own row's size
for class_name, class_color in zip(classes, colors):
    in_class = iris["class"] == class_name
    plt.scatter(iris.loc[in_class, "petal_length"],
                iris.loc[in_class, 'petal_width'],
                label=class_name,
                color=class_color,
                s=(sepal_width_scaled_positive[in_class] * 50))
# title and axis labels
plt.title('花瓣长度与宽度的关系')
plt.xlabel('花瓣长度')
plt.ylabel('花瓣宽度')
# hide the ticks on the top and right edges; tick_params expects real
# booleans here (the string 'off' is truthy)
plt.tick_params(top = False, right = False)
# legend
plt.legend(loc = 'upper left')
# note explaining what the marker size encodes
plt.text(1,1.5,"size: sepal_width",)
# render the figure
plt.show()

radar plot

one dimension radar plot

python

# Radar (polar) chart of five scores for a single series.
import numpy as np
import matplotlib.pyplot as plt

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# style
plt.style.use('ggplot')

# data: one score per feature
values = [3.2,2.1,3.5,2.8,3]
feature = ['个人能力','QC知识','解决问题能力','服务质量意识','团队精神']

N = len(values)
# one evenly spaced angle per feature
angles=np.linspace(0, 2*np.pi, N, endpoint=False)

# repeat the first point at the end so the outline closes and can be filled
values=np.concatenate((values,[values[0]]))
angles=np.concatenate((angles,[angles[0]]))

# plot
fig=plt.figure()
ax = fig.add_subplot(111, polar=True)
ax.plot(angles, values, 'o-', linewidth=2)
ax.fill(angles, values, alpha=0.25)
# feature labels: drop the duplicated closing angle so the number of grid
# angles equals the number of labels (a mismatch raises an error)
ax.set_thetagrids(angles[:-1] * 180/np.pi, feature)
# radial limits
ax.set_ylim(0,5)
# title
plt.title('活动前后员工状态表现')
# grid
ax.grid(True)
# render the figure
plt.show()

multiple dimension radar plot

python

# Radar (polar) chart comparing two series of five scores.
import numpy as np
import matplotlib.pyplot as plt

# font with CJK glyphs for the Chinese labels; keep the minus sign drawable
plt.rcParams['font.sans-serif'] = 'Microsoft YaHei'
plt.rcParams['axes.unicode_minus'] = False

# style
plt.style.use('ggplot')

# data: one score per feature for each series
values = [3.2,2.1,3.5,2.8,3]
values2 = [4,4.1,4.5,4,4.1]
feature = ['个人能力','QC知识','解决问题能力','服务质量意识','团队精神']

N = len(values)
# one evenly spaced angle per feature
angles=np.linspace(0, 2*np.pi, N, endpoint=False)

# repeat the first point at the end so each outline closes and can be filled
values=np.concatenate((values,[values[0]]))
values2=np.concatenate((values2,[values2[0]]))
angles=np.concatenate((angles,[angles[0]]))

# plot
fig=plt.figure()
ax = fig.add_subplot(111, polar=True)
# first series, labelled to pair with the '活动后' series below
ax.plot(angles, values, 'o-', linewidth=2, label = '活动前')
ax.fill(angles, values, alpha=0.25)

ax.plot(angles, values2, 'o-', linewidth=2, label = '活动后')
ax.fill(angles, values2, alpha=0.25)
# feature labels: drop the duplicated closing angle so the number of grid
# angles equals the number of labels (a mismatch raises an error)
ax.set_thetagrids(angles[:-1] * 180/np.pi, feature)
# radial limits
ax.set_ylim(0,5)
# title
plt.title('活动前后员工状态表现')
# grid
ax.grid(True)
# legend, so the series labels are actually shown
plt.legend(loc = 'best')
# render the figure
plt.show()

simple example

python

1
2
3

import ipywidgets as wg
from IPython.display import display
%matplotlib inline

python

1
2
3

name = wg.Text(value='Name')
age = wg.IntSlider(description="Age:")
display(name,age)

Text(value='Name')



IntSlider(value=0, description='Age:')

python

a = wg.FloatText()
b = wg.FloatSlider()
display(a,b)
mylink = wg.jslink((a,'value'), (b,'value'))

FloatText(value=0.0)



FloatSlider(value=0.0)

python

import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
def myPlot(c):
    """Plot y = c * x**2 on [-5, 5] as a red dashed line with fixed axis limits."""
    xs = np.linspace(-5, 5, 20)
    ys = c * xs**2
    plt.plot(xs, ys, 'r--')
    # axis labels
    plt.xlabel('x')
    plt.ylabel('y(x)')
    # fixed limits so the view stays the same for every value of c
    plt.xlim([-5, 5])
    plt.ylim([0, 80])

python

# slider for the coefficient c; interact() passes its value to myPlot as `c`
c_slide = wg.FloatSlider(value=1.0, min=0, max=3.0, step=0.1)
wg.interact(myPlot, c=c_slide)

interactive(children=(FloatSlider(value=1.0, description='c', max=3.0), Output()), _dom_classes=('widget-inter…





<function __main__.myPlot(c)>

actual data example

python

1	titanic = pd.read_csv('train.csv')

python

1	titanic.describe()

	PassengerId	Survived	Pclass	Age	SibSp	Parch	Fare
count	891.000000	891.000000	891.000000	714.000000	891.000000	891.000000	891.000000
mean	446.000000	0.383838	2.308642	29.699118	0.523008	0.381594	32.204208
std	257.353842	0.486592	0.836071	14.526497	1.102743	0.806057	49.693429
min	1.000000	0.000000	1.000000	0.420000	0.000000	0.000000	0.000000
25%	223.500000	0.000000	2.000000	20.125000	0.000000	0.000000	7.910400
50%	446.000000	0.000000	3.000000	28.000000	0.000000	0.000000	14.454200
75%	668.500000	1.000000	3.000000	38.000000	1.000000	0.000000	31.000000
max	891.000000	1.000000	3.000000	80.000000	8.000000	6.000000	512.329200

python

1
2
3

titanic.dropna(subset=['Age'], inplace=True)
titanic.sort_values("Age", inplace=True)
titanic.head()

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Parch	Ticket	Fare	Cabin	Embarked
803	804	1	3	Thomas, Master. Assad Alexander	male	0.42	0	1	2625	8.5167	NaN	C
755	756	1	2	Hamalainen, Master. Viljo	male	0.67	1	1	250649	14.5000	NaN	S
644	645	1	3	Baclini, Miss. Eugenie	female	0.75	2	1	2666	19.2583	NaN	C
469	470	1	3	Baclini, Miss. Helene Barbara	female	0.75	2	1	2666	19.2583	NaN	C
78	79	1	2	Caldwell, Master. Alden Gates	male	0.83	0	2	248738	29.0000	NaN	S

python

def myPlot(index):
    """Scatter a single point at x=0 whose y is the Pclass of the row at position `index`.

    `index` is converted with int() before the positional lookup, so float
    values such as those produced by a float slider are accepted.
    """
    plt.scatter(0, titanic.iloc[int(index),:].loc["Pclass"])

python

# Two demo widgets, a float text box and a float slider, displayed together
# and linked on their 'value' attribute with jslink.
a = wg.FloatText()
b = wg.FloatSlider()
display(a,b)
mylink = wg.jslink((a,'value'), (b,'value'))
# Slider over the row positions 0 .. len(titanic)-1; interact() passes the
# slider's value to myPlot as `index`.
# NOTE(review): a, b and mylink are not used by the interact call below —
# they look like a leftover from the earlier simple example; confirm.
index_slide = wg.FloatSlider(value=0, min=0, max=len(titanic)-1, step=1)
wg.interact(myPlot, index=index_slide)

FloatText(value=0.0)



FloatSlider(value=0.0)



interactive(children=(FloatSlider(value=0.0, description='index', max=713.0, step=1.0), Output()), _dom_classe…





<function __main__.myPlot(index)>

The available widget types can be found in:
widget types

example of link widgets together

python

# Play control that steps an integer value from 0 to len(titanic)-1,
# with `interval` set to 200.
play = wg.Play(
    value=0,
    min=0,
    max=len(titanic)-1,
    step=1,
    interval=200,
    description="Press play",
    disabled=False
)
# Slider covering the same range of row positions.
slider = wg.FloatSlider(value=0, min=0, max=len(titanic)-1, step=1)
# Text box that is handed to interact() below as the `index` argument.
# NOTE(review): the description says 'pClass' but the value is used as a row
# position; also confirm whether a plain FloatText honours min/max.
text = wg.FloatText(
    value=0,
    min=0,
    max=len(titanic)-1,
    step=1,
    description='pClass',
    disabled=False
)
# link the play control's value to both the text box and the slider
wg.jslink((play, 'value'), (text,'value'))
wg.jslink((play, 'value'), (slider,'value'))
# show the play control and the slider, each in its own horizontal box
ui1 = wg.HBox([play])
ui2 = wg.HBox([slider])
display(ui1)
display(ui2)
# call myPlot with the text box value as `index`
wg.interact(myPlot, index=text)

HBox(children=(Play(value=0, description='Press play', interval=200, max=713),))



HBox(children=(FloatSlider(value=0.0, max=713.0, step=1.0),))



interactive(children=(FloatText(value=0.0, description='pClass', step=1.0), Output()), _dom_classes=('widget-i…





<function __main__.myPlot(index)>

python

1
2

plot styles

bar plot

vertical

horizontal

compare plot

lateral stack

vertical stack

top down stack

pie plot

histogram

data cleaning

typical plot

accumulative plot

plot with normal distribution

stack plot

box plot

data preparation

single box plot

multiple boxes plot

line chart

one dimension plot

optimized one dimension plot

multiple dimension plot

scatter plot

one dimension scatter plot

one dimension plot with linear regression

multiple dimensions plot

bubble plot

radar plot

one dimension radar plot

multiple dimension radar plot

interactive widget plot

simple example

actual data example

example of link widgets together