# 常见图形:(箱线图，柱状图，散点图，折线图...) seaborn + matplotlib

# -*- coding: utf-8 -*-

import seaborn as sns
import numpy as np

#------------------------显示中文---------------------------------#
import matplotlib as mpl
# Keep the ASCII hyphen for negative tick labels; otherwise the minus sign
# is rendered as an empty box when the figure is saved.
mpl.rcParams["axes.unicode_minus"] = False
# Default sans-serif font used for text, chosen so Chinese labels display.
mpl.rcParams["font.sans-serif"] = ["SimHei"]


#------------------------------ Box plots ------------------------------#
# A box plot checks a continuous variable for outliers and shows the range
# of its distribution (the quartiles).  At least one data argument is
# required.
#
# Frequently used sns.boxplot parameters:
#   y         : the numeric series to inspect
#   x         : the categorical column that labels each box
#   hue       : an additional grouping factor
#   palette   : colour palette name, e.g. 'Set1', 'Set2', 'Set3'
#   linewidth : width of the outlines (2.5 gives a bold line)
#   order     : explicit category order, e.g. order=['Dinner', 'Lunch']
#   orient    : 'h' lays the boxes out horizontally; passing only `data`
#               draws one box per numeric column of the DataFrame
#   whis      : controls whether outliers are drawn separately; per the
#               original notes, whis=np.inf draws none
sns.set_style("whitegrid")
# `tips` is reused by most of the later sections of this script.
tips = sns.load_dataset("tips")

# Grouped box plot: total bill per meal time, split by smoker status, with
# the category order fixed explicitly.
ax = sns.boxplot(
    x='time',
    y='total_bill',
    hue='smoker',
    data=tips,
    order=['Dinner', 'Lunch'],
    linewidth=1.5,
    palette='Set3',
)

# Wide-form call: one horizontal box for every numeric column of `tips`.
ax = sns.boxplot(data=tips, orient='h', palette='Set3')

# Box plot combined with a swarm plot that shows how the individual
# observations are distributed.  Combining plots only requires running both
# plotting statements together: the second is drawn over the first.
ax = sns.boxplot(data=tips, x='day', y='total_bill')
ax = sns.swarmplot(data=tips, x='day', y='total_bill', color='.25')

#plt.scatter(x = iris['sepal_length'], y = iris['sepal_width'])


#------------------------------ barplot: aggregated bar chart ------------------------------#
# sns.barplot parameters used here:
#   estimator : statistic shown by each bar; defaults to np.mean and can be
#               replaced by e.g. np.sum, np.count_nonzero, np.median
#   palette   : colour theme, e.g. palette='Blues_d'
#
# The default statistic is the mean; this call plots the per-day sum of
# total_bill instead, split by sex.
ax = sns.barplot(
    data=tips,
    x="day",
    y="total_bill",
    hue='sex',
    estimator=np.sum,
    ci=0,
    palette="Blues_d",
)

#------------------------------ countplot: frequency chart ------------------------------#
# Counts the rows in each category, which makes it an important first look
# at categorical (factor) variables.  Here: rows per day, split by time.
ax = sns.countplot(data=tips, x='day', hue='time')

#------------------------------ factorplot / FacetGrid: faceted plots ------------------------------#
# Shared parameters of the faceting helpers:
#   hue      : the grouping variable
#   kind     : plot type for factorplot: bar / violin / count
#   col_wrap : number of sub-plots per row (col_wrap=2 puts 2 on each row)
#   size     : size of each individual sub-plot
# NOTE(review): newer seaborn releases renamed `size` to `height`; confirm
# against the installed version.
import matplotlib.pyplot as plt

# `col` splits the data into facets: one tip-vs-total_bill scatter per day.
g = sns.FacetGrid(tips, col="day", col_wrap=2, size=3)
g.map(plt.scatter, 'total_bill', "tip")
g.add_legend()

# Line plot of total_bill for each day.
g = sns.FacetGrid(tips, col="day", col_wrap=2, size=3)
g.map(plt.plot, 'total_bill')

# `hue` groups within a single plot: every day gets its own colour.
g = sns.FacetGrid(tips, hue="day", size=5)
g.map(plt.scatter, 'total_bill', "tip")
g.add_legend()
g.set(
    ylabel="tip",
    xlabel="total_bill",
    title="day of total_bill",
)
# Grouped count chart: number of rows per sex, one facet per day.
# NOTE(review): `factorplot` and its `size` argument were renamed in newer
# seaborn releases (`catplot`, `height`); confirm the installed version.
g = sns.factorplot(
    data=tips,
    x="sex",
    col="day",
    kind="count",
    col_wrap=2,
    size=3,
)

# Grouped box plot: distribution of total_bill for each day.  `col` selects
# the facet variable and col_wrap=2 places two sub-plots on each row.
g = sns.factorplot(
    data=tips,
    y="total_bill",
    col="day",
    col_wrap=2,
    kind="box",
    size=3,
    aspect=1,
)

#------------------------------ Regression plots: lmplot ------------------------------#
# sns.lmplot parameters used below:
#   markers  : marker per group, e.g. markers=["o", "x"]
#   x_jitter : how much the scattered points are jittered along x
#   col, row : both can be used to split the data into sub-plots

# Grouped plot: each smoker group gets its own marker shape.
g = sns.lmplot(x='total_bill', y='tip', hue='smoker', data=tips, markers=["o", "x"])

# Group AND draw each group in its own sub-plot, controlled by `col`.
g = sns.lmplot(x='total_bill', y='tip', col='smoker', data=tips)

# col + hue together: grouped and faceted at once; x_jitter controls how
# much the points are jittered.  The original call did not parse (a doubled
# comma across the line break and a missing comma between `size` and
# `aspect`).
g = sns.lmplot(
    x="size",
    y="total_bill",
    hue="day",
    col="day",
    data=tips,
    col_wrap=2,
    size=3,
    aspect=.4,
    x_jitter=.1,
)

# `row` can drive the sub-plot grid as well: sex on rows, time on columns.
g = sns.lmplot(x="total_bill", y="tip", row="sex", col="time", data=tips, size=4)

#------------------------------ Regression plots: regplot ------------------------------#
# ci : confidence level of the band drawn around the regression line.
ax = sns.regplot(
    data=tips,
    x="total_bill",
    y="tip",
    color="g",
    marker="+",
    ci=90,
)

# The fits above are first-order (order=1).  A second-order curve is fitted
# by passing order=2, shown here on subset "II" of the anscombe data.
ans = sns.load_dataset("anscombe")
ax = sns.regplot(
    data=ans.loc[ans["dataset"] == "II"],
    x="x",
    y="y",
    scatter_kws={"s": 80},
    order=2,
    ci=None,
    truncate=True,
)


#------------------------------ Distribution of a numeric variable ------------------------------#
# Density plot of one numeric variable.  By default both the probability
# density curve and the histogram are drawn.
#   hist     : True/False, whether the histogram is shown
#   vertical : True/False, whether the plot is drawn vertically
ax = sns.distplot(
    tips["total_bill"],
    rug=True,
    hist=True,
    vertical=False,
)

#------------------------------ Kernel density plots: kdeplot ------------------------------#
# For a single variable the kernel density plot and a line plot show the
# same result.
#
# Bivariate density drawn per group, i.e. two density plots on one figure.
# The plot reveals each group's density centre, much like plotting cluster
# centres in a clustering algorithm.
iris = sns.load_dataset("iris")
setosa = iris.loc[iris["species"] == "setosa"]        # group 1
virgnica = iris.loc[iris["species"] == "virginica"]   # group 2

ax = sns.kdeplot(
    setosa["sepal_width"],
    setosa["sepal_length"],
    cmap="Reds",
    shade=True,
    shade_lowest=False,
)
ax = sns.kdeplot(
    virgnica["sepal_width"],
    virgnica["sepal_length"],
    cmap="Blues",
    shade=True,
    shade_lowest=False,
)

#------------------------------ Bivariate relationship: jointplot ------------------------------#
# kind = "reg" draws a regression line
# kind = "kde" draws kernel density estimates
#
# x and y are passed by keyword in every call: this matches the first call
# and also works on seaborn releases that no longer accept them
# positionally.
np.random.seed(0)
sns.set(style="white", color_codes=True)

# Default: scatter plot of the two variables plus a histogram of each (the
# original notes add that the correlation coefficient and its significance
# are reported too).
g = sns.jointplot(x="total_bill", y="tip", data=tips)

# `kind` adds to the scatter plot: here a fitted line and fitted densities.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg")

# Hexagonal bins instead of individual points.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="hex")

# Kernel density version.
g = sns.jointplot(x="sepal_width", y="petal_length", data=iris, kind="kde", space=0, color="g")

# Control the figure size and colour.
# NOTE(review): newer seaborn releases renamed `size` to `height`; confirm
# against the installed version.
g = sns.jointplot(x="total_bill", y="tip", data=tips, size=5, ratio=3, color="g")

#------------------------------ Pairwise relationships: pairplot ------------------------------#
# sns.pairplot parameters used below:
#   vars      : column names to include, e.g. ["sepal_width", "sepal_length"]
#   diag_kind : plot drawn on the diagonal; histogram by default,
#               "kde" for a kernel density plot
#   markers   : marker per group, e.g. ["o", "s", "D"]

# x-y scatter with a regression line (scatter=True, fit_reg=True).
g = sns.lmplot(x="total_bill", y="tip", data=tips, fit_reg=True, hue="smoker", scatter=True)

# Grouped pair plot: each species gets its own marker shape.  The diagonal
# defaults to histograms; diag_kind="kde" switches it to density curves.
# The original passed kind="kde", which selects the off-diagonal plot type
# rather than the diagonal one described here.
g = sns.pairplot(iris, hue="species", markers=["o", "s", "D"], diag_kind="kde")

# Plot only a subset of the DataFrame's columns.
g = sns.pairplot(iris, vars=["sepal_width", "sepal_length"])


#------------------------------ Heat maps: heatmap ------------------------------#
# sns.heatmap parameters:
#   vmin  : smallest value mapped to a colour
#   vmax  : largest value mapped to a colour
#   cbar  : whether the colour bar is shown
#   annot : annot=True writes each value into its cell

# Heat map of the missing-value mask of `iris`.
ax = sns.heatmap(iris.isnull(), yticklabels=False, cbar=False, cmap="Blues")

# Heat map with the values written on it.  pivot() is called with keywords
# because recent pandas releases reject positional index/columns/values,
# while the keyword form works on every version.
flights = sns.load_dataset("flights")
flights = flights.pivot(index="month", columns="year", values="passengers")
ax = sns.heatmap(flights, annot=True, fmt="d")

#------------------------------ Time series: tsplot ------------------------------#
# estimator defaults to the mean.
# NOTE(review): `tsplot` was deprecated and later removed from seaborn;
# confirm that the installed version still provides it.
np.random.seed(22)
sns.set(color_codes=True)

# Ten noisy sine series sampled at 31 points: uniform noise on every sample
# plus one normally distributed offset per series.
x = np.linspace(0, 15, 31)
data = (
    np.sin(x)
    + np.random.rand(10, 31)
    + np.random.randn(10, 1)
)

# Draw the fit with two different confidence bands.
ax = sns.tsplot(data=data, ci=[68, 95], color="m")

# Long-form usage with the "gammas" example data set.
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(
    data=gammas,
    time="timepoint",
    value="BOLD signal",
    unit="subject",
    condition="ROI",
)

#------------------------------ Dual axes ------------------------------#
# The twinx / twiny functions create a second axis on the same plot.

import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): `sale` is not referenced by the plot below; kept so the
# module namespace is unchanged.
sale = pd.Series(np.random.random(10) * 100).map(int)

# Left axis: tip (blue).
ax = plt.subplot(111)
tips['tip'].plot(ax=ax, color="b")
ax.set(xlabel='time', ylabel='tip')

# Right axis, sharing the same x axis: total_bill (red).
ax2 = ax.twinx()
tips["total_bill"].plot(ax=ax2, color="r")
# Call set_ylabel; the original assigned a string to the attribute, which
# replaced the method and left the right-hand axis without a label.
ax2.set_ylabel("total_bill")
plt.title("example of double series figure")
# 常见图形:(箱线图，柱状图，散点图，折线图...) seaborn + matplotlib

# 猜你喜欢