df["X"] = pd.Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
df
Out[92]:
Col1 Col2 X
0 0.047633 0.150047 A
1 0.296385 0.212826 A
2 0.562141 0.136243 A
3 0.997786 0.224560 A
4 0.585457 0.178914 A
5 0.551201 0.867102 B
6 0.740142 0.003872 B
7 0.959130 0.581506 B
8 0.114489 0.534242 B
9 0.042882 0.314845 B
bp = df.boxplot(by="X")
Area
使用 Series.plot.area() 或者 DataFrame.plot.area() 可以画出area图。
In [60]: df = pd.DataFrame(np.random.rand(10, 4), columns=["a", "b", "c", "d"])
In [61]: df.plot.area();
如果不想叠加,可以指定stacked=False
In [62]: df.plot.area(stacked=False);
Scatter
DataFrame.plot.scatter() 可以创建点图。
In [63]: df = pd.DataFrame(np.random.rand(50, 4), columns=["a", "b", "c", "d"])
In [64]: df.plot.scatter(x="a", y="b");
scatter图还可以带第三个轴:
df.plot.scatter(x="a", y="b", c="c", s=50);
可以将第三个参数变为散点的大小:
df.plot.scatter(x="a", y="b", s=df["c"] * 200);
Hexagonal bin
使用 DataFrame.plot.hexbin() 可以创建蜂窝图:
In [69]: df = pd.DataFrame(np.random.randn(1000, 2), columns=["a", "b"])
In [70]: df["b"] = df["b"] + np.arange(1000)
In [71]: df.plot.hexbin(x="a", y="b", gridsize=25);
In [88]: from pandas.plotting import andrews_curves
In [89]: data = pd.read_csv("data/iris.data")
In [90]: plt.figure();
In [91]: andrews_curves(data, "Name");
In [92]: from pandas.plotting import parallel_coordinates
In [93]: data = pd.read_csv("data/iris.data")
In [94]: plt.figure();
In [95]: parallel_coordinates(data, "Name");
滞后图lag plot
滞后图是用时间序列和相应的滞后阶数序列做出的散点图。可以用于观测自相关性。
In [96]: from pandas.plotting import lag_plot
In [97]: plt.figure();
In [98]: spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000)
In [99]: data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing))
In [100]: lag_plot(data);
In [101]: from pandas.plotting import autocorrelation_plot
In [102]: plt.figure();
In [103]: spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
In [104]: data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
In [105]: autocorrelation_plot(data);
In [106]: from pandas.plotting import bootstrap_plot
In [107]: data = pd.Series(np.random.rand(1000))
In [108]: bootstrap_plot(data, size=50, samples=500, color="grey");
In [120]: ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))
In [121]: ts = np.exp(ts.cumsum())
In [122]: ts.plot(logy=True);
多个Y轴
使用secondary_y=True 可以绘制多个Y轴数据:
In [125]: plt.figure();
In [126]: ax = df.plot(secondary_y=["A", "B"])
In [127]: ax.set_ylabel("CD scale");
In [128]: ax.right_ax.set_ylabel("AB scale");
小图标上面默认会添加right字样,想要去掉的话可以设置mark_right=False:
In [129]: plt.figure();
In [130]: df.plot(secondary_y=["A", "B"], mark_right=False);
In [133]: plt.figure();
In [134]: df["A"].plot(x_compat=True);
如果有多个图像需要调整,可以使用with:
In [135]: plt.figure();
In [136]: with pd.plotting.plot_params.use("x_compat", True):
.....: df["A"].plot(color="r")
.....: df["B"].plot(color="g")
.....: df["C"].plot(color="b")
.....:
In [176]: df = pd.DataFrame(np.random.randn(1000, 10), index=ts.index)
In [177]: df = df.cumsum()
In [178]: plt.figure();
In [179]: df.plot(colormap="cubehelix");