相关图
一个分类图,显示了在法医工作中获得的 214 个玻璃碎片样本的矿物含量之间的相关性。
该数据集包含七个变量,分别测量每个玻璃碎片中镁 (Mg)、钙 (Ca)、铁 (Fe)、钾 (K)、钠 (Na)、铝 (Al) 和钡 (Ba) 的含量。彩色圆圈表示每一对变量之间的相关性:圆圈的颜色对应相关系数的取值,圆圈的大小对应相关系数的绝对值。
详情
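- 样本数据: bokeh.sampledata.forensic_glass
- Bokeh API: bokeh.plotting.figure.scatter, bokeh.models.ColumnDataSource, bokeh.models.FixedTicker, bokeh.transform.linear_cmap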
- 更多信息:
- 关键词: 散点图, 相关图
from itertools import combinations
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource, FixedTicker
from bokeh.plotting import figure, show
from bokeh.sampledata.forensic_glass import data as df
from bokeh.transform import linear_cmap
# Columns of the forensic glass data set whose pairwise correlations are drawn.
elements = ("Mg", "Ca", "Fe", "K", "Na", "Al", "Ba")

# Every unordered pair of distinct elements, in combinations() order.
pairs = list(combinations(elements, 2))

# np.corrcoef on two series yields a 2x2 matrix; the off-diagonal entry
# [0, 1] is the correlation coefficient between the two columns.
correlations = [
    np.corrcoef(df[first], df[second])[0, 1]
    for first, second in pairs
]

# Transpose the list of pairs into one tuple per categorical axis.
first_elements, second_elements = zip(*pairs)

# One row per element pair; these column names are referenced by the glyph.
new_df = pd.DataFrame({
    "oxide_1": first_elements,
    "oxide_2": second_elements,
    "correlation": correlations,
})
# Marker size depends only on |correlation|: 10 for no correlation, up to
# 110 for a perfect (positive or negative) correlation.
new_df["dot_size"] = (1 + 10 * new_df["correlation"].abs()) * 10

# Axis factors in first-seen order. The x factors are kept exactly as
# unique() returns them, while the y factors are converted to a plain list.
x_range = new_df["oxide_1"].unique()
y_range = list(new_df["oxide_2"].unique())

source = ColumnDataSource(new_df)
# Categorical figure: both ranges are the element names, the x-axis is
# drawn on top and no toolbar is shown.
p = figure(
    x_range=x_range,
    y_range=y_range,
    x_axis_location="above",
    toolbar_location=None,
    background_fill_color="#fafafa",
)

# Fill colour is driven by the "correlation" column through the RdYlGn9
# palette, mapped over the interval [-0.5, 0.5].
fill_mapper = linear_cmap("correlation", "RdYlGn9", -0.5, 0.5)

# One marker per element pair; its size comes from the "dot_size" column.
circles = p.scatter(
    x="oxide_1",
    y="oxide_2",
    size="dot_size",
    source=source,
    fill_color=fill_mapper,
    line_color="#202020",
)

# Colour bar built from the glyph's own mapper, with ticks only at the
# interval ends and the midpoint, placed below the plot.
color_bar = circles.construct_color_bar(
    title="correlation",
    ticker=FixedTicker(ticks=[-0.5, 0.0, 0.5]),
    location=(200, 0),
    width=150,
    height=20,
    major_tick_line_color=None,
)
p.add_layout(color_bar, "below")

# Remove the outline, grid, axis lines and tick marks so that only the
# labels and the markers remain.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_tick_out = 0

show(p)