Bokeh uses Python APIs to build and render HTML elements and animate them with JavaScript.
Plots can be embedded in Jupyter notebooks, saved to HTML, or even served as server-based apps (e.g. the Dask Dashboard).
Embedded HTML/JS means interactive plots can be published online, e.g. via JupyterHub.
# `figure` is the canvas onto which `glyph` objects are added.
# `show` is imported here as well, because this cell calls it below.
from bokeh.plotting import figure, show

p = figure(title="Welcome to Bokeh", plot_height=200, plot_width=600)
p.line([1, 2, 3], [-1, 4, 2])  # glyphs are added to figure via methods
show(p)
Multiple `glyph`s can be combined onto a single figure.
import numpy as np

# A smooth curve, plus standard-normal noise (scaled by 0.4) for the samples.
x = np.linspace(0, 4, 100)
y = np.sin(3*np.cos(5*x)*x)
noise = 0.4*np.random.randn(*x.shape)

# Canvas and grid styling.
p = figure(title="Fancier figure", plot_height=350, plot_width=800)
p.background_fill_color = "#bdc8c9"
p.grid.grid_line_color = "#f2fff7"
p.grid.grid_line_width = 3

# Dashed line for the noise-free curve.
p.line(
    x,
    y,
    legend_label="Mean",
    line_color="#4380e8",
    line_dash="2 2",
    line_width=2.3,
    line_alpha=0.6,
)

# Translucent circles for the noisy samples.
p.circle(
    x,
    y+noise,
    radius=0.02,
    legend_label="Samples",
    line_color="#460878",
    line_width=1.5,
    line_alpha=0.8,
    fill_color="#b861ff",
    fill_alpha=0.4,
)

p.legend.location = "bottom_left"
show(p)
# `row`, `column`, and `grid` layouts can be used to combine figures.
from bokeh.layouts import column

y2 = np.cos(5*x)*np.sin(3*x)
noise2 = 0.4*np.random.randn(*x.shape)
angles = np.arange(*x.shape)*np.pi/x.shape[0]  # properties can be arrays too

# Reusing the first figure's range objects is what links the two plots.
p2 = figure(
    title="Linked plot", plot_height=350, plot_width=800,
    x_range=p.x_range, y_range=p.y_range
)
p2.background_fill_color = "#ffd9b0"
p2.grid.grid_line_width = 3
p2.grid.grid_line_color = "#6e6a66"
p2.line(
    x, y2,
    line_width=3, line_alpha=0.9, line_color="#34eba8", line_dash="4 4",
    legend_label="Mean"
)
p2.cross(
    x, y2+noise2, size=10, angle=angles,
    line_width=1.2, line_alpha=0.8, line_color="#d90000",
    fill_alpha=0.4, fill_color="#f73b3b", legend_label="Samples"
)
p2.legend.location = "bottom_left"
show(column(p, p2))
# `ColumnDataSource` objects represent data on the JS side.
import pandas as pd

# Fetch the table on the page that mentions "Justin Turner".
df = pd.read_html(
    "https://www.baseball-reference.com/postseason/2020_NLCS.shtml",
    match="Justin Turner"
)[0]
df = df[df.columns[:20]]
# Each column label is indexed with [1]; presumably the header has two
# levels and only the second is wanted (verify against the live page).
df.columns = [col[1] for col in df.columns]
df = df.dropna(axis="rows", how="any")
# Strip the literal "*" marker. regex=False is explicit because "*" alone is
# not a valid regular expression and the default for `regex` has changed
# between pandas releases.
df["Name"] = df.Name.str.replace("*", "", regex=False)
df = df.set_index("Name")
df
Name | G | AB | R | H | 2B | 3B | HR | RBI | BB | SO | BA | OBP | SLG | OPS | SB | CS | E | WPA | cWPA |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Austin Barnes | 3 | 7 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 2 | 0.286 | 0.286 | 0.286 | 0.571 | 0 | 0 | 1.0 | -0.05 | -0.78% |
Cody Bellinger | 7 | 25 | 3 | 5 | 0 | 1 | 2 | 5 | 6 | 9 | 0.200 | 0.355 | 0.520 | 0.875 | 1 | 0 | 0.0 | 0.17 | 10.85% |
Mookie Betts | 7 | 26 | 4 | 7 | 1 | 0 | 0 | 1 | 5 | 4 | 0.269 | 0.387 | 0.308 | 0.695 | 1 | 0 | 0.0 | 0.03 | 1.57% |
Enrique Hernandez | 6 | 13 | 2 | 4 | 0 | 0 | 2 | 2 | 1 | 2 | 0.308 | 0.357 | 0.769 | 1.126 | 0 | 0 | 1.0 | 0.11 | 5.95% |
Max Muncy | 7 | 22 | 6 | 5 | 2 | 0 | 2 | 6 | 9 | 11 | 0.227 | 0.452 | 0.591 | 1.043 | 0 | 0 | 0.0 | -0.15 | -6.82% |
Joc Pederson | 6 | 18 | 2 | 7 | 0 | 0 | 1 | 3 | 1 | 2 | 0.389 | 0.421 | 0.556 | 0.977 | 0 | 0 | 0.0 | 0.05 | 0.80% |
AJ Pollock | 6 | 20 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 4 | 0.200 | 0.200 | 0.200 | 0.400 | 0 | 0 | 0.0 | -0.45 | -12.19% |
Edwin Rios | 4 | 9 | 2 | 2 | 0 | 0 | 2 | 3 | 2 | 5 | 0.222 | 0.333 | 0.889 | 1.222 | 0 | 0 | 0.0 | 0.11 | 2.01% |
Corey Seager | 7 | 29 | 8 | 9 | 2 | 0 | 5 | 11 | 1 | 6 | 0.310 | 0.333 | 0.897 | 1.230 | 0 | 0 | 0.0 | -0.08 | -9.40% |
Will Smith | 7 | 28 | 3 | 5 | 1 | 0 | 1 | 7 | 0 | 10 | 0.179 | 0.179 | 0.321 | 0.500 | 0 | 0 | 0.0 | 0.24 | 6.02% |
Chris Taylor | 5 | 18 | 3 | 4 | 2 | 0 | 0 | 0 | 2 | 8 | 0.222 | 0.300 | 0.333 | 0.633 | 0 | 0 | 0.0 | -0.05 | 1.21% |
Justin Turner | 7 | 25 | 6 | 7 | 2 | 0 | 1 | 1 | 3 | 4 | 0.280 | 0.379 | 0.480 | 0.859 | 0 | 0 | 0.0 | -0.24 | -4.49% |
`ColumnDataSource` fields can be referenced by name in glyph instantiation.
`HoverTool` can be used to display information stored in the `ColumnDataSource`.
from bokeh.models import ColumnDataSource, HoverTool

# Wrap the stats table so glyphs can refer to its columns by name, and add
# a derived column that sizes each marker from the player's RBI count.
source = ColumnDataSource(df)
source.data["radius"] = (1 + source.data["RBI"]) / 300

p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_height=400,
    plot_width=800,
    tools="",
)
p.circle(
    x="BA",
    y="SLG",
    radius="radius",
    source=source,
    line_color="#005A9C",
    line_alpha=0.9,
    fill_color="#005A9C",
    fill_alpha=0.5,
)

# (label, "@column") pairs; "@name" pulls the value from the data source.
tooltips = [
    ("Name", "@Name"),
    ("RBIs", "@RBI"),
    ("WPA", "@WPA"),
    ("Strike Outs", "@SO"),
    ("Walks", "@BB"),
]
hover = HoverTool(tooltips=tooltips, point_policy="snap_to_data")
p.add_tools(hover)
show(p)
# Same scatter, but the hover tool and its tooltips are requested through
# figure() arguments instead of an explicit HoverTool.
figure_kwargs = dict(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_height=400,
    plot_width=800,
    tools="hover",
    tooltips=tooltips,
)
p = figure(**figure_kwargs)

marker_style = dict(
    line_color="#005A9C",
    line_alpha=0.9,
    fill_color="#005A9C",
    fill_alpha=0.5,
)
p.circle(x="BA", y="SLG", radius="radius", source=source, **marker_style)
show(p)
# Scatter plus a least-squares line; the hover tool is not restricted to any
# particular renderer here.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_height=400,
    plot_width=800,
    tools="",
)
p.circle(
    x="BA",
    y="SLG",
    radius="radius",
    source=source,
    line_color="#005A9C",
    line_alpha=0.9,
    fill_color="#005A9C",
    fill_alpha=0.5,
)

# Degree-1 polynomial fit of SLG against BA, drawn between the extreme BAs.
m, b = np.polyfit(df["BA"], df["SLG"], 1)
line_x = [df["BA"].min(), df["BA"].max()]
fit_y = [m*ba + b for ba in line_x]
p.line(
    x=line_x,
    y=fit_y,
    line_color="#ef3e42",
    line_dash="2 2",
    line_width=1.2,
    line_alpha=0.8,
)

hover = HoverTool(tooltips=tooltips, point_policy="snap_to_data")
p.add_tools(hover)
show(p)
# Name the scatter renderer so the hover tool can be limited to it.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_height=400,
    plot_width=800,
    tools="",
)
p.circle(
    x="BA",
    y="SLG",
    radius="radius",
    name="scatter",
    source=source,
    line_color="#005A9C",
    line_alpha=0.9,
    fill_color="#005A9C",
    fill_alpha=0.5,
)

# Least-squares line between the smallest and largest batting average.
m, b = np.polyfit(df["BA"], df["SLG"], 1)
line_x = [df["BA"].min(), df["BA"].max()]
p.line(
    x=line_x,
    y=[m*ba + b for ba in line_x],
    line_color="#ef3e42",
    line_dash="2 2",
    line_width=1.2,
    line_alpha=0.8,
)

# Only renderers selected by the name "scatter" are passed to the hover tool.
scatter_renderers = p.select("scatter")
p.add_tools(HoverTool(
    renderers=scatter_renderers,
    tooltips=tooltips,
    point_policy="snap_to_data",
))
show(p)
# As before, with hover_* properties that restyle a marker under the cursor.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_height=400,
    plot_width=800,
    tools="",
)

base_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
hover_style = dict(
    hover_line_color="#005A9C",
    hover_fill_color="#A5ACAF",
    hover_fill_alpha=0.8,
)
p.circle(
    x="BA", y="SLG", radius="radius",
    source=source, name="scatter",
    **base_style, **hover_style
)

# Least-squares line between the smallest and largest batting average.
m, b = np.polyfit(df["BA"], df["SLG"], 1)
line_x = [df["BA"].min(), df["BA"].max()]
p.line(
    x=line_x,
    y=[m*ba + b for ba in line_x],
    line_color="#ef3e42",
    line_dash="2 2",
    line_width=1.2,
    line_alpha=0.8,
)

p.add_tools(HoverTool(
    renderers=p.select("scatter"),
    tooltips=tooltips,
    point_policy="snap_to_data",
))
show(p)
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen


def _names_match(name, img_name, max_mismatches=2):
    """Return True when both names have the same length and differ in at
    most ``max_mismatches`` character positions (tolerates missing accents).
    """
    if len(name) != len(img_name):
        return False
    mismatches = sum(1 for i, j in zip(name, img_name) if i != j)
    return mismatches <= max_mismatches


# this will be a bit uglier than it should be because
# our original stat source didn't keep accents on names
with urlopen("https://www.mlb.com/dodgers/roster") as html:
    soup = bs(html, "lxml")

# (alt text, image URL) for every thumbnail; skip containers without an <img>
thumbs = (div.find("img") for div in soup.find_all("div", class_="player-thumb__back"))
imgs = [(img.attrs["alt"], img.attrs["src"]) for img in thumbs if img is not None]

# Exactly one entry per player keeps the new column the same length as the
# other columns in the source: take the first matching image, or an empty
# URL when nothing on the roster page matches.
links = []
for name in df.index:
    match = next(
        (img_src for img_name, img_src in imgs if _names_match(name, img_name)),
        ""
    )
    links.append(match)
source.data["img"] = links
# Custom HTML tooltip: a header with the player's image and name, followed
# by one row per stat. The pieces are collected and joined at the end.
tooltip_parts = ["""
<div style="width: 150px">
<table style="margin-bottom: 0px">
<tr style="background-color: transparent">
<td><img src="@img" height="42" alt="@Name" width="42" border="2"></img></td>
<td style="text-align:left"><span style="font-size: 17px; font-weight: bold">@Name</span></td>
</tr>
</table>
"""]

# color tooltips differently depending on whether player
# out- or under-performed the mean Dodger
stat_names = ["RBI", "WPA", "SO", "BB"]
preferred = [max, max, min, max]  # min for SO: fewer strike outs is better
for stat, op in zip(stat_names, preferred):
    mean = df[stat].mean()
    colors = []
    for value in source.data[stat]:
        if op(mean, value) == value:
            colors.append("#04b31e")
        else:
            colors.append("#fc4c4c")
    source.data[stat+"_color"] = colors
    tooltip_parts.append("""
<div style="margin-top: 2px">
<span style="font-size: 15px;">{}</span>
<span style="font-size: 10px; color: @{}_color;">@{}</span>
</div>
""".format(stat, stat, stat))
tooltip_parts.append("</div>")
TOOLTIPS = "".join(tooltip_parts)

# Drop the tools already on the figure and attach a hover tool that uses
# the HTML template.
p.tools = []
html_hover = HoverTool(
    renderers=p.select("scatter"),
    tooltips=TOOLTIPS,
    point_policy="snap_to_data",
)
p.add_tools(html_hover)
show(p)
from bokeh.palettes import Dark2 as palette

# Four damped sine curves stored as columns "0".."3" next to the shared "x".
data = {"x": x}
data.update({
    str(i): np.exp(-(i+1)*x/4) * np.sin((i+1)*np.pi*x/2) / (i+1)
    for i in range(4)
})
source = ColumnDataSource(data)

p = figure(title="I saw the sine", plot_height=400, plot_width=800)
for column_index, color in enumerate(palette[4]):
    p.line(
        "x", str(column_index),
        source=source,
        line_color=color, line_width=2.3, line_alpha=0.7,
    )

# The hover tool is attached to the last line renderer only, while its
# tooltip lists all four columns.
curve_tooltips = [("x", "@x")]
for i in range(4):
    curve_tooltips.append(("Frequency {} pi".format((i+1)/2), f"@{i}"))
p.add_tools(HoverTool(
    renderers=p.renderers[-1:],
    tooltips=curve_tooltips,
    mode="vline",
    line_policy="interp",
))
show(p)
from bokeh.models import Slider
from bokeh.layouts import row

p = figure(plot_width=400, plot_height=200, tools="")
xs = [1, 2, 3, 4, 5]
ys = [3, 2, 5, 6, 4]
r = p.circle(xs, ys, radius=0.2, alpha=0.5)

# js_link ties the slider's "value" to the glyph's "radius" property.
slider = Slider(start=0.1, end=2, step=0.01, value=0.2)
slider.js_link('value', r.glyph, 'radius')

show(row(p, slider))
from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show

# Import the sample stock data, downloading it first if the import fails.
# `except Exception` keeps the original best-effort retry but, unlike a bare
# `except:`, lets KeyboardInterrupt and SystemExit propagate.
try:
    from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
except Exception:
    from bokeh.sampledata import download
    download(progress=False)
    from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT

p = figure(plot_width=800, plot_height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to mute the corresponding lines'

# Loop-local names are used here so the `df` and `data` objects built by
# earlier cells are not overwritten.
tickers = zip([AAPL, IBM, MSFT, GOOG], ["AAPL", "IBM", "MSFT", "GOOG"], Spectral4)
for stock_data, name, color in tickers:
    stock_df = pd.DataFrame(stock_data)
    stock_df['date'] = pd.to_datetime(stock_df['date'])
    p.line(
        stock_df['date'], stock_df['close'], line_width=2, color=color, alpha=0.8,
        muted_color=color, muted_alpha=0.2, legend_label=name
    )

p.legend.location = "top_left"
p.legend.click_policy = "mute"
show(p)
from oracle import sample

# Draw N (x, y) samples from the oracle and scatter them.
N = 250
x, y = sample(N)

p = figure(
    title="Samples",
    plot_height=300,
    plot_width=800,
    tools="",
)
p.circle(
    x,
    y,
    radius=0.02,
    line_width=1.5,
    fill_alpha=0.4,
)
show(p)
from oracle import true_function

# Evaluate the oracle's function on 100 evenly spaced points spanning the
# sampled x range and overlay it on the existing scatter.
line_x = np.linspace(x.min(), x.max(), 100)
line_y = true_function(line_x)
p.line(
    line_x,
    line_y,
    legend_label="True function",
    line_dash="4 2",
    line_width=2.3,
    line_alpha=0.5,
)
p.legend.location = "bottom_right"
show(p)
from bokeh.models import LinearColorMapper
from oracle import a, b, c, sigma

plot_dim = 330
grid_dim = 50

# Centre of the parameter grid: slope m_star and intercept k_star.
m_star = b
k_star = c+a
m = np.linspace(m_star-5, m_star+5, grid_dim)
k = np.linspace(k_star-5, k_star+5, grid_dim)

# Bokeh draws image columns along x and rows along y. The x axis of these
# plots is the intercept and the y axis is the slope, so the intercept grid
# must vary along columns: meshgrid(k, m) returns arrays of shape
# (len(m), len(k)) with kk[i, j] == k[j] and mm[i, j] == m[i].
kk, mm = np.meshgrid(k, m)
img = 2*a**2 + (mm-b)**2 + (kk-a-c)**2 + sigma**2
log_img = np.log(img)

p_expected = figure(
    plot_height=plot_dim,
    plot_width=plot_dim,
    title="Log expected error",
    x_axis_label="intercept",
    y_axis_label="slope",
    x_range=(k_star-5, k_star+5),
    y_range=(m_star-5, m_star+5),
    tools=""
)
p_expected.grid.grid_line_alpha = 0.0

# Colour scale spans the full range of the log error.
mapper = LinearColorMapper(
    low=log_img.min(),
    high=log_img.max(),
    palette="Plasma256"
)
# The "log" column is what gets drawn; the raw "img" column is kept in the
# same source so the hover tool can report the un-logged error.
p_expected.image(
    "log",
    x=k_star-5,
    y=m_star-5,
    dw=10,
    dh=10,
    color_mapper=mapper,
    source=ColumnDataSource({"img": [img], "log": [log_img]}),
    name="img",
    level="image"
)
# White cross at the centre of the grid.
p_expected.cross(
    [k_star],
    [m_star],
    line_color="#ffffff",
    line_width=1.5,
    fill_color="#ffffff",
    size=10
)
# $x / $y are the cursor's data coordinates: x is the intercept axis and
# y is the slope axis, so the labels must follow that assignment.
p_expected.add_tools(HoverTool(
    renderers=p_expected.select("img"),
    tooltips=[
        ("Slope", "$y"),
        ("Intercept", "$x"),
        ("Expected Error", "@img")
    ]
))
show(p_expected)
n = 10  # size of subsample

# Mean squared error of the line mm*x + kk over the first n samples,
# evaluated at every grid point.
approx_img = np.zeros_like(img)
for x_i, y_i in zip(x[:n], y[:n]):
    approx_img += (mm*x_i + kk - y_i)**2
approx_img /= n

# The image source also carries the slope ("m") and intercept ("k") grids.
img_src = ColumnDataSource({
    "img": [np.log(approx_img)],
    "m": [mm],
    "k": [kk],
})
sample_src = ColumnDataSource({"x": x, "y": y})

p_train = figure(
    title="Log train error",
    x_axis_label="intercept",
    y_axis_label="slope",
    x_range=(k_star-5, k_star+5),
    y_range=(m_star-5, m_star+5),
    plot_height=plot_dim,
    plot_width=plot_dim,
    tools="",
)
p_train.grid.grid_line_alpha = 0.0

# Drawn with the existing `mapper` colour scale.
p_train.image(
    "img",
    source=img_src,
    color_mapper=mapper,
    level="image",
    x=k_star-5,
    y=m_star-5,
    dw=10,
    dh=10,
)
# JavaScript body for the slider callback. It recomputes the train-error
# image for the slider's current sample count. The template goes through
# str.format, so literal JS braces are doubled and the single {} receives
# the grid dimension.
js_code = """
var img_data = img_src.data; // data from the source containing the image
var sample_data = sample_src.data; // data from the source containing our samples
var n = cb_obj.value; // current value of the slider
var img_dim = {}; // format the string with the grid dimension
// initialize all the variables we'll need
var pixel;
var slope;
var intercept;
var error;
var idx;
// loop through each slope/intercept combination, calculate
// the average error over the current number of samples, then
// update that pixel in the image source's data
for (var i = 0; i < img_dim; i++) {{
    for (var j = 0; j < img_dim; j++) {{
        pixel = 0;
        idx = i*img_dim + j;
        // slope and intercept depend only on the pixel, not on the
        // sample, so they are read once per pixel
        slope = img_data["m"][0][idx];
        intercept = img_data["k"][0][idx];
        for (var k = 0; k < n; k++) {{
            error = slope*sample_data["x"][k] + intercept - sample_data["y"][k];
            pixel += Math.pow(error, 2);
        }}
        pixel /= n;
        img_data["img"][0][idx] = Math.log(pixel);
    }}
}}
// have the source update all its renderers to reflect the new data
img_src.change.emit();
""".format(grid_dim)
from bokeh.models import CustomJS

# Vertical slider for the number of training samples (1..N).
slider = Slider(
    title="Train samples",
    start=1,
    end=N,
    step=1,
    value=n,
    orientation="vertical",
    direction="rtl",
)

# The sources are handed to the JS code under the names it refers to.
callback = CustomJS(
    code=js_code,
    args=dict(img_src=img_src, sample_src=sample_src),
)
slider.js_on_change("value", callback)

show(row(p_expected, p_train, slider))
# Wide scatter panel for the current training subsample and candidate fits.
p_scatter = figure(
    x_axis_label="x",
    y_axis_label="y",
    plot_height=int(plot_dim*0.7),
    plot_width=plot_dim*2,
    tools="",
)

# Least-squares line through the first `slider.value` samples.
sub_x = x[:slider.value]
sub_y = y[:slider.value]
m_hat, b_hat = np.polyfit(sub_x, sub_y, 1)
x_lo, x_hi = x.min(), x.max()

# Sources for the subsample, the fitted line and the (intercept, slope)
# marker drawn on the expected-error image.
subsample_src = ColumnDataSource({"x": sub_x, "y": sub_y})
line_src = ColumnDataSource({
    "x": [x_lo, x_hi],
    "y": [m_hat*x_lo + b_hat, m_hat*x_hi + b_hat],
})
tracker_src = ColumnDataSource({"x": [b_hat], "y": [m_hat]})

# Black cross marking the fitted intercept/slope on the expected-error plot.
p_expected.cross(
    "x",
    "y",
    source=tracker_src,
    size=10,
    line_color="#000000",
    line_width=1.5,
    fill_color="#000000",
)

p_scatter.circle(
    "x",
    "y",
    source=subsample_src,
    legend_label="Samples",
    line_width=1.5,
    line_alpha=0.8,
    fill_alpha=0.4,
)
p_scatter.line(
    "x",
    "y",
    source=line_src,
    legend_label="Train best fit",
    line_width=2.3,
    line_alpha=0.8,
)

# Reference line built from m_star (slope) and k_star (intercept).
best_line_x = [x_lo, x_hi]
best_line_y = [m_star*x_end + k_star for x_end in best_line_x]
p_scatter.line(
    best_line_x,
    best_line_y,
    legend_label="True best fit",
    line_dash="2 2",
    line_width=2.3,
    line_alpha=0.5,
)

# Quadratic a*x**2 + b*x + c over the sampled x range.
quad_x = np.linspace(x_lo, x_hi, 100)
p_scatter.line(
    quad_x,
    a*quad_x**2 + b*quad_x + c,
    legend_label="True function",
    line_dash="4 4",
    line_width=2.3,
    line_alpha=0.5,
)
p_scatter.legend.location = "top_left"
# Extend the callback: push the first n samples to the scatter, refit the
# regression line, and move the tracker. This piece is NOT passed through
# str.format, so the JavaScript braces are written singly.
js_code += """
var subsample_data = subsample_src.data;
var line_data = line_src.data;
var tracker_data = tracker_src.data;
// push all n samples to our subsample source
// compute sums we'll need to do regression fit
var x;
var y;
var xsum = 0;
var ysum = 0;
var x2sum = 0;
var xysum = 0;
var new_x = [];
var new_y = [];
for (var i = 0; i < n; i++) {
    x = sample_data["x"][i];
    y = sample_data["y"][i];
    new_x.push(x);
    new_y.push(y);
    xsum += x;
    ysum += y;
    x2sum += Math.pow(x, 2);
    xysum += x*y;
}
subsample_data["x"] = new_x;
subsample_data["y"] = new_y;
// update best fit values for regression
var denom = n*x2sum - Math.pow(xsum, 2);
var m;
var b;
if (denom === 0) {
    // a single sample (or identical x values) cannot determine a slope;
    // fall back to a flat line through the mean of y instead of NaN
    m = 0;
    b = ysum / n;
} else {
    m = (n*xysum - xsum*ysum) / denom;
    b = (ysum*x2sum - xsum*xysum) / denom;
}
tracker_data["x"] = [b];
tracker_data["y"] = [m];
// plot a new best fit line with these values
var new_line_y = [];
for (var i = 0; i < line_data["x"].length; i++) {
    new_line_y.push(m*line_data["x"][i] + b);
}
line_data["y"] = new_line_y;
// update all source renderers
subsample_src.change.emit();
tracker_src.change.emit();
line_src.change.emit();
"""
# Callback running the full js_code (image update plus regression update),
# with every source it refers to passed in by name.
callback = CustomJS(
    args={
        "img_src": img_src,
        "sample_src": sample_src,
        "subsample_src": subsample_src,
        "line_src": line_src,
        "tracker_src": tracker_src
    },
    code=js_code
)
# The slider already carries the earlier image-only callback, and js_on_change
# appends rather than replaces. Clear the existing "value" callbacks first so
# the image is not recomputed twice on every slider change.
slider.js_property_callbacks.pop("change:value", None)
slider.js_on_change("value", callback)
show(column(
    row(p_expected, p_train, slider),
    p_scatter
))