from bokeh.io import output_notebook, show # or output_file, save
output_notebook()


from bokeh.plotting import figure

# Smallest possible example: build a figure, attach one line glyph, render it.
p = figure(
    title="Welcome to Bokeh",
    plot_width=600,
    plot_height=200,
)
# Glyphs are attached to a figure by calling the matching method on it.
p.line(x=[1, 2, 3], y=[-1, 4, 2])
show(p)


import numpy as np
# Synthetic data: a smooth curve plus standard-normal noise scaled by 0.4.
x = np.linspace(0, 4, num=100)
y = np.sin(3 * np.cos(5 * x) * x)
noise = 0.4 * np.random.randn(x.size)

# Figure with a tinted background and thick, pale grid lines.
p = figure(title="Fancier figure", plot_width=800, plot_height=350)
p.background_fill_color = "#bdc8c9"
p.grid.grid_line_color = "#f2fff7"
p.grid.grid_line_width = 3

# Dashed line for the noiseless curve.
mean_style = dict(
    line_color="#4380e8", line_width=2.3, line_alpha=0.6, line_dash="2 2",
)
p.line(x, y, legend_label="Mean", **mean_style)

# Translucent circles for the noisy observations.
sample_style = dict(
    line_color="#460878", line_width=1.5, line_alpha=0.8,
    fill_color="#b861ff", fill_alpha=0.4,
)
p.circle(x, y + noise, radius=0.02, legend_label="Samples", **sample_style)

p.legend.location = "bottom_left"

show(p)


from bokeh.layouts import column

# A second curve over the same x grid, again with scaled standard-normal noise.
y2 = np.cos(5 * x) * np.sin(3 * x)
noise2 = 0.4 * np.random.randn(x.size)
# One rotation angle per sample -- glyph properties can be arrays too.
angles = np.arange(x.size) * np.pi / x.size

# Reusing p's range objects is what links panning/zooming between the two plots.
p2 = figure(
    title="Linked plot",
    plot_width=800,
    plot_height=350,
    x_range=p.x_range,
    y_range=p.y_range,
)
p2.background_fill_color = "#ffd9b0"
p2.grid.grid_line_color = "#6e6a66"
p2.grid.grid_line_width = 3

mean_style2 = dict(
    line_color="#34eba8", line_width=3, line_alpha=0.9, line_dash="4 4",
)
p2.line(x, y2, legend_label="Mean", **mean_style2)

sample_style2 = dict(
    line_color="#d90000", line_width=1.2, line_alpha=0.8,
    fill_color="#f73b3b", fill_alpha=0.4,
)
p2.cross(x, y2 + noise2, size=10, angle=angles, legend_label="Samples", **sample_style2)

p2.legend.location = "bottom_left"

# Stack the earlier figure on top of the new one.
show(column(p, p2))


import pandas as pd

# Scrape the first table on the 2020 NLCS page whose text contains
# "Justin Turner" and tidy it into a frame indexed by player name.
df = pd.read_html("https://www.baseball-reference.com/postseason/2020_NLCS.shtml", match="Justin Turner")[0]
# Only the first 20 columns are kept.
df = df[df.columns[:20]]
# Each column label is indexable; keep its second element as the flat name.
df.columns = [col[1] for col in df.columns]
# Drop every row with a missing value; copy so the assignment below writes
# to an independent frame rather than a possible view of the scraped table.
df = df.dropna(axis="rows", how="any").copy()
# regex=False: "*" must be removed as a literal character. As a regular
# expression it is a bare quantifier, and the default of `regex` differs
# between pandas versions.
df["Name"] = df["Name"].str.replace("*", "", regex=False)
df = df.set_index("Name")
df


from bokeh.models import ColumnDataSource, HoverTool

# Wrap the stats frame so glyphs and tooltips can refer to columns by name.
source = ColumnDataSource(df)
# Marker radius scales with RBI; the +1 gives a player with 0 RBI a non-zero radius.
source.data["radius"] = (1 + source.data["RBI"]) / 300

# (label, "@column") pairs shown when hovering a marker.
tooltips = [
    ("Name", "@Name"),
    ("RBIs", "@RBI"),
    ("WPA", "@WPA"),
    ("Strike Outs", "@SO"),
    ("Walks", "@BB"),
]

# Start with an empty toolbar; the hover tool is added explicitly below.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_width=800,
    plot_height=400,
    tools="",
)
marker_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
p.circle(x="BA", y="SLG", radius="radius", source=source, **marker_style)

hover = HoverTool(point_policy="snap_to_data", tooltips=tooltips)
p.add_tools(hover)

show(p)


# Same scatter, but the hover tool is requested through the figure's own
# `tools` / `tooltips` arguments instead of being added afterwards.
fig_options = dict(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_width=800,
    plot_height=400,
    tools="hover",
    tooltips=tooltips,
)
p = figure(**fig_options)

marker_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
p.circle(x="BA", y="SLG", radius="radius", source=source, **marker_style)

show(p)


# Scatter plus a least-squares line; the hover tool here is not restricted
# to any particular renderer.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_width=800,
    plot_height=400,
    tools="",
)
marker_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
p.circle(x="BA", y="SLG", radius="radius", source=source, **marker_style)

# Degree-1 fit of SLG against BA, drawn between the smallest and largest BA.
m, b = np.polyfit(df.BA, df.SLG, 1)
line_x = [df.BA.min(), df.BA.max()]
line_y = [m*ba+b for ba in line_x]
fit_style = dict(
    line_color="#ef3e42", line_width=1.2, line_alpha=0.8, line_dash="2 2",
)
p.line(x=line_x, y=line_y, **fit_style)

hover = HoverTool(point_policy="snap_to_data", tooltips=tooltips)
p.add_tools(hover)

show(p)


# Same plot again, but the circles are given a name so the hover tool can be
# limited to them via `renderers`.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_width=800,
    plot_height=400,
    tools="",
)
marker_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
p.circle(
    x="BA", y="SLG", radius="radius",
    source=source, name="scatter",
    **marker_style
)

# Degree-1 fit of SLG against BA, drawn between the smallest and largest BA.
m, b = np.polyfit(df.BA, df.SLG, 1)
line_x = [df.BA.min(), df.BA.max()]
line_y = [m*ba+b for ba in line_x]
fit_style = dict(
    line_color="#ef3e42", line_width=1.2, line_alpha=0.8, line_dash="2 2",
)
p.line(x=line_x, y=line_y, **fit_style)

# Look the circle renderer up by name and hand only that to the hover tool.
hover = HoverTool(
    point_policy="snap_to_data",
    renderers=p.select("scatter"),
    tooltips=tooltips,
)
p.add_tools(hover)

show(p)


# As before, plus hover_* properties that restyle a marker while it is hovered.
p = figure(
    title="Dodgers 2020 NLCS Player Stats",
    x_axis_label="Batting Average",
    y_axis_label="Slugging Average",
    plot_width=800,
    plot_height=400,
    tools="",
)
marker_style = dict(
    line_color="#005A9C", line_alpha=0.9,
    fill_color="#005A9C", fill_alpha=0.5,
)
hover_style = dict(
    hover_line_color="#005A9C",
    hover_fill_color="#A5ACAF",
    hover_fill_alpha=0.8,
)
p.circle(
    x="BA", y="SLG", radius="radius",
    source=source, name="scatter",
    **marker_style, **hover_style
)

# Degree-1 fit of SLG against BA, drawn between the smallest and largest BA.
m, b = np.polyfit(df.BA, df.SLG, 1)
line_x = [df.BA.min(), df.BA.max()]
line_y = [m*ba+b for ba in line_x]
fit_style = dict(
    line_color="#ef3e42", line_width=1.2, line_alpha=0.8, line_dash="2 2",
)
p.line(x=line_x, y=line_y, **fit_style)

# Hover is limited to the renderer named "scatter".
hover = HoverTool(
    point_policy="snap_to_data",
    renderers=p.select("scatter"),
    tooltips=tooltips,
)
p.add_tools(hover)

show(p)


from bs4 import BeautifulSoup as bs
from urllib.request import urlopen

# this will be a bit uglier than it should be because
# our original stat source didn't keep accents on names
def _find_headshot(player, candidates, max_mismatches=2):
    """Return the image URL of the first candidate matching *player*.

    A candidate matches when its name has the same length as *player* and
    differs from it in at most *max_mismatches* character positions (this
    tolerates a couple of accented letters).

    Args:
        player: name to look up.
        candidates: iterable of ``(name, url)`` pairs.
        max_mismatches: number of differing positions still accepted.

    Returns:
        The URL of the first match, or ``""`` when nothing matches.
    """
    for img_name, img_src in candidates:
        if len(img_name) != len(player):
            continue
        mismatches = sum(1 for i, j in zip(player, img_name) if i != j)
        if mismatches <= max_mismatches:
            return img_src
    return ""


with urlopen("https://www.mlb.com/dodgers/roster") as html:
    soup = bs(html, "lxml")
imgs = [div.find("img") for div in soup.find_all("div", class_="player-thumb__back")]
# find() returns None for a thumbnail div without an <img>; skip those.
imgs = [(img.attrs["alt"], img.attrs["src"]) for img in imgs if img is not None]

# Exactly one entry per player, in index order, so the new column always
# has the same length as the other columns of `source`.
links = [_find_headshot(name, imgs) for name in df.index]
source.data["img"] = links


# Header of the custom HTML tooltip: player image next to the player name.
TOOLTIPS = """
    <div style="width: 150px">
        <table style="margin-bottom: 0px">
            <tr style="background-color: transparent">
                <td><img src="@img" height="42" alt="@Name" width="42" border="2"></img></td>
                <td style="text-align:left"><span style="font-size: 17px; font-weight: bold">@Name</span></td>
            </tr>
        </table>
"""

# One tooltip row per statistic; the three slots all receive the stat name.
STAT_ROW = """
    <div style="margin-top: 2px">
        <span style="font-size: 15px;">{}</span>
        <span style="font-size: 10px; color: @{}_color;">@{}</span>
    </div>
"""

# color tooltips differently depending on whether player
# out- or under-performed the mean Dodger.
# `max` marks a stat where at-or-above the mean is coloured green,
# `min` one where at-or-below the mean is.
preferred_side = {"RBI": max, "WPA": max, "SO": min, "BB": max}
for stat, op in preferred_side.items():
    mean = df[stat].mean()
    colors = []
    for value in source.data[stat]:
        if op(mean, value) == value:
            colors.append("#04b31e")
        else:
            colors.append("#fc4c4c")
    source.data[stat+"_color"] = colors
    TOOLTIPS += STAT_ROW.format(stat, stat, stat)
TOOLTIPS += "</div>"

# Replace whatever tools the current figure has with the HTML-tooltip hover.
p.tools = []
html_hover = HoverTool(
    point_policy="snap_to_data",
    renderers=p.select("scatter"),
    tooltips=TOOLTIPS,
)
p.add_tools(html_hover)

show(p)


from bokeh.palettes import Dark2 as palette

# Four damped sines over the shared x grid, stored as columns "0".."3".
data = {"x": x}
data.update({
    str(k - 1): np.exp(-k*x/4) * np.sin(k*np.pi*x/2) / k
    for k in range(1, 5)
})
source = ColumnDataSource(data)

p = figure(title="I saw the sine", plot_width=800, plot_height=400)
# One line per column, coloured from the 4-colour palette.
for column_name, color in zip(map(str, range(4)), palette[4]):
    p.line(
        "x", column_name,
        source=source,
        line_color=color, line_width=2.3, line_alpha=0.7,
    )

# Attach the hover to the last renderer only; its tooltip still lists every
# column of the shared source.
sine_tooltips = [("x", "@x")]
sine_tooltips += [(f"Frequency {(i+1)/2} pi", f"@{i}") for i in range(4)]
p.add_tools(HoverTool(
    mode="vline",
    line_policy="interp",
    renderers=p.renderers[-1:],
    tooltips=sine_tooltips,
))

show(p)


from bokeh.models import Slider
from bokeh.layouts import row

# Five circles whose radius is driven by a slider.
p = figure(plot_width=400, plot_height=200, tools="")
xs = [1, 2, 3, 4, 5]
ys = [3, 2, 5, 6, 4]
r = p.circle(xs, ys, radius=0.2, alpha=0.5)

# js_link copies the slider's `value` onto the glyph's `radius` property.
slider = Slider(start=0.1, end=2, step=0.01, value=0.2)
slider.js_link('value', r.glyph, 'radius')

show(row(p, slider))


from bokeh.palettes import Spectral4
from bokeh.plotting import figure, show
# Import the stock sample data, downloading it first if the import fails.
try:
    from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT
except Exception:
    # `Exception` rather than a bare `except:` so Ctrl-C / SystemExit still
    # propagate. NOTE(review): the precise error raised when the sample data
    # is missing is not visible from here -- narrow this further if known.
    from bokeh.sampledata import download
    download(progress=False)
    from bokeh.sampledata.stocks import AAPL, GOOG, IBM, MSFT

p = figure(plot_width=800, plot_height=250, x_axis_type="datetime")
p.title.text = 'Click on legend entries to mute the corresponding lines'

# Ticker -> raw sample data, in the order the lines are drawn.
stocks = {"AAPL": AAPL, "IBM": IBM, "MSFT": MSFT, "GOOG": GOOG}
for (ticker, stock_data), color in zip(stocks.items(), Spectral4):
    # Dedicated loop names: the notebook-wide `df` and `data` used by other
    # cells are left untouched.
    stock_df = pd.DataFrame(stock_data)
    stock_df['date'] = pd.to_datetime(stock_df['date'])
    # muted_* describe how the line is drawn once muted from the legend.
    p.line(stock_df['date'], stock_df['close'], line_width=2, color=color, alpha=0.8,
           muted_color=color, muted_alpha=0.2, legend_label=ticker)

p.legend.location = "top_left"
p.legend.click_policy = "mute"


show(p)


from oracle import sample

# Draw N samples from the oracle and show them as a scatter plot.
N = 250
x, y = sample(N)

p = figure(
    title="Samples",
    plot_width=800,
    plot_height=300,
    tools="",
)
p.circle(x, y, radius=0.02, fill_alpha=0.4, line_width=1.5)
show(p)


from oracle import true_function

# Evaluate the oracle's true function on 100 points spanning the sampled x
# range and overlay it on the existing samples figure.
line_x = np.linspace(x.min(), x.max(), num=100)
line_y = true_function(line_x)
true_fn_style = dict(line_width=2.3, line_alpha=0.5, line_dash="4 2")
p.line(line_x, line_y, legend_label="True function", **true_fn_style)
p.legend.location = "bottom_right"
show(p)


from bokeh.models import LinearColorMapper
from oracle import a, b, c, sigma

# Heat map of the expected squared error of a linear fit over a grid of
# (intercept, slope) pairs centred on the optimum (k_star, m_star).
plot_dim = 330
grid_dim = 50

m_star = b
k_star = c+a
m = np.linspace(m_star-5, m_star+5, grid_dim)
k = np.linspace(k_star-5, k_star+5, grid_dim)
# Image rows run along the y axis and columns along the x axis. The x axis is
# the intercept and the y axis the slope, so intercept must vary along columns
# and slope along rows -- hence meshgrid(k, m), not meshgrid(m, k).
kk, mm = np.meshgrid(k, m)
img = 2*a**2 + (mm-b)**2 + (kk-a-c)**2 + sigma**2
# Computed once; reused for the colour-mapper bounds and the displayed image.
log_img = np.log(img)

p_expected = figure(
    plot_height=plot_dim,
    plot_width=plot_dim,
    title="Log expected error",
    x_axis_label="intercept",
    y_axis_label="slope",
    x_range=(k_star-5, k_star+5),
    y_range=(m_star-5, m_star+5),
    tools=""
)
p_expected.grid.grid_line_alpha = 0.0
mapper = LinearColorMapper(
    low=log_img.min(),
    high=log_img.max(),
    palette="Plasma256"
)
# The "log" column is what gets drawn; "img" holds the raw error so the
# hover tool can report it.
p_expected.image(
    "log",
    x=k_star-5,
    y=m_star-5,
    dw=10,
    dh=10,
    color_mapper=mapper,
    source=ColumnDataSource({"img": [img], "log": [log_img]}),
    name="img",
    level="image"
)

# White cross at the optimum.
p_expected.cross(
    [k_star],
    [m_star],
    line_color="#ffffff",
    line_width=1.5,
    fill_color="#ffffff",
    size=10
)

# $x / $y are the cursor's data coordinates: x is the intercept axis and
# y the slope axis, matching the axis labels above.
p_expected.add_tools(HoverTool(
    renderers=p_expected.select("img"),
    tooltips=[
        ("Intercept", "$x"),
        ("Slope", "$y"),
        ("Expected Error", "@img")
    ]
))


show(p_expected)


n = 10 # size of subsample

# Mean squared residual of every grid line (slope mm, intercept kk) over the
# first n samples.
approx_img = np.zeros_like(img)
for idx in range(n):
    residual = mm*x[idx] + kk - y[idx]
    approx_img += residual**2
approx_img /= n

# The slope/intercept grids travel with the image so the browser-side
# callback can recompute the error without Python.
img_src = ColumnDataSource({"img": [np.log(approx_img)], "m": [mm], "k": [kk]})
# All samples, for the same reason.
sample_src = ColumnDataSource({"x": x, "y": y})

half_span = 5
p_train = figure(
    title="Log train error",
    plot_width=plot_dim,
    plot_height=plot_dim,
    x_axis_label="intercept",
    y_axis_label="slope",
    x_range=(k_star-half_span, k_star+half_span),
    y_range=(m_star-half_span, m_star+half_span),
    tools="",
)
p_train.grid.grid_line_alpha = 0.0

# Uses the same colour mapper as the expected-error plot.
p_train.image(
    "img",
    source=img_src,
    x=k_star-half_span,
    y=m_star-half_span,
    dw=10,
    dh=10,
    color_mapper=mapper,
    level="image",
)


# JavaScript body for the slider callback that recomputes the train-error
# image in the browser.
#
# The text is a str.format template: the single `{}` is filled with grid_dim
# and every literal JavaScript brace is therefore written doubled.
# The script reads three names it expects to be in scope: `img_src` (columns
# "img", "m", "k"), `sample_src` (columns "x", "y") and `cb_obj`, whose
# `value` is used as the number of samples n.
# For each of the grid_dim*grid_dim pixels it averages the squared residual
# of that pixel's slope/intercept over the first n samples and stores the log.
js_code = """
        var img_data = img_src.data; // data from the source containing the image
        var sample_data = sample_src.data; // data from the source containing our samples
        var n = cb_obj.value; // current value of the slider
        var img_dim = {}; // format the string with the grid dimension

        // initialize all the variables we'll need
        var pixel;
        var slope;
        var intercept;
        var error;
        var idx;

        // loop through each slope/intercept combination, calculate
        // the average error over the current number of samples, then
        // update that pixel in the image source's data
        for (var i = 0; i < img_dim; i++) {{
            for (var j = 0; j < img_dim; j++) {{
                pixel = 0;
                idx = i*img_dim + j;
                for (var k = 0; k < n; k++) {{
                    slope = img_data["m"][0][idx]
                    intercept = img_data["k"][0][idx]
                    error = slope*sample_data["x"][k] + intercept - sample_data["y"][k];
                    pixel += Math.pow(error, 2);
                }}
                pixel /= n;
                img_data["img"][0][idx] = Math.log(pixel);
            }}
        }}

        // have the source update all its renderers to reflect the new data
        img_src.change.emit();
""".format(grid_dim)


from bokeh.models import CustomJS

# Vertical slider choosing how many of the N samples enter the train error.
slider = Slider(
    title="Train samples",
    start=1,
    end=N,
    step=1,
    value=n,
    orientation="vertical",
    direction="rtl",
)
# Expose both data sources to the JavaScript under the names it expects.
callback = CustomJS(
    args=dict(img_src=img_src, sample_src=sample_src),
    code=js_code,
)
slider.js_on_change("value", callback)
show(row(p_expected, p_train, slider))


# Wide scatter panel showing the current subsample and the competing fits.
p_scatter = figure(
    plot_width=plot_dim*2,
    plot_height=int(plot_dim*0.7),
    x_axis_label="x",
    y_axis_label="y",
    tools="",
)

# Fit a line to the samples currently selected by the slider.
n_shown = slider.value
sub_x, sub_y = x[:n_shown], y[:n_shown]
m_hat, b_hat = np.polyfit(sub_x, sub_y, 1)
x_lo, x_hi = x.min(), x.max()

# Sources that the JavaScript callback rewrites when the slider moves.
subsample_src = ColumnDataSource({"x": sub_x, "y": sub_y})
line_src = ColumnDataSource({
    "x": [x_lo, x_hi],
    "y": [m_hat*x_lo + b_hat, m_hat*x_hi + b_hat],
})
tracker_src = ColumnDataSource({"x": [b_hat], "y": [m_hat]})

# Black cross on the expected-error map at the fitted (intercept, slope).
p_expected.cross(
    "x", "y",
    source=tracker_src,
    size=10,
    line_color="#000000", line_width=1.5,
    fill_color="#000000",
)

p_scatter.circle(
    "x", "y",
    source=subsample_src,
    legend_label="Samples",
    line_width=1.5, line_alpha=0.8,
    fill_alpha=0.4,
)
p_scatter.line(
    "x", "y",
    source=line_src,
    legend_label="Train best fit",
    line_width=2.3, line_alpha=0.8,
)

# Line with the optimum slope/intercept, drawn across the full x range.
best_line_x = [x_lo, x_hi]
best_line_y = [m_star*x_end + k_star for x_end in best_line_x]
p_scatter.line(
    best_line_x, best_line_y,
    legend_label="True best fit",
    line_width=2.3, line_alpha=0.5, line_dash="2 2",
)

# The quadratic built from the oracle's a, b, c coefficients.
quad_x = np.linspace(x_lo, x_hi, 100)
quad_y = a*quad_x**2 + b*quad_x + c
p_scatter.line(
    quad_x, quad_y,
    legend_label="True function",
    line_width=2.3, line_alpha=0.5, line_dash="4 4",
)
p_scatter.legend.location = "top_left"


# Extend the callback: besides the image, also refresh the displayed
# subsample, the least-squares line and the tracker cross.
#
# This chunk is appended verbatim (it is NOT passed through str.format), so
# JavaScript braces are written singly here; doubled braces would reach the
# browser literally as redundant nested blocks.
# It reuses `n` and `sample_data` declared earlier in js_code and expects
# `subsample_src`, `line_src` and `tracker_src` in the callback's args.
js_code += """
        var subsample_data = subsample_src.data;
        var line_data = line_src.data;
        var tracker_data = tracker_src.data;

        // push all n samples to our subsample source
        // compute sums we'll need to do regression fit
        var x;
        var y;
        var xsum = 0;
        var ysum = 0;
        var x2sum = 0;
        var xysum = 0;
        var new_x = [];
        var new_y = [];
        for (var i = 0; i < n; i ++) {
            x = sample_data["x"][i];
            y = sample_data["y"][i];
            new_x.push(x);
            new_y.push(y);

            xsum += x;
            ysum += y;
            x2sum += Math.pow(x, 2);
            xysum += x*y;
        }

        subsample_data["x"] = new_x;
        subsample_data["y"] = new_y;

        // update best fit values for regression
        var denom = n*x2sum - Math.pow(xsum, 2);
        var m = (n*xysum - xsum*ysum) / denom;
        var b = (ysum*x2sum - xsum*xysum) / denom;
        tracker_data["x"] = [b];
        tracker_data["y"] = [m];

        // plot a new best fit line with these values
        var new_line_y = [];
        for (var i = 0; i < line_data["x"].length; i ++) {
            new_line_y.push(m*line_data["x"][i] + b);
        }
        line_data["y"] = new_line_y;

        // update all source renderers
        subsample_src.change.emit();
        tracker_src.change.emit();
        line_src.change.emit();
"""


# Callback running the full js_code (image + subsample + fit line + tracker),
# with every data source it touches exposed under the name the script uses.
callback = CustomJS(
    args={
        "img_src": img_src,
        "sample_src": sample_src,
        "subsample_src": subsample_src,
        "line_src": line_src,
        "tracker_src": tracker_src
    },
    code=js_code
)
# The slider already carries the earlier image-only callback, and js_code
# still contains that image loop. Drop the old registration first so each
# slider move recomputes the image once instead of twice.
slider.js_property_callbacks = {}
slider.js_on_change("value", callback)


# Error maps and slider on the top row, scatter panel underneath.
show(column(
    row(p_expected, p_train, slider),
    p_scatter
))

	G	AB	R	H	2B	3B	HR	RBI	BB	SO	BA	OBP	SLG	OPS	SB	CS	E	WPA	cWPA
Name
Austin Barnes	3	7	0	2	0	0	0	0	0	2	0.286	0.286	0.286	0.571	0	0	1.0	-0.05	-0.78%
Cody Bellinger	7	25	3	5	0	1	2	5	6	9	0.200	0.355	0.520	0.875	1	0	0.0	0.17	10.85%
Mookie Betts	7	26	4	7	1	0	0	1	5	4	0.269	0.387	0.308	0.695	1	0	0.0	0.03	1.57%
Enrique Hernandez	6	13	2	4	0	0	2	2	1	2	0.308	0.357	0.769	1.126	0	0	1.0	0.11	5.95%
Max Muncy	7	22	6	5	2	0	2	6	9	11	0.227	0.452	0.591	1.043	0	0	0.0	-0.15	-6.82%
Joc Pederson	6	18	2	7	0	0	1	3	1	2	0.389	0.421	0.556	0.977	0	0	0.0	0.05	0.80%
AJ Pollock	6	20	0	4	0	0	0	0	0	4	0.200	0.200	0.200	0.400	0	0	0.0	-0.45	-12.19%
Edwin Rios	4	9	2	2	0	0	2	3	2	5	0.222	0.333	0.889	1.222	0	0	0.0	0.11	2.01%
Corey Seager	7	29	8	9	2	0	5	11	1	6	0.310	0.333	0.897	1.230	0	0	0.0	-0.08	-9.40%
Will Smith	7	28	3	5	1	0	1	7	0	10	0.179	0.179	0.321	0.500	0	0	0.0	0.24	6.02%
Chris Taylor	5	18	3	4	2	0	0	0	2	8	0.222	0.300	0.333	0.633	0	0	0.0	-0.05	1.21%
Justin Turner	7	25	6	7	2	0	1	1	3	4	0.280	0.379	0.480	0.859	0	0	0.0	-0.24	-4.49%

Interactive Plotting with Bokeh¶

Or: if I see another unformatted matplotlib figure in a paper, I will lose my mind¶

The Basics¶

The Basics¶

The Basics¶

The Basics¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

Adding Interactions¶

More Complex Example: Using JavaScript Callbacks¶

JS Callback Example¶

JS Callback Example¶

JS Callback Example¶

JS Callback Example¶

Conclusions¶