Seattle bicycle traffic

This notebook contains my replication of this blog post by Jake VanderPlas on using data from bicycle traffic across Seattle’s Fremont Bridge to learn about commuting patterns.

import os
import ssl
from urllib.request import urlretrieve

import altair as alt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from pandas.tseries.holiday import USFederalHolidayCalendar
from seattlecycling.data import get_fremont_data
from seattlecycling.toolbox import csnap
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture as GMM
from vega_datasets import data as vega_data

seaborn.set()

Helper functions

fremont_url = (
    "https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD"
)


def get_fremont_data(
    filename="seattle_weather_fremont.csv", url=fremont_url, force_download=False
):
    """Download and cache the fremont bridge data

    Parameters
    ----------
    filename : string (optional)
        location to store the data
    url : string (optional)
        web location of the data
    force_download : Boolean (optional)
        if True, force redownload of data

    Returns
    -------
    data : pandas.DataFrame
        The fremont bridge data
    """
    # Solve problem with SSL certificate verification
    if not os.environ.get("PYTHONHTTPSVERIFY", "") and getattr(
        ssl, "_create_unverified_context", None
    ):
        ssl._create_default_https_context = ssl._create_unverified_context
    # Download and prepare data
    if force_download or not os.path.exists(filename):
        urlretrieve(url, filename)
    data = pd.read_csv(filename, index_col="Date")
    try:
        data.index = pd.to_datetime(data.index, format="%m/%d/%Y %I:%M:%S %p")
    except TypeError:
        data.index = pd.to_datetime(data.index)
    data.columns = ["total", "west", "east"]
    return data


def hours_of_daylight(date, axis=23.44, latitude=47.61):
    """Compute the hours of daylight for the given date"""
    diff = date - pd.Timestamp("2000-12-21")
    day = diff.total_seconds() / 24.0 / 3600
    day %= 365.25
    m = 1.0 - np.tan(np.radians(latitude)) * np.tan(
        np.radians(axis) * np.cos(day * np.pi / 182.625)
    )
    m = max(0, min(m, 2))
    return 24.0 * np.degrees(np.arccos(1 - m)) / 180.0


def print_rms(var):
    """Calculates and prints the root-mean-square about the trend line"""
    rms = np.std(var)
    print("Root-mean-square about trend: {0: .0f} riders".format(rms))


def csnap(df, fn=lambda x: x.shape, msg=None):
    """
    Custom Help function to print things in method chaining.
    Returns back the df to further use in chaining.
    """
    if msg:
        print(msg)
    display(fn(df))
    return df

Unsupervised exploration

# Load data

start = "1 Oct 2012"
end = "30 Jun 2015"

data = get_fremont_data()
data = data.loc[start:end]
data.head(3)

Date                  total  west  east
2012-10-03 00:00:00    13.0   4.0   9.0
2012-10-03 01:00:00    10.0   4.0   6.0
2012-10-03 02:00:00     2.0   1.0   1.0
# A first look at the data

data.resample("w").sum().plot()

seattle-bicycle-traffic_files/figure-markdown_strict/cell-5-output-2.png

Same graph as above using Altair

# Create a melted dataset

melted = (
    data.resample("w")
    .sum()
    .reset_index()
    .rename(columns={"Date": "date"})
    .melt(
        id_vars="date",
        var_name=["side"],
        value_name="crossings",
        value_vars=["total", "east", "west"],
    )
)
melted.head()

   date        side   crossings
0  2012-10-07  total    14292.0
1  2012-10-14  total    16795.0
2  2012-10-21  total    15509.0
3  2012-10-28  total    13437.0
4  2012-11-04  total    12194.0
# Produce same graph as above

alt.Chart(melted).mark_line().encode(x="date", y="crossings", color="side")

seattle-bicycle-traffic_files/figure-markdown_strict/cell-7-output-3.png

Visualising the data

# Create a dataframe with dates as index and hours as columns
pivoted = data.pivot_table(
    ["west", "east", "total"],
    index=data.index.date,
    columns=data.index.hour,
    fill_value=0,
)
pivoted.head()

(pivoted.head(): 5 rows × 72 columns of east, total and west counts for hours 0-23, indexed by date; too wide to render legibly here)

# Put raw values in a matrix

X = pivoted.values
X.shape
(1001, 72)
# Use PCA to reduce dimensionality (keep 90 percent of the variance)

Xpca = PCA(0.9).fit_transform(X)
Xpca.shape
(1001, 2)
total_trips = X.sum(1)
plt.scatter(Xpca[:, 0], Xpca[:, 1], c=total_trips, cmap="Paired")
plt.colorbar(label="Total trips")

seattle-bicycle-traffic_files/figure-markdown_strict/cell-11-output-2.png

What can we learn from this graph? We can see that the days fall into two quite distinct clusters, one with a higher number of trips and one with a lower number; that the number of trips increases along the length of each projected cluster (i.e. as we move away from the origin); and that close to the origin the groups are less distinguishable. Overall, there are, in effect, two types of days for Seattle cyclists. This is indeed pretty cool.
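As a sanity check on the PCA step, we can look at how much variance the two retained components actually explain; passing a float to PCA keeps the smallest number of components whose cumulative explained variance reaches that fraction. A minimal sketch, reusing the matrix X built above:

# Sanity check: variance explained by the retained components
# (X is the (1001, 72) matrix of hourly counts built above)
from sklearn.decomposition import PCA

pca = PCA(0.9).fit(X)
print(pca.explained_variance_ratio_)        # per-component fractions
print(pca.explained_variance_ratio_.sum())  # should be at least 0.9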

Unsupervised clustering

# Use a Gaussian mixture model to separate days into two clusters

gmm = GMM(2, covariance_type="full", random_state=0)
gmm.fit(Xpca)
cluster_label = gmm.predict(Xpca)
plt.scatter(Xpca[:, 0], Xpca[:, 1], c=cluster_label, cmap="Paired")

seattle-bicycle-traffic_files/figure-markdown_strict/cell-12-output-2.png

# Add cluster labels to original data

pivoted["cluster"] = cluster_label
data = data.join(pivoted["cluster"], on=data.index.date)
data.head()

Date                  total  west  east  cluster
2012-10-03 00:00:00    13.0   4.0   9.0        0
2012-10-03 01:00:00    10.0   4.0   6.0        0
2012-10-03 02:00:00     2.0   1.0   1.0        0
2012-10-03 03:00:00     5.0   2.0   3.0        0
2012-10-03 04:00:00     7.0   6.0   1.0        0
# Plot

hourly = data.groupby(["cluster", data.index.time]).mean()

fig, ax = plt.subplots(1, 2, figsize=(14, 5))
hourly_ticks = 4 * 60 * 60 * np.arange(6)

for i in range(2):
    hourly.loc[i].plot(ax=ax[i], xticks=hourly_ticks)
    ax[i].set_title("Cluster {0}".format(i))
    ax[i].set_ylabel("Average hourly trips")

seattle-bicycle-traffic_files/figure-markdown_strict/cell-14-output-1.png

The first plot shows a sharp bimodal pattern, indicative of a commuting pattern (with the majority of people riding west in the morning and east in the evening), while the second plot shows a wide unimodal pattern, indicative of weekend days and holidays.
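One quick way to test the commuting interpretation is to locate the peak hour for each direction within each cluster. A minimal sketch, reusing the hourly table from the cell above (its rows are indexed by (cluster, time)):

# Peak hour per direction and cluster
for cluster in (0, 1):
    for side in ["west", "east"]:
        peak = hourly.loc[cluster, side].idxmax()
        print("cluster", cluster, side, "peak at", peak)
# If cluster 0 captures commute days, we expect a morning peak westbound
# and an evening peak eastbound; cluster 1 should peak around midday.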

Uncovering work habits

# Check whether two clusters correspond to weekend and weekdays

dayofweek = pd.to_datetime(pivoted.index).dayofweek

plt.scatter(Xpca[:, 0], Xpca[:, 1], c=dayofweek, cmap=plt.cm.get_cmap("Paired", 7))

cb = plt.colorbar(ticks=range(7))
cb.set_ticklabels(["Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"])

seattle-bicycle-traffic_files/figure-markdown_strict/cell-15-output-1.png

Let’s look more closely at weekdays that follow a weekend pattern, of which there are a few.

results = pd.DataFrame(
    {
        "cluster": pivoted["cluster"],
        "is_weekend": (dayofweek > 4),
        "weekday": pivoted.index.map(lambda x: x.strftime("%a")),
    }
)
results.head()

            cluster  is_weekend  weekday
2012-10-03        0       False      Wed
2012-10-04        0       False      Thu
2012-10-05        0       False      Fri
2012-10-06        1        True      Sat
2012-10-07        1        True      Sun

Count number of weekend days with a workday pattern

weekend_workdays = results.query("cluster == 0 and is_weekend")
len(weekend_workdays)
0

Count number of weekdays that fall into the weekend / holiday pattern

weekday_holidays = results.query("cluster == 1 and not is_weekend")
len(weekday_holidays)
23

There were zero weekend days on which people in Seattle decided to work, but 23 weekdays that appear to be public holidays. Let's have a look.

# Download list of public holidays

cal = USFederalHolidayCalendar()
holidays = cal.holidays("2012", "2016", return_name=True)
holidays.head()
2012-01-02                 New Years Day
2012-01-16    Martin Luther King Jr. Day
2012-02-20                Presidents Day
2012-05-28                  Memorial Day
2012-07-04                      July 4th
dtype: object
# Add the days before and after holidays to the list

holidays_all = pd.concat(
    [
        holidays,
        "Day before " + holidays.shift(-1, "D"),
        "Day after " + holidays.shift(1, "D"),
    ]
)
holidays_all.sort_index(inplace=True)
holidays_all.head()
2012-01-01                 Day before New Years Day
2012-01-02                            New Years Day
2012-01-03                  Day after New Years Day
2012-01-15    Day before Martin Luther King Jr. Day
2012-01-16               Martin Luther King Jr. Day
dtype: object
# A list of holidays on which people in Seattle skip work

holidays_all.name = "name"
joined = weekday_holidays.join(holidays_all)
set(joined["name"])
{'Christmas',
 'Day after Christmas',
 'Day after Thanksgiving',
 'Day before Christmas',
 'July 4th',
 'Labor Day',
 'Memorial Day',
 'New Years Day',
 'Thanksgiving'}
# A list of holidays on which people in Seattle do go to work

set(holidays) - set(joined["name"])
{'Columbus Day',
 'Martin Luther King Jr. Day',
 'Presidents Day',
 'Veterans Day'}

What’s up with Fridays?

# Plot Fridays separately

fridays = dayofweek == 4
plt.scatter(Xpca[:, 0], Xpca[:, 1], c="gray", alpha=0.2)
plt.scatter(Xpca[fridays, 0], Xpca[fridays, 1], c="green")

seattle-bicycle-traffic_files/figure-markdown_strict/cell-23-output-2.png

What’s going on with the three strange outliers in the bottom-right corner?

# Get dates for the three outliers

weird_fridays = pivoted.index[fridays & (Xpca[:, 0] > 900)]
weird_fridays
Index([2013-05-17, 2014-05-16, 2015-05-15], dtype='object')
# Plot pattern for three outliers relative to average Friday

all_days = data.pivot_table("total", index=data.index.time, columns=data.index.date)

all_days.loc[:, weird_fridays].plot()
all_days.mean(1).plot(color="grey", lw=4, alpha=0.5, xticks=hourly_ticks)

seattle-bicycle-traffic_files/figure-markdown_strict/cell-25-output-2.png

We’ve found Seattle’s bike to work day.
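All three dates are in fact the third Friday of May, when US Bike to Work Day is held. A quick check, reusing weird_fridays from above:

# Confirm the outlier Fridays all fall in May
months = pd.to_datetime(weird_fridays).month
print(months.tolist())   # expect [5, 5, 5]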

Supervised modeling

This part contains my replication of this blog post by Jake VanderPlas on using data from bicycle traffic across Seattle’s Fremont Bridge to learn about commuting patterns.

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

plt.style.use("seaborn")

from seattlecycling.data import get_fremont_data
from seattlecycling.toolbox import hours_of_daylight, print_rms
# Load data

start = "1 Oct 2012"
end = "15 May 2014"

data = get_fremont_data()
data = data.loc[start:end]
data.head(3)

Date                  total  west  east
2012-10-03 00:00:00    13.0   4.0   9.0
2012-10-03 01:00:00    10.0   4.0   6.0
2012-10-03 02:00:00     2.0   1.0   1.0
# Resample data into daily and weekly totals

daily = data.resample("d").sum()
weekly = data.resample("w").sum()
# A first look at the data

weekly.plot()
plt.ylabel("Weekly rides");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-29-output-1.png

# Look at rolling weekly mean to smooth out short-term variation

data.resample("d").sum().rolling(30, center=True).mean().plot();

seattle-bicycle-traffic_files/figure-markdown_strict/cell-30-output-1.png

The blog post points out that 2014 saw increased cycle traffic across the bridge. Below we model the seasonal variation based on what we think influences people’s decision whether or not to ride a bike.

Accounting for hours of daylight

# Hours of daylight

weekly["daylight"] = list(map(hours_of_daylight, weekly.index))
daily["daylight"] = list(map(hours_of_daylight, daily.index))

weekly["daylight"].plot()
plt.ylabel("Hours of daylight (Seattle)");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-31-output-1.png

# Relationship between daylight and cycle traffic

plt.scatter(weekly.daylight, weekly.total)
plt.xlabel("Hours of daylight")
plt.ylabel("Weekly bicycle traffic");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-32-output-1.png

# Adding a linear trend

X = weekly[["daylight"]]
y = weekly["total"]
clf = LinearRegression(fit_intercept=True).fit(X, y)

weekly["daylight_trend"] = clf.predict(X)
weekly["daylight_corrected_total"] = (
    weekly.total - weekly.daylight_trend + np.mean(weekly.daylight_trend)
)

xfit = np.linspace(7, 17)
yfit = clf.predict(xfit[:, None])

plt.scatter(weekly.daylight, weekly.total)
plt.plot(xfit, yfit, "-k")
plt.title("Bycicle traffic through the year")
plt.xlabel("Hours of daylight")
plt.ylabel("Weekly bicycle traffic");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-33-output-1.png

clf.coef_[0]
1966.2003072317068
# Plot detrended data

trend = clf.predict(weekly[["daylight"]].values)
plt.scatter(weekly.daylight, weekly.total - trend + np.mean(trend))
plt.plot(xfit, np.mean(trend) + 0 * yfit, "-k")
plt.title("Weekly traffic through the year (detrended)")
plt.xlabel("Hours of daylight")
plt.ylabel("Adjusted weekly bicycle traffic");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-35-output-1.png

In the graph above, we have removed the variation in weekly riders that correlates with hours of daylight, so what remains approximates the number of rides per week we would expect to see if daylight were not a factor.
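To make the adjustment explicit: the corrected series is total - trend + mean(trend), which removes the trend-correlated variation while preserving the overall level. A toy illustration with made-up numbers:

# Toy example of the detrending identity (hypothetical numbers)
import pandas as pd

y = pd.Series([100.0, 150.0, 200.0])      # observed weekly totals
trend = pd.Series([90.0, 150.0, 210.0])   # fitted daylight trend
corrected = y - trend + trend.mean()
print(corrected.tolist())   # [160.0, 150.0, 140.0]: trend removed, mean kept at 150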

fig, ax = plt.subplots(1, 2, figsize=(15, 5))

weekly[["total", "daylight_trend"]].plot(ax=ax[0])
weekly["daylight_corrected_total"].plot(ax=ax[1])

ax[0].set_ylabel("Weekly crossing")
ax[0].set_title("Total weekly crossings and trend")
ax[1].set_ylabel("Adjusted weekly crossings")
ax[1].set_title("Detrended weekly crossings")

print_rms(weekly["daylight_corrected_total"])
Root-mean-square about trend:  2872 riders

seattle-bicycle-traffic_files/figure-markdown_strict/cell-36-output-2.png

Accounting for day of the week

# Plot average number of trips by weekday

days = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
daily["dayofweek"] = daily["total"].index.dayofweek
grouped = daily.groupby("dayofweek")["total"].mean()
grouped.index = days

grouped.plot()
plt.title("Average crossings by weekday")
plt.ylabel("Average daily crossings");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-37-output-1.png

# Account for hours of daylight and day of week simultaneously

# Add one-hot indicators of weekdays
for i in range(7):
    daily[days[i]] = (daily.index.dayofweek == i).astype(float)

# Detrend on days of week and daylight together
X = daily[days + ["daylight"]]
y = daily["total"]
clf = LinearRegression().fit(X, y)

daily["dayofweek_trend"] = clf.predict(X)
daily["dayofweek_corrected"] = (
    daily["total"] - daily["dayofweek_trend"] + daily["dayofweek_trend"].mean()
)
# Plot crossings and trend, and detrended data

fig, ax = plt.subplots(1, 2, figsize=(15, 5))

daily[["total", "dayofweek_trend"]].plot(ax=ax[0])
daily["dayofweek_corrected"].plot(ax=ax[1])

ax[0].set_ylabel("Daily crossing")
ax[0].set_title("Total daily crossings and trend")
ax[1].set_ylabel("Adjusted daily crossings")
ax[1].set_title("Detrended daily crossings")

print_rms(daily["dayofweek_corrected"])
Root-mean-square about trend:  652 riders

seattle-bicycle-traffic_files/figure-markdown_strict/cell-39-output-2.png

Accounting for rainfall and temperature

# Read in weather data
weather = pd.read_csv(
    "seattle_weather_SeaTacWeather.csv",
    index_col="DATE",
    parse_dates=True,
    usecols=[2, 5, 9, 10],
)
weather = weather.loc[start:end]
weather.columns = map(str.lower, weather.columns)

# Temperatures are in deg F; convert to deg C
weather["tmax"] = (weather["tmax"] - 32) * 5 / 9
weather["tmin"] = (weather["tmin"] - 32) * 5 / 9

# Rainfall is in inches; convert to mm
weather["prcp"] *= 25.4

weather["tmax"].resample("w").max().plot()
weather["tmin"].resample("w").min().plot()
plt.title("Temperature extremes in Seattle")
plt.ylabel("Weekly temperature extremes (C)");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-40-output-1.png

weather["prcp"].resample("w").sum().plot()
plt.title("Precipitation in Seattle")
plt.ylabel("Weekly precipitation in Seattle (mm)");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-41-output-1.png

# Combine daily and weather datasets

daily = daily.join(weather)
# Detrend data including weather information

columns = days + ["daylight", "tmax", "tmin", "prcp"]
X = daily[columns]
y = daily["total"]
clf = LinearRegression().fit(X, y)

daily["overall_trend"] = clf.predict(X)
daily["overall_corrected"] = (
    daily["total"] - daily["overall_trend"] + daily["overall_trend"].mean()
)
# Plot crossings and trend, and detrended data
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

daily[["total", "overall_trend"]].plot(ax=ax[0])
daily["overall_corrected"].plot(ax=ax[1])

ax[0].set_ylabel("Daily crossing")
ax[0].set_title("Total daily crossings and trend")
ax[1].set_ylabel("Adjusted daily crossings")
ax[1].set_title("Detrended daily crossings")

print_rms(daily["overall_corrected"])
Root-mean-square about trend:  457 riders

seattle-bicycle-traffic_files/figure-markdown_strict/cell-44-output-2.png

# Plot rolling 30 day average

daily["overall_corrected"].rolling(30, center=True).mean().plot()
plt.title("1-month moving average");

seattle-bicycle-traffic_files/figure-markdown_strict/cell-45-output-1.png

Accounting for a steady increase in riders

daily["daycount"] = np.arange(len(daily))

columns = days + ["daycount", "daylight", "tmax", "tmin", "prcp"]
X = daily[columns]
y = daily["total"]
final_model = LinearRegression().fit(X, y)

daily["final_trend"] = final_model.predict(X)
daily["final_corrected"] = (
    daily["total"] - daily["final_trend"] + daily["final_trend"].mean()
)
# Plot crossings and trend, and detrended data
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

daily[["total", "final_trend"]].plot(ax=ax[0])
daily["final_corrected"].plot(ax=ax[1])

ax[0].set_ylabel("Daily crossing")
ax[0].set_title("Total daily crossings and trend")
ax[1].set_ylabel("Adjusted daily crossings")
ax[1].set_title("Detrended daily crossings")

print_rms(daily["final_corrected"])
Root-mean-square about trend:  451 riders

seattle-bicycle-traffic_files/figure-markdown_strict/cell-47-output-2.png

What can the final model tell us?

# Compute error variance

vy = np.sum((y - daily["final_trend"]) ** 2) / len(y)  # residual variance (RSS / n)
X2 = np.hstack([X, np.ones((X.shape[0], 1))])  # append intercept column
C = vy * np.linalg.inv(np.dot(X2.T, X2))  # coefficient covariance matrix
var = C.diagonal()  # per-coefficient variances
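The cell above implements the standard least-squares error estimate: X2 augments X with a column of ones for the intercept, and the residual variance is propagated through the normal equations:

$$\hat\sigma^2 = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat y_i\right)^2, \qquad \operatorname{Cov}(\hat\beta) = \hat\sigma^2\left(X_2^\top X_2\right)^{-1}$$

The diagonal of this covariance matrix holds the variance of each fitted coefficient; the square roots of those entries are the error bars quoted below.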

How does rain affect ridership?

ind = columns.index("prcp")
slope = final_model.coef_[ind]
error = np.sqrt(var[ind])
print(
    "{0: .0f} +/- {1: .0f} daily crossings lost per cm of rain".format(
        -slope * 10, error * 10
    )
)
 331 +/-  28 daily crossings lost per cm of rain

The model shows that for every centimetre of rain, roughly 330 cyclists stay home or use another mode of transport (the fitted slope is per mm of rain, hence the factor of ten in the print statement above).

How does temperature affect ridership?

ind1, ind2 = columns.index("tmin"), columns.index("tmax")
slope = final_model.coef_[ind1] + final_model.coef_[ind2]
error = np.sqrt(var[ind1] + var[ind2])
print(
    "{0:.0f} +/- {1:.0f} riders per ten degrees Celsius".format(10 * slope, 10 * error)
)
493 +/- 102 riders per ten degrees Celsius

How does daylight affect ridership?

ind = columns.index("daylight")
slope = final_model.coef_[ind]
error = np.sqrt(var[ind])
print("{0:.0f} +/- {1:.0f} riders per hour of daylight".format(slope, error))
49 +/- 12 riders per hour of daylight

Is ridership increasing?

ind = columns.index("daycount")
slope = final_model.coef_[ind]
error = np.sqrt(var[ind])
print("{0:.2f} +/- {1:.2f} new riders per day".format(slope, error))
print("{0:.1f} +/- {1:.1f} new riders per week".format(7 * slope, 7 * error))
print(
    "annual change: ({0:.0f} +/- {1:.0f})%".format(
        100 * 365 * slope / daily["total"].mean(),
        100 * 365 * error / daily["total"].mean(),
    )
)
0.43 +/- 0.11 new riders per day
3.0 +/- 0.8 new riders per week
annual change: (7 +/- 2)%