# Agencies

### Downloads + Imports

In [None]:
%run "setup.ipynb"

### Read and format data


In [None]:
%time agencies = pd.read_csv(zipfile.open('agency.txt'))
agencies.tail()
agencies.info()

In [None]:
# Preview the first five agency rows.
agencies.head(n=5)

In [None]:
%time routes = pd.read_csv(zipfile.open('routes.txt'))
routes.tail()

routes = routes.join(agencies[['agency_id','agency_name']].set_index('agency_id'), on='agency_id')
routes.head()

### Agencies per Route Type

In [None]:
# Number of routes per agency, most frequent first; show the top five.
routes['agency_name'].value_counts().iloc[:5]

In [None]:
# Human-readable labels for the numeric route_type codes listed here.
# Codes that do not appear in this mapping are left unchanged by `replace`.
rename = {
    2: "Intercity Rail Service",
    100: "Railway Service",
    109: "Suburban Railway",
    400: "Urban Railway Service",
    700: "Bus Service",
    900: "Tram Service",
    1000: "Water Transport Service",
}
# Assign the result back to the column instead of calling
# `routes['route_type'].replace(..., inplace=True)`: an in-place call on a
# selected column is chained assignment, which pandas warns about and which
# does not modify `routes` at all when Copy-on-Write is enabled.
routes['route_type'] = routes['route_type'].replace(rename)
routes.head()

In [None]:
# Agencies whose total route count (summed over all route types) is below this
# threshold are pooled into a single 'Other' bucket.
MIN_ROUTES_PER_AGENCY = 40

# Number of routes for every (route_type, agency_name) pair.
routes_sorted = routes.groupby(['route_type', 'agency_name']).size().reset_index(name="count")

# Total routes per agency across all route types. This is a helper column that
# is only used for the threshold and the sort, and is dropped again below.
routes_sorted['agency_total'] = routes_sorted.groupby('agency_name')['count'].transform('sum')
routes_sorted.loc[routes_sorted['agency_total'] < MIN_ROUTES_PER_AGENCY, 'agency_name'] = 'Other'
routes_sorted = (
    routes_sorted
    .sort_values(['agency_total', 'agency_name', 'count'], ascending=False)
    .drop('agency_total', axis=1)
)

# Re-aggregate so that the relabelled 'Other' rows collapse into one row per
# route type, then express each count as a share of its route type's total.
t = routes_sorted.groupby(['route_type', 'agency_name']).aggregate({'count': 'sum'}).reset_index()
t = t.assign(
    # Use the string alias 'sum': passing np.sum to transform is deprecated.
    ac=lambda x: x.groupby(['route_type'])['count'].transform('sum'),
    share=lambda x: x['count'].div(x['ac']),
)

# Wide layout: one row per route type, one column per agency, values = share.
t = t.pivot(index='route_type', columns='agency_name', values='share')

# Move the 'Other' bucket to the last column. The bucket only exists when at
# least one agency fell below the threshold, so guard against its absence
# instead of letting `pop` raise a KeyError.
if 'Other' in t.columns:
    other_share = t.pop('Other')
    t.insert(len(t.columns), 'Other', other_share)

# Agencies with no routes of a given type have no entry after the pivot.
t = t.fillna(0.0)

In [None]:
def fmt(y, _):
    """Format a colorbar tick value as a percentage without decimals."""
    return f'{y :0.0%}'


fig, ax = plt.subplots(figsize=(15,6))
# NOTE(review): this palette is built but not passed to the heatmap below,
# which uses the named "YlGnBu" colormap instead.
cmap = sns.light_palette(sns_c[0])

# Draw the share table as an annotated heatmap on a fixed 0..1 colour scale.
sns.heatmap(
    data=t,
    vmin=0.0,
    vmax=1.0,
    cmap="YlGnBu",
    linewidths=0.1,
    linecolor='black',
    annot=True,
    fmt='0.2%',
    cbar_kws={'format': mtick.FuncFormatter(fmt)},
    ax=ax,
)
ax.set_title('Agency Share per Route Type');