Create milano.ipynb

datathon
anitagraser 2023-05-15 22:14:15 +02:00
rodzic f2c7bdea96
commit 8f505a192e
1 zmienionych plików z 253 dodań i 0 usunięć

Wyświetl plik

@ -0,0 +1,253 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from os.path import exists\n",
"from urllib.request import urlretrieve\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://dati.comune.milano.it/dataset/845682d9-d0c9-41a5-8bbe-f28dc668a1cd/resource/9e226d9f-ad8b-47e8-a217-88db4673797b/download/v2_accessiorari_disaggregazionecategoria_euroareacundefined_2015.json\"\n",
"file = 'data/hourly2015.json'\n",
"\n",
"# Download once; later runs reuse the local copy.\n",
"if not exists(file):\n",
"    # urlretrieve does not create missing directories, so make sure data/ exists first.\n",
"    os.makedirs(os.path.dirname(file), exist_ok=True)\n",
"    urlretrieve(url, file)\n",
"\n",
"df = pd.read_json(file)\n",
"# Join the 'data' and 'timeIndex' text columns into one timestamp per row and index by it.\n",
"df['datetime'] = pd.to_datetime(df['data'] + ' ' + df['timeIndex'])\n",
"df = df.set_index('datetime')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot the EURO4 column against the frame's index.\n",
"df['EURO4'].plot()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def load_daily_json(url, file):\n",
"    \"\"\"Load one daily access file into a DataFrame indexed by date.\n",
"\n",
"    The file is downloaded from ``url`` only when ``file`` does not exist yet.\n",
"    Columns ``data_giorno`` / ``numero_transiti_giornalieri`` are renamed to\n",
"    ``timeIndex`` / ``totale`` (assumes the remaining files already use the\n",
"    ``timeIndex`` / ``totale`` names -- TODO confirm against the raw data).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    url : str\n",
"        Download location of the JSON file.\n",
"    file : str\n",
"        Local path used as the download cache.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        The file content with a ``datetime`` index parsed from ``timeIndex``.\n",
"\n",
"    Raises\n",
"    ------\n",
"    KeyError\n",
"        If the file has neither a ``timeIndex`` nor a ``data_giorno`` column.\n",
"    \"\"\"\n",
"    if not exists(file):\n",
"        # urlretrieve does not create missing directories.\n",
"        target_dir = os.path.dirname(file)\n",
"        if target_dir:\n",
"            os.makedirs(target_dir, exist_ok=True)\n",
"        urlretrieve(url, file)\n",
"    df = pd.read_json(file).rename(columns={'data_giorno': 'timeIndex',\n",
"                                          'numero_transiti_giornalieri': 'totale'})\n",
"    # After the rename the date column is always called 'timeIndex'. A fallback to\n",
"    # 'data_giorno' could never succeed (that name has just been renamed away) and\n",
"    # would only hide the real parsing error, so errors are allowed to propagate.\n",
"    df['datetime'] = pd.to_datetime(df['timeIndex'])\n",
"    return df.set_index('datetime')\n",
"\n",
"\n",
"# Local cache path -> download URL, in chronological order.\n",
"files = {\n",
"    'data/daily2012.json': \"https://dati.comune.milano.it/dataset/55a1dbf1-0442-48df-bffe-4f2615b1c473/resource/0ca542c9-0d83-44b0-948f-8fd877443078/download/v2_accessigiornalieri_areacundefined_2012.json\",\n",
"    'data/daily2013.json': \"https://dati.comune.milano.it/dataset/b597f3b0-1616-4117-97c4-64e1d619edac/resource/b906728b-ecab-4ff9-b30b-968a3e3a3aa0/download/v2_accessigiornalieri_areacundefined_2013.json\",\n",
"    'data/daily2014.json': \"https://dati.comune.milano.it/dataset/0083deed-5af1-46d1-a603-0d10f1500fc6/resource/2af44a47-d12e-42c7-aca5-b0a08431ba00/download/v2_accessigiornalieri_areacundefined_2014.json\",\n",
"    'data/daily2015.json': \"https://dati.comune.milano.it/dataset/134141e6-5e39-4900-ad3f-4d7c5bf411e3/resource/74cd4c73-6748-46cb-8972-0476ffaa550b/download/v2_accessigiornalieri_areacundefined_2015.json\",\n",
"    'data/daily2016-18.json': \"https://dati.comune.milano.it/dataset/792cfd91-6cea-40d3-bcc2-feda97c110e4/resource/fcf2a4c2-1867-47ed-94db-8e7898d65236/download/ds1085_ingressi_areac_precedenti.json\",\n",
"    'data/daily2019-23.json': \"https://dati.comune.milano.it/dataset/8937eb87-2356-40ba-bd82-e0fabe38b598/resource/c16a1f83-57b8-473e-b2ba-e626baa4db7a/download/ingressi_areac_2023-04-19.json\"\n",
"}\n",
"\n",
"# Concatenate all files in one step; DataFrame.append was removed in pandas 2.0.\n",
"df = pd.concat([load_daily_json(url, f) for f, url in files.items()])\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Calendar years 2012 through 2023, one heatmap each further down.\n",
"years = list(range(2012, 2024))\n",
"years"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import hvplot.pandas \n",
"from holoviews import opts, dim, Layout"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One heatmap per year (day of month on x, month on y), stacked in a single column.\n",
"Layout([\n",
"    pd.DataFrame(df.loc[df.index.year == year]).hvplot.heatmap(\n",
"        x='datetime.day', y='datetime.month',\n",
"        C='totale', reduce_function=np.sum,\n",
"        title=f'AREA C Daily access {year}')\n",
"    for year in years\n",
"]).cols(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One heatmap per year (week on x, weekday on y), stacked in a single column.\n",
"Layout([\n",
"    pd.DataFrame(df.loc[df.index.year == year]).hvplot.heatmap(\n",
"        x='datetime.week', y='datetime.weekday',\n",
"        C='totale', reduce_function=np.sum,\n",
"        title=f'AREA C Daily access {year}')\n",
"    for year in years\n",
"]).cols(1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Resample 'totale' to calendar days, summing the values that fall on each day.\n",
"s = df['totale'].resample(\"1d\").sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Line chart of the resampled series.\n",
"s.plot.line()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.data import validate_series\n",
"from adtk.visualization import plot\n",
"\n",
"# Run the series through adtk's validator before using the detectors in later cells.\n",
"s = validate_series(s)\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.detector import ThresholdAD\n",
"\n",
"# Fixed-threshold detector with bounds 25000 (low) and 180000 (high).\n",
"threshold_ad = ThresholdAD(low=25000, high=180000)\n",
"anomalies = threshold_ad.detect(s)\n",
"plot(\n",
"    s,\n",
"    anomaly=anomalies,\n",
"    anomaly_tag=\"marker\",\n",
"    anomaly_color='red',\n",
"    anomaly_markersize=5,\n",
"    ts_linewidth=1,\n",
"    ts_markersize=3,\n",
");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.detector import QuantileAD\n",
"\n",
"# Quantile-based detector with bounds at the 0.05 and 0.95 quantiles, fitted on s itself.\n",
"quantile_ad = QuantileAD(low=0.05, high=0.95)\n",
"anomalies = quantile_ad.fit_detect(s)\n",
"plot(\n",
"    s,\n",
"    anomaly=anomalies,\n",
"    anomaly_tag=\"marker\",\n",
"    anomaly_color='red',\n",
"    anomaly_markersize=5,\n",
"    ts_linewidth=1,\n",
"    ts_markersize=3,\n",
");"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.transformer import ClassicSeasonalDecomposition\n",
"\n",
"# Decompose with a seasonal period of 7 samples and trend extraction enabled,\n",
"# then plot the result next to the input series.\n",
"s_transformed = (\n",
"    ClassicSeasonalDecomposition(freq=7, trend=True)\n",
"    .fit_transform(s)\n",
"    .rename(\"Seasonal decomposition residual\")\n",
")\n",
"plot(pd.concat([s, s_transformed], axis=1), ts_linewidth=1, ts_markersize=4);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.detector import SeasonalAD\n",
"\n",
"# Seasonal detector fitted on s, checking both sides with factor c=3.0.\n",
"seasonal_ad = SeasonalAD(side=\"both\", c=3.0)\n",
"anomalies = seasonal_ad.fit_detect(s)\n",
"plot(\n",
"    s,\n",
"    anomaly=anomalies,\n",
"    anomaly_tag=\"marker\",\n",
"    anomaly_color=\"red\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.transformer import RollingAggregate\n",
"\n",
"# Sum over a sliding window of 7 samples.\n",
"s_transformed = RollingAggregate(agg='sum', window=7).transform(s)\n",
"# The label names the aggregation actually used: a sum, not a count of valid values.\n",
"plot(s_transformed.rename(\"Rolling 7-day sum\"), ts_linewidth=1, ts_markersize=4);"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from adtk.detector import SeasonalAD\n",
"\n",
"# Seasonal detector fitted on the rolling aggregate, checking both sides with factor c=3.0.\n",
"seasonal_ad = SeasonalAD(side=\"both\", c=3.0)\n",
"anomalies = seasonal_ad.fit_detect(s_transformed)\n",
"plot(\n",
"    s_transformed,\n",
"    anomaly=anomalies,\n",
"    anomaly_tag=\"marker\",\n",
"    anomaly_color=\"red\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "geospatial-plus",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}