ogd-at-lab/notebooks/utils/dataaccess.py

120 wiersze
4.6 KiB
Python

from os.path import exists
from urllib.request import urlretrieve
import geopandas as gpd
import pandas as pd
def gdf_from_wfs(layer):
    """
    Load a data.wien.gv.at WFS layer as a geopandas.GeoDataFrame.

    The GetFeature response is stored as ``<layer>.json`` in the current
    working directory; when that file already exists it is read directly
    and no request is made.

    Parameters
    ----------
    layer : string
        WFS layer name (used without the ``ogdwien:`` prefix, which is
        added to the request here)

    Returns
    -------
    geopandas.GeoDataFrame
        Whatever ``geopandas.read_file`` yields for the stored JSON file
    """
    cache_path = f'{layer}.json'
    if exists(cache_path):
        return gpd.read_file(cache_path)
    request_url = f"https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:{layer}&srsName=EPSG:4326&outputFormat=json"
    urlretrieve(request_url, cache_path)
    return gpd.read_file(cache_path)
def get_elevation(point):
    """
    Retrieve elevation info from the Austrian Elevation Service

    Implementation based on https://github.com/maegger/AustrianElevation/blob/6e0f468b6094caace6cd35f00704e4087e851cec/tree/AustrianElevation/AustrianElevation.py#L97

    The x coordinate, snapped down to a multiple of 20000, selects the
    remote repository ("database"); the y coordinate, snapped down to a
    multiple of 10, selects the text file inside it. Each line of that file
    is ``<raster_x> <elevation>``; the line whose first field equals the x
    coordinate snapped down to a multiple of 10 provides the result.

    Downloaded files are kept in the current working directory as
    ``<database>_<raster_y>.txt`` and reused on later calls.

    Parameters
    ----------
    point : shapely.Point
        Point in EPSG:3857 (only its ``x`` and ``y`` attributes are read)

    Returns
    -------
    int or None
        Elevation value stored for the raster cell, or None when the file
        contains no line for that cell
    """
    x = point.x
    y = point.y
    database = int(x - x % 20000)
    raster_x = int(x - x % 10)
    raster_y = int(y - y % 10)
    # The remote file is identified by (database, raster_y). The cache name
    # must carry both, otherwise two x tiles sharing a y row would read each
    # other's cached data and the lookup below would miss.
    file = f'{database}_{raster_y}.txt'
    url = f"https://raw.githubusercontent.com/maegger/{database}/master/{raster_y}.txt"
    if not exists(file):
        urlretrieve(url, file)
    # Context manager so the handle is closed even on the early return.
    with open(file, 'r') as data:
        for line in data:
            fields = line.split(' ', 1)
            if len(fields) < 2:
                # Blank or malformed line: nothing to compare against.
                continue
            try:
                x_wert = int(fields[0])
            except ValueError:
                continue
            if x_wert == raster_x:
                return int(fields[1])
    return None
def get_airquality_df():
    """
    Download https://go.gv.at/l9lumesakt and return it as a pandas.DataFrame.

    The CSV is fetched on every call and written to ``lumesakt.csv`` in the
    current working directory. It is parsed as ';'-separated latin1 text
    with the first line skipped, after which the rows labelled 0 and 1 are
    dropped. The measurement columns are converted to numbers (decimal
    comma replaced by a dot, the markers 'NE' and '---' removed, anything
    still unparsable becoming NaN), selected columns are renamed to English
    labels, and 'NAME_KURZ' becomes the index.

    Returns
    -------
    pandas.DataFrame
        Table indexed by 'NAME_KURZ'
    """
    csv_path = 'lumesakt.csv'
    urlretrieve('https://go.gv.at/l9lumesakt', csv_path)

    table = pd.read_csv(csv_path, sep=';', encoding='latin1', skiprows=1)
    table.drop([0, 1], inplace=True)

    measurement_columns = [
        'LTM', 'WG', 'WR', 'RF', 'NO2', 'NOX', 'PM10', 'PM10.1',
        'PM25', 'PM25.1', 'O3', 'O3.1', 'SO2', 'CO', 'CO.1',
    ]
    for name in measurement_columns:
        cleaned = table[name].str.replace(',', '.')
        cleaned = cleaned.str.replace('NE', '')
        cleaned = cleaned.str.replace('---', '')
        table[name] = cleaned.apply(pd.to_numeric, errors='coerce')

    # One mapping instead of one call per column; no new label is also an
    # old label, so the outcome does not depend on application order.
    new_labels = {
        'Unnamed: 0': 'NAME_KURZ',
        'Zeit-LTM': 'time airtemp',
        'LTM': 'airtemp °C',
        'Zeit-Wind': 'time wind',
        'WG': 'windspeed kmh',
        'WR': 'winddirection °',
        'Zeit-RF': 'time humidity',
        'RF': 'relhumidity %',
        'Zeit-NO2': 'time NO2',
        'Zeit-NOX': 'time NOX',
        'Zeit-PM': 'time PM',
        'Zeit-O3': 'time O3',
        'Zeit-SO2': 'time SO2',
        'Zeit-CO': 'time CO',
    }
    table.rename(columns=new_labels, inplace=True)
    table.set_index('NAME_KURZ', inplace=True)
    return table
def get_heatvulnerabilityindex_df():
    """
    Load the heat vulnerability index table as a pandas.DataFrame.

    Source: https://www.wien.gv.at/gogv/l9ogdaverageurbanheatvulnerabilityindex

    The CSV is stored as ``heatvulnerabilityindex.csv`` in the current
    working directory and only downloaded when that file is missing. The
    three ``AVG_UHVI_*`` columns are converted from decimal-comma text to
    numbers (unparsable entries become NaN).

    Returns
    -------
    pandas.DataFrame
        Table indexed by 'SUB_DISTRICT_CODE_VIE'
    """
    csv_path = 'heatvulnerabilityindex.csv'
    if not exists(csv_path):
        urlretrieve('https://www.wien.gv.at/gogv/l9ogdaverageurbanheatvulnerabilityindex', csv_path)

    table = pd.read_csv(csv_path, sep=';', encoding='latin1')
    for name in ('AVG_UHVI_A', 'AVG_UHVI_O', 'AVG_UHVI_Y'):
        with_decimal_point = table[name].str.replace(',', '.')
        table[name] = with_decimal_point.apply(pd.to_numeric, errors='coerce')
    return table.set_index('SUB_DISTRICT_CODE_VIE')
def get_heatvulnerabilityindex_gdf():
    """
    Load the heat vulnerability index joined onto district geometries.

    Source of the index values:
    https://www.wien.gv.at/gogv/l9ogdaverageurbanheatvulnerabilityindex

    The 'ZAEHLBEZIRKOGD' WFS layer is keyed by ``ZBEZ`` as integer plus
    90000, stored as 'SUB_DISTRICT_CODE_VIE', and the index table is joined
    on that key (a left join, so every district row is kept).

    Returns
    -------
    geopandas.GeoDataFrame
        District rows indexed by 'SUB_DISTRICT_CODE_VIE' with the index
        table's columns appended
    """
    uhvi = get_heatvulnerabilityindex_df()
    zones = gdf_from_wfs('ZAEHLBEZIRKOGD')
    zones['SUB_DISTRICT_CODE_VIE'] = zones['ZBEZ'].astype(int) + 90000
    zones.set_index('SUB_DISTRICT_CODE_VIE', inplace=True)
    joined = pd.DataFrame(zones).join(uhvi)
    return gpd.GeoDataFrame(joined)
def get_zaehlsprengel_gdf(year=2020):
    """
    Load the Statistik Austria Zählsprengel districts as a geopandas.GeoDataFrame.

    The zip archive for the requested year is stored in the current working
    directory and only downloaded when it is not already present; it is then
    read in place through a ``zip://`` path.

    Parameters
    ----------
    year : int
        Year inserted into the archive name (``..._{year}0101.zip``)

    Returns
    -------
    geopandas.GeoDataFrame
        Whatever ``geopandas.read_file`` yields for the archive
    """
    archive = f'OGDEXT_ZSP_1_STATISTIK_AUSTRIA_{year}0101.zip'
    if not exists(archive):
        source = f'http://data.statistik.gv.at/data/OGDEXT_ZSP_1_STATISTIK_AUSTRIA_{year}0101.zip'
        urlretrieve(source, archive)
    return gpd.read_file(f'zip://{archive}')