From 32ef0ed32077609c342fc1427a9f9fc6894699df Mon Sep 17 00:00:00 2001 From: mazano Date: Sat, 13 Mar 2021 05:00:16 +0200 Subject: [PATCH] Optimise osmenrich startup logic and imposm (#110) --- .env | 53 ++++++++++++ docker-compose-rancher.yml | 162 ----------------------------------- docker-compose-web.yml | 145 +++++++++++++++++++++++++------ docker-compose.yml | 131 ++++++++++------------------ docker-imposm3/importer.py | 18 ++-- docker-osm-pbf/Dockerfile | 14 --- docker-osm-pbf/README.md | 31 ------- docker-osm-pbf/download.sh | 48 ----------- docker-osmenrich/enrich.py | 72 ++++++++++------ docker-osmupdate/download.py | 16 ++-- readme.md | 45 +++------- 11 files changed, 294 insertions(+), 441 deletions(-) create mode 100644 .env delete mode 100644 docker-compose-rancher.yml delete mode 100644 docker-osm-pbf/Dockerfile delete mode 100644 docker-osm-pbf/README.md delete mode 100644 docker-osm-pbf/download.sh diff --git a/.env b/.env new file mode 100644 index 0000000..d08c5be --- /dev/null +++ b/.env @@ -0,0 +1,53 @@ +COMPOSE_PROJECT_NAME=kartozadockerosm +POSTGRES_USER=docker +POSTGRES_PASS=docker +POSTGRES_DBNAME=gis +DB_PORT=35432 +PGDB_PORT=6500 +POSTGRES_VERSION=13-3.1 +MARTIN_PORT=3000 +WATCH_MODE=true +DATABASE_URL=postgres://docker:docker@db/gis +# Uncomment to expose the postgis database on the network +ALLOW_IP_RANGE= 0.0.0.0/0 +POSTGRES_PORT=5432 +POSTGRES_HOST=db +# seconds between 2 executions of the script +# if 0, then no update will be done, only the first initial import from the PBF +TIME=120 +# folder for settings (with *.json and *.sql) +SETTINGS=settings +# folder for caching +CACHE=cache +# folder for diff which has been imported +IMPORT_DONE=import_done +# folder for diff which hasn't been imported yet +IMPORT_QUEUE=import_queue +# it can be 3857 +SRID=4326 +# see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize +OPTIMIZE=false +# see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables 
+DBSCHEMA_PRODUCTION=public +# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables +DBSCHEMA_IMPORT=import +# http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables +DBSCHEMA_BACKUP=backup +# Install some styles if you are using the default mapping. It can be 'yes' or 'no' +QGIS_STYLE=yes +# Use clip in the database - To use this you should have run make import_clip to add your clip to the DB +CLIP=yes +# These are all currently the defaults but listed here for your +# convenience if you want to change them +# the maximum time range to assemble a cumulated changefile. +MAX_DAYS=100 +# osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic. +DIFF=sporadic +# argument to determine the maximum number of parallely processed changefiles. +MAX_MERGE=7 +# define level for gzip compression. values between 1 (low compression but fast) and 9 (high compression but slow) +COMPRESSION_LEVEL=1 +# change the URL to use a custom URL to fetch regional file updates. 
+BASE_URL=http://planet.openstreetmap.org/replication/ +PGADMIN_DEFAULT_EMAIL=docker@gmail.com +PGADMIN_DEFAULT_PASSWORD=docker diff --git a/docker-compose-rancher.yml b/docker-compose-rancher.yml deleted file mode 100644 index 4837758..0000000 --- a/docker-compose-rancher.yml +++ /dev/null @@ -1,162 +0,0 @@ -version: '2.1' - -volumes: - osm-postgis-data: - import_done: - import_queue: - cache: - osm_settings: - pgadmin_data: - -services: - db: - # About the postgresql version, it should match in the dockerfile of docker-imposm3 - image: kartoza/postgis:12.0 - hostname: db - container_name: dockerosm_db - environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - # Uncomment to expose the postgis database on the network - # - ALLOW_IP_RANGE= 0.0.0.0/0 - volumes: - - osm-postgis-data:/var/lib/postgresql - # Uncomment to use the postgis database from outside the docker network - # ports: - # - "35432:5432" - healthcheck: - test: "exit 0" - - osm_downloader: - image: kartoza/docker-osm:pbf-downloader - container_name: dockerosm_pbf_download - volumes: - # These are sharable to other containers - - osm_settings:/home/settings - environment: - # Read the README in docker-osm-pbf - - CONTINENT=africa - - COUNTRY=south-africa - - BASE_URL=http://download.geofabrik.de - - MAPPING_URL=https://raw.githubusercontent.com/kartoza/docker-osm/develop/settings - - GEOJSON_URL='' - - imposm: - image: kartoza/docker-osm:imposm-latest - container_name: dockerosm_imposm - volumes: - # These are sharable to other containers - - osm_settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache - depends_on: - db: - condition: service_healthy - environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - - POSTGRES_PORT=5432 - - POSTGRES_HOST=db - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - 
TIME=120 - # folder for settings (with *.json and *.sql) - - SETTINGS=settings - # folder for caching - - CACHE=cache - # folder for diff which has been imported - - IMPORT_DONE=import_done - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # it can be 3857 - - SRID=4326 - # see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize - - OPTIMIZE=false - # see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_PRODUCTION=public - # http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_IMPORT=import - # http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_BACKUP=backup - # Install some styles if you are using the default mapping. It can be 'yes' or 'no' - - QGIS_STYLE=yes - # Use clip in the database - To use this you should have run make import_clip to add your clip to the DB - - CLIP=no - command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py" - - osmupdate: - build: docker-osmupdate - image: kartoza/docker-osm:osmupdate-latest - container_name: dockerosm_osmupdate - volumes: - # These are sharable to other containers - - osm_settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache - depends_on: - db: - condition: service_healthy - environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # the maximum time range to assemble a cumulated changefile. - - MAX_DAYS=100 - # osmupdate uses a combination of minutely, hourly and daily changefiles. This value can be minute, hour, day or sporadic. - - DIFF=sporadic - # argument to determine the maximum number of parallely processed changefiles. - - MAX_MERGE=7 - # define level for gzip compression. 
values between 1 (low compression but fast) and 9 (high compression but slow) - - COMPRESSION_LEVEL=1 - # change the URL to use a custom URL to fetch regional file updates. - - BASE_URL=http://planet.openstreetmap.org/replication/ - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py" - - pgadmin4: - image: dpage/pgadmin4:4.16 - hostname: pgadmin4 - volumes: - - pgadmin_data:/var/lib/pgadmin - environment: - - PGADMIN_DEFAULT_EMAIL=docker@gmail.com - - PGADMIN_DEFAULT_PASSWORD=docker - ports: - - 6500:80 - restart: on-failure - depends_on: - db: - condition: service_healthy - - osmenrich: - build: docker-osmenrich - container_name: dockerosm_osmenrich - volumes: - # These are sharable to other containers - - ./settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache - depends_on: - db: - condition: service_healthy - environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - command: bash -c "while [ ! 
-f /home/settings/importer.lock ] ; do sleep 1; done && python3 -u /home/enrich.py" \ No newline at end of file diff --git a/docker-compose-web.yml b/docker-compose-web.yml index 97eaa30..30a9794 100644 --- a/docker-compose-web.yml +++ b/docker-compose-web.yml @@ -1,18 +1,128 @@ -# Usage: - -# docker-compose -f docker-compose.yml -f docker-compose-web.yml - version: '2.1' +volumes: + osm-postgis-data: + import_done: + import_queue: + cache: + pgadmin_data: + services: + db: + image: kartoza/postgis:${POSTGRES_VERSION} + hostname: db + container_name: dockerosm_db + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - ALLOW_IP_RANGE=${ALLOW_IP_RANGE} + volumes: + - osm-postgis-data:/var/lib/postgresql + ports: + - ${DB_PORT}:5432 + healthcheck: + test: "exit 0" + + imposm: + image: kartoza/docker-osm:imposm-latest + build: docker-imposm3 + container_name: dockerosm_imposm + volumes: + - ./settings:/home/settings + - import_done:/home/import_done + - import_queue:/home/import_queue + - cache:/home/cache + depends_on: + db: + condition: service_healthy + environment: + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_HOST=${POSTGRES_HOST} + - TIME=${TIME} + - SETTINGS=${SETTINGS} + - CACHE=${CACHE} + - IMPORT_DONE=${IMPORT_DONE} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - SRID=${SRID} + - OPTIMIZE=${OPTIMIZE} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} + - DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT} + - DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP} + - QGIS_STYLE=${QGIS_STYLE} + - CLIP=${CLIP} + command: bash -c "while [ ! 
-f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py" + + osmupdate: + build: docker-osmupdate + image: kartoza/docker-osm:osmupdate-latest + container_name: dockerosm_osmupdate + volumes_from: + - imposm + depends_on: + db: + condition: service_healthy + environment: + - MAX_DAYS=${MAX_DAYS} + - DIFF=${DIFF} + - MAX_MERGE=${MAX_MERGE} + - COMPRESSION_LEVEL=${COMPRESSION_LEVEL} + - BASE_URL=${BASE_URL} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} + command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py" + + pgadmin4: + image: dpage/pgadmin4:4.16 + hostname: pgadmin4 + volumes: + - pgadmin_data:/var/lib/pgadmin + environment: + - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL} + - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD} + ports: + - ${PGDB_PORT}:80 + restart: on-failure + depends_on: + db: + condition: service_healthy + + osmenrich: + build: docker-osmenrich + volumes_from: + - imposm + depends_on: + db: + condition: service_healthy + environment: + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} + + martin: + image: urbica/martin + restart: on-failure + ports: + - ${MARTIN_PORT}:3000 + environment: + - WATCH_MODE=${WATCH_MODE} + - DATABASE_URL=${DATABASE_URL} + depends_on: + db: + condition: service_healthy + qgisserver: - image: camptocamp/qgis-server:3.6 - hostname: dockerosm_qgisserver - container_name: dockerosm_qgisserver + image: openquake/qgis-server:ltr + environment: - QGIS_PROJECT_FILE=/project/project.qgs - - GIS_SERVER_LOG_LEVEL=DEBUG - - MAX_REQUESTS_PER_PROCESS=100 + - QGIS_SERVER_LOG_LEVEL=DEBUG + - QGIS_SERVER_PARALLEL_RENDERING=1 volumes: - ./logs:/var/log/apache2 - ./web:/project @@ -20,23 +130,6 @@ services: depends_on: db: condition: service_healthy - links: - - db:db ports: - 8198:80 restart: on-failure - - # Server vector tiles from 
PostgreSQL DB - martin: - image: urbica/martin - hostname: dockerosm_martin - container_name: dockerosm_martin - restart: on-failure - ports: - - 3000:3000 - environment: - - WATCH_MODE=true - - DATABASE_URL=postgres://docker:docker@db/gis - depends_on: - db: - condition: service_healthy diff --git a/docker-compose.yml b/docker-compose.yml index 69214c2..17819d4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,21 +9,18 @@ volumes: services: db: - # About the postgresql version, it should match in the dockerfile of docker-imposm3 - image: kartoza/postgis:12.0 + image: kartoza/postgis:${POSTGRES_VERSION} hostname: db container_name: dockerosm_db environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - # Uncomment to expose the postgis database on the network - # - ALLOW_IP_RANGE= 0.0.0.0/0 + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - ALLOW_IP_RANGE=${ALLOW_IP_RANGE} volumes: - osm-postgis-data:/var/lib/postgresql - # Uncomment to use the postgis database from outside the docker network - # ports: - # - "35432:5432" + ports: + - ${DB_PORT}:5432 healthcheck: test: "exit 0" @@ -33,7 +30,6 @@ services: build: docker-imposm3 container_name: dockerosm_imposm volumes: - # These are sharable to other containers - ./settings:/home/settings - import_done:/home/import_done - import_queue:/home/import_queue @@ -42,83 +38,54 @@ services: db: condition: service_healthy environment: - - POSTGRES_USER=docker - - POSTGRES_PASS=docker - - POSTGRES_DBNAME=gis - - POSTGRES_PORT=5432 - - POSTGRES_HOST=db - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - # folder for settings (with *.json and *.sql) - - SETTINGS=settings - # folder for caching - - CACHE=cache - # folder for diff which has been imported - - IMPORT_DONE=import_done - # folder for diff which hasn't been imported yet - - 
IMPORT_QUEUE=import_queue - # it can be 3857 - - SRID=4326 - # see http://imposm.org/docs/imposm3/latest/tutorial.html#optimize - - OPTIMIZE=false - # see http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_PRODUCTION=public - # http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_IMPORT=import - # http://imposm.org/docs/imposm3/latest/tutorial.html#deploy-production-tables - - DBSCHEMA_BACKUP=backup - # Install some styles if you are using the default mapping. It can be 'yes' or 'no' - - QGIS_STYLE=yes - # Use clip in the database - To use this you should have run make import_clip to add your clip to the DB - - CLIP=no + - POSTGRES_USER=${POSTGRES_USER} + - POSTGRES_PASS=${POSTGRES_PASS} + - POSTGRES_DBNAME=${POSTGRES_DBNAME} + - POSTGRES_PORT=${POSTGRES_PORT} + - POSTGRES_HOST=${POSTGRES_HOST} + - TIME=${TIME} + - SETTINGS=${SETTINGS} + - CACHE=${CACHE} + - IMPORT_DONE=${IMPORT_DONE} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - SRID=${SRID} + - OPTIMIZE=${OPTIMIZE} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} + - DBSCHEMA_IMPORT=${DBSCHEMA_IMPORT} + - DBSCHEMA_BACKUP=${DBSCHEMA_BACKUP} + - QGIS_STYLE=${QGIS_STYLE} + - CLIP=${CLIP} command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/importer.py" osmupdate: build: docker-osmupdate image: kartoza/docker-osm:osmupdate-latest container_name: dockerosm_osmupdate - volumes: - # These are sharable to other containers - - ./settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache + volumes_from: + - imposm depends_on: db: condition: service_healthy environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # the maximum time range to assemble a cumulated changefile. - - MAX_DAYS=100 - # osmupdate uses a combination of minutely, hourly and daily changefiles. 
This value can be minute, hour, day or sporadic. - - DIFF=sporadic - # argument to determine the maximum number of parallely processed changefiles. - - MAX_MERGE=7 - # define level for gzip compression. values between 1 (low compression but fast) and 9 (high compression but slow) - - COMPRESSION_LEVEL=1 - # change the URL to use a custom URL to fetch regional file updates. - - BASE_URL=http://planet.openstreetmap.org/replication/ - # folder for diff which hasn't been imported yet - - IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - command: bash -c "while [ ! -f /home/settings/country.pbf ] ; do sleep 1; done && python3 -u /home/download.py" + - MAX_DAYS=${MAX_DAYS} + - DIFF=${DIFF} + - MAX_MERGE=${MAX_MERGE} + - COMPRESSION_LEVEL=${COMPRESSION_LEVEL} + - BASE_URL=${BASE_URL} + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} pgadmin4: - image: dpage/pgadmin4:4.16 + image: dpage/pgadmin4:latest hostname: pgadmin4 volumes: - pgadmin_data:/var/lib/pgadmin environment: - - PGADMIN_DEFAULT_EMAIL=docker@gmail.com - - PGADMIN_DEFAULT_PASSWORD=docker + - PGADMIN_DEFAULT_EMAIL=${PGADMIN_DEFAULT_EMAIL} + - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_DEFAULT_PASSWORD} ports: - - 6500:80 + - ${PGDB_PORT}:80 restart: on-failure depends_on: db: @@ -127,23 +94,13 @@ services: osmenrich: build: docker-osmenrich container_name: dockerosm_osmenrich - volumes: - # These are sharable to other containers - - ./settings:/home/settings - - import_done:/home/import_done - - import_queue:/home/import_queue - - cache:/home/cache + volumes_from: + - imposm depends_on: db: condition: service_healthy environment: - # These are all currently the defaults but listed here for your - # convenience if you want to change them - # folder for diff which hasn't been imported yet - - 
IMPORT_QUEUE=import_queue - # folder for diff which has been imported - - IMPORT_DONE=import_done - # seconds between 2 executions of the script - # if 0, then no update will be done, only the first initial import from the PBF - - TIME=120 - command: bash -c "while [ ! -f /home/settings/importer.lock ] ; do sleep 1; done && python3 -u /home/enrich.py" + - IMPORT_QUEUE=${IMPORT_QUEUE} + - IMPORT_DONE=${IMPORT_DONE} + - TIME=${TIME} + - DBSCHEMA_PRODUCTION=${DBSCHEMA_PRODUCTION} diff --git a/docker-imposm3/importer.py b/docker-imposm3/importer.py index 3ce0844..16624d3 100644 --- a/docker-imposm3/importer.py +++ b/docker-imposm3/importer.py @@ -19,15 +19,16 @@ ***************************************************************************/ """ -from sys import exit, stderr from os import environ, listdir +from os.path import join, exists, abspath, isabs from pathlib import Path from shutil import move -from os.path import join, exists, abspath, isabs -from psycopg2 import connect, OperationalError from subprocess import call +from sys import exit, stderr from time import sleep +from psycopg2 import connect, OperationalError + class Importer(object): @@ -231,10 +232,11 @@ class Importer(object): command += ['-f', self.qgis_style] call(command) - def locate_table(self, name): + def locate_table(self, name, schema): """Check for tables in the DB table exists in the DB""" - sql = """ SELECT EXISTS (SELECT 1 AS result from information_schema.tables where table_name like 'TEMP_TABLE'); """ - self.cursor.execute(sql.replace('TEMP_TABLE', '%s' % name)) + sql = """ SELECT EXISTS (SELECT 1 AS result from information_schema.tables + where table_name like TEMP_TABLE and table_schema = 'TEMP_SCHEMA'); """ + self.cursor.execute(sql.replace('TEMP_TABLE', '%s' % name).replace('TEMP_SCHEMA', '%s' % schema)) # noinspection PyUnboundLocalVariable return self.cursor.fetchone()[0] @@ -245,9 +247,11 @@ class Importer(object): def run(self): """First checker.""" - osm_tables = 
self.locate_table('osm_%') + + osm_tables = self.locate_table("'osm_%'", self.default['DBSCHEMA_PRODUCTION']) if osm_tables != 1: + # It means that the DB is empty. Let's import the PBF file. if self.clip_json_file: diff --git a/docker-osm-pbf/Dockerfile b/docker-osm-pbf/Dockerfile deleted file mode 100644 index 7b40c4f..0000000 --- a/docker-osm-pbf/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM alpine:latest - -RUN apk --no-cache add --update bash curl - -ENV BASE_URL='http://download.geofabrik.de' -ENV CONTINENT='' -ENV COUNTRY='' -ENV MAPPING_URL='https://raw.githubusercontent.com/kartoza/docker-osm/develop/settings' -ENV GEOJSON_URL='' - -RUN mkdir /home/settings -ADD download.sh /download.sh - -ENTRYPOINT ["/bin/bash", "/download.sh"] diff --git a/docker-osm-pbf/README.md b/docker-osm-pbf/README.md deleted file mode 100644 index 4e3630d..0000000 --- a/docker-osm-pbf/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# Download Docker OSM Files - -This image is used to facilitate downloading of docker-osm files which are required to get the image -running. The image will download OSM PBF file, Mapping file, Clip Geojson and QGIS Style file. - -Environment variables - - -**BASE_URL='http://download.geofabrik.de'** - -This is used to download the OSM PBF file. Currently points to Geofabrik - -**CONTINENT=''** - -Used to specify what continent you need to download pbf from. This is mandatory eg `CONTINENT=africa` - -**COUNTRY=''** - -Used to specify which country you need to download pbf from. This is optional if you intent -to only use continent pbf. Eg `COUNTRY=lesotho` - -**MAPPING_URL='https://raw.githubusercontent.com/kartoza/docker-osm/develop/settings'** - -This currently points to the docker-osm repository to enable downloading of the mapping file, qgis_style - file. These files are mandatory in the running of docker-osm - -**GEOJSON_URL=''** - -This points to the geojson file that is used for clipping data in OSM. 
This can be empty if you do -not intent to use the clip functionality in docker-osm - diff --git a/docker-osm-pbf/download.sh b/docker-osm-pbf/download.sh deleted file mode 100644 index 79bfd09..0000000 --- a/docker-osm-pbf/download.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -CONTINENT_LOCKFILE=/home/settings/.${CONTINENT}_lock -COUNTRY_LOCKFILE=/home/settings/.${COUNTRY}_lock - -touch /home/settings/last.state.txt -touch /home/settings/timestamp.txt - -# Download OSM Mapping file and Associated data -if [ ! -f /home/settings/mapping.yml ]; then \ - wget -c ${MAPPING_URL}/mapping.yml -O /home/settings/mapping.yml -fi - -if [ ! -f /home/settings/qgis_style.sql ]; then \ - wget -c ${MAPPING_URL}/qgis_style.sql -O /home/settings/qgis_style.sql -fi -if [ ! -f /home/settings/post-pbf-import.sql ]; then \ - url=${MAPPING_URL}/post-pbf-import.sql - if curl --output /dev/null --silent --head --fail "${url}"; then - wget -c ${MAPPING_URL}/post-pbf-import.sql -O /home/settings/post-pbf-import.sql - else - echo "URL does not exist: ${url}" - fi -fi - -if [[ ! -f /home/settings/clip.geojson && -z ${GEOJSON_URL} ]]; then \ - echo "We are not downloading any Geojson" -else - wget -c ${GEOJSON_URL} -O /home/settings/clip.geojson -fi - -# Download OSM PBF -if [[ ! -f ${CONTINENT_LOCKFILE} && -z ${COUNTRY} ]]; then \ - - echo "${BASE_URL}/${CONTINENT}-latest.osm.pbf" - wget -c --no-check-certificate ${BASE_URL}/${CONTINENT}-latest.osm.pbf -O /tmp/${CONTINENT}.pbf - mv /tmp/${CONTINENT}.pbf /home/settings/country.pbf - touch ${CONTINENT_LOCKFILE} - - -elif [[ ! 
-f ${COUNTRY_LOCKFILE} ]]; then - - echo "${BASE_URL}/${CONTINENT}/${COUNTRY}-latest.osm.pbf" - wget -c --no-check-certificate ${BASE_URL}/${CONTINENT}/${COUNTRY}-latest.osm.pbf -O /tmp/${COUNTRY}.pbf - mv /tmp/${COUNTRY}.pbf /home/settings/country.pbf - touch ${COUNTRY_LOCKFILE} -fi - diff --git a/docker-osmenrich/enrich.py b/docker-osmenrich/enrich.py index 5b30754..73aca16 100644 --- a/docker-osmenrich/enrich.py +++ b/docker-osmenrich/enrich.py @@ -20,16 +20,17 @@ """ import gzip -import xmltodict -import yaml -from xmltodict import OrderedDict -from dateutil import parser from os import environ, listdir, mkdir from os.path import join, exists, getsize from sys import exit, stderr -from urllib import request -from psycopg2 import connect, OperationalError, ProgrammingError from time import sleep +from urllib import request + +import xmltodict +import yaml +from dateutil import parser +from psycopg2 import connect, OperationalError, ProgrammingError +from xmltodict import OrderedDict class Enrich(object): @@ -62,6 +63,7 @@ class Enrich(object): 'IMPORT_DONE': 'import_done', 'CACHE': 'cache', 'MAX_DIFF_FILE_SIZE': 100000000, + 'DBSCHEMA_PRODUCTION': 'public', 'CACHE_MODIFY_CHECK': '' } self.mapping_file = None @@ -220,18 +222,24 @@ class Enrich(object): for table, table_data in self.mapping_database_schema.items(): new_columns_postgis = [] for enrich_key, enrich_type in self.enriched_column.items(): - try: - cursor.execute('select %s from %s' % (enrich_key, table)) - except ProgrammingError as e: - connection.rollback() + check_column = ''' SELECT EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_name='%s' and column_name='%s'); ''' % ( + table, enrich_key) + cursor.execute(check_column) + column_existence = cursor.fetchone()[0] + + if column_existence != 1: if enrich_type == 'int': - new_columns_postgis.append('ADD COLUMN %s INTEGER' % enrich_key) + new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s NUMERIC' % enrich_key) elif enrich_type == 
'string': - new_columns_postgis.append('ADD COLUMN %s VARCHAR' % enrich_key) + new_columns_postgis.append( + 'ADD COLUMN IF NOT EXISTS %s CHARACTER VARYING (255)' % enrich_key) elif enrich_type == 'datetime': - new_columns_postgis.append('ADD COLUMN %s timestamp' % enrich_key) + new_columns_postgis.append('ADD COLUMN IF NOT EXISTS %s TIMESTAMPTZ' % enrich_key) + if len(new_columns_postgis) > 0: - query = 'ALTER TABLE %s %s' % (table, ','.join(new_columns_postgis)) + query = 'ALTER TABLE %s."%s" %s;' % ( + self.default['DBSCHEMA_PRODUCTION'], table, ','.join(new_columns_postgis)) cursor.execute(query) connection.commit() connection.close() @@ -327,8 +335,8 @@ class Enrich(object): connection = self.create_connection() cursor = connection.cursor() try: - query = 'UPDATE %s SET %s WHERE %s=%s' % ( - table_name, ','.join(sets), osm_id_column, osm_id) + query = 'UPDATE %s.%s SET %s WHERE %s=%s' % (self.default['DBSCHEMA_PRODUCTION'], + table_name, ','.join(sets), osm_id_column, osm_id) cursor.execute(query) connection.commit() except ProgrammingError as e: @@ -415,8 +423,9 @@ class Enrich(object): row_batch = {} osm_ids = [] try: - cursor.execute( - 'select * from %s WHERE changeset_timestamp IS NULL AND osm_id IS NOT NULL ORDER BY osm_id' % table_name) + check_sql = ''' select * from %s."%s" WHERE "changeset_timestamp" + IS NULL AND "osm_id" IS NOT NULL ORDER BY "osm_id" ''' % (self.default['DBSCHEMA_PRODUCTION'], table_name) + cursor.execute(check_sql) row = True while row: # do something with row @@ -479,8 +488,10 @@ class Enrich(object): connection = self.create_connection() cursor = connection.cursor() try: - cursor.execute('select * from %s WHERE %s=%s' % ( - table, table_data['osm_id_columnn'], osm_id)) + validate_sql = ''' select * from %s."%s" WHERE "%s"=%s ''' % (self.default['DBSCHEMA_PRODUCTION'], + table, table_data['osm_id_columnn'], + osm_id) + cursor.execute(validate_sql) row = cursor.fetchone() if row: row = dict(zip(table_data['columns'], row)) @@ 
-550,15 +561,28 @@ class Enrich(object): except IOError: self.info('cache file can\'t be created') + def locate_table(self, name, schema): + """Check for tables in the DB table exists in the DB""" + connection = self.create_connection() + cursor = connection.cursor() + sql = """ SELECT EXISTS (SELECT 1 AS result from information_schema.tables + where table_name like TEMP_TABLE and table_schema = 'TEMP_SCHEMA'); """ + cursor.execute(sql.replace('TEMP_TABLE', '%s' % name).replace('TEMP_SCHEMA', '%s' % schema)) + # noinspection PyUnboundLocalVariable + return cursor.fetchone()[0] + def run(self): """First checker.""" while True: self.info('Run enrich process') - if self.check_database(): - self.enrich_empty_changeset() - self.enrich_database_from_diff_file() + + osm_tables = self.locate_table("'osm_%'", self.default['DBSCHEMA_PRODUCTION']) + if osm_tables != 1: + self.info('Imposm is still running, wait a while and try again') else: - self.info('Database is not ready') + if self.check_database(): + self.enrich_empty_changeset() + self.enrich_database_from_diff_file() # sleep looping self.info('sleeping for %s' % self.default['TIME']) diff --git a/docker-osmupdate/download.py b/docker-osmupdate/download.py index 48e6b07..399580c 100644 --- a/docker-osmupdate/download.py +++ b/docker-osmupdate/download.py @@ -19,11 +19,11 @@ ***************************************************************************/ """ -from os.path import exists, join, isabs, abspath -from os import listdir, environ -from sys import exit, stderr -from subprocess import call, Popen, PIPE from datetime import datetime +from os import listdir, environ +from os.path import exists, join, isabs, abspath +from subprocess import call, Popen, PIPE +from sys import exit, stderr from time import sleep @@ -82,12 +82,8 @@ class Downloader(object): if f.endswith('.pbf'): self.osm_file = join(self.default['SETTINGS'], f) - - if not self.osm_file: - msg = 'OSM file *.osm.pbf is missing in %s' % 
self.default['SETTINGS'] - self.error(msg) - else: - self.info('OSM PBF file: ' + self.osm_file) + while not exists(self.osm_file): + sleep(float(self.default['TIME'])) self.info('The checkup is OK.') diff --git a/readme.md b/readme.md index c98003c..51e56c5 100644 --- a/readme.md +++ b/readme.md @@ -2,7 +2,7 @@ A docker compose project to setup an OSM PostGIS database with automatic updates from OSM periodically. -The only files you need is a PBF file, geojson (if you intent to restrict data download to +The only files you need is a PBF file, geojson (if you intend to restrict data download to a smaller extent than the one specified by the PBF) and run the docker compose project. ## General architecture @@ -20,11 +20,9 @@ Alternatively you can execute the `settings_downloader.sh` script to download th ./settings_downloader.sh GEOJSON_URL CONTINENT COUNTRY ie ./settings_downloader.sh https://github.com/kartoza/docker-osm/raw/develop/settings/clip.geojson africa south-africa ``` +For a full list of allowed file names read json file `countries.json` -If you use rancher for container management you can use the provided docker-compose.yml which allows you to automatically -download the settings onto the host machine by using environment variables. -* Run the docker-compose-rancher file and make sure the environment variables are setup properly for -osm_downloader to download the correct pbf file +Alternatively you can use the python script `pbf_downloader.py` * If you want to connect from your local QGIS Desktop: * In the file `docker-compose.yml`, uncomment the block: @@ -76,9 +74,7 @@ website: https://imposm.org/docs/imposm3/latest/mapping.html The default file in Docker-OSM is coming from https://raw.githubusercontent.com/omniscale/imposm3/master/example-mapping.yml -**Note** that you can't import OSM metadata such as author, timestamp or version. 
-This is a limitation from ImpOSM, check the feature request on the [Imposm repository](https://github.com/omniscale/imposm3/issues/58). -Imposm is designed for spatial analysis, not for OSM contribution analysis. +**Note** Imposm is designed for spatial analysis, not for OSM contribution analysis. If you need such a feature, you need to use another database schema supporting OSM Metadata. You can check the [OSM Wiki](https://wiki.openstreetmap.org/wiki/Databases_and_data_access_APIs#Database_Schemas) for "Lossless" schemas. @@ -98,14 +94,14 @@ you to define a smaller area that you can work with. This is always desirable to limit the features being imported into the database rather than clipping them. -**NB:** Ensure you add a geojson covering the area you intent to clip into the settings folder. -The geojson can be the same extent of the administrative area of your country or it can be a +**NB:** Ensure you add a geojson covering the area you intend to clip into the `settings` folder. +The geojson can be the same extent of the administrative area for your country, or it can be a smaller extent. The CRS of the geojson should always be EPSG:4326. **NB:** It is encouraged to simplify the geometry for the `clip.geojson` as a simplified geometry is easier to process during the import. -Rather use the minimum bounding box for the area you intent to clip your dataset with. +Rather use the minimum bounding box for the area you intend to clip your dataset with. ### QGIS Styles @@ -121,8 +117,7 @@ make backup_styles ### SQL Trigger, functions, views... -You can add PostGIS functions, triggers, materialized views in a -SQL file called `post-pbf-import.sql`. +You can add PostGIS functions, triggers, materialized views into an SQL file called `post-pbf-import.sql`. It will be imported automatically in the database. 
### Build and run

@@ -134,7 +129,7 @@ docker-compose build docker-compose up ``` -In production you should daemonize the services when bringing them up: +In production, you should daemonize the services when bringing them up: ```bash docker-compose up -d ``` @@ -250,26 +245,12 @@ in this repository. # PostGIS -With -e, you can add some settings to PostGIS: -```bash - - ALLOW_IP_RANGE= 0.0.0.0/0 -``` -More environment variables for Kartoza/postgis image can be found from https://github.com/kartoza/docker-postgis#environment-variables +For environment variables associated with `docker-postgis` refer to [docker postgis repository](https://github.com/kartoza/docker-postgis) -# QGIS Server and Martin Vector tiles +### Support -You can run a QGIS Server front end or martin vector tiles to the OSM mirror by using the provided -docker-compose-web.yml file. For example: - -```bash -docker-compose -f docker-compose.yml -f docker-compose-web.yml qgisserver up -``` - -or -```bash -docker-compose -f docker-compose.yml -f docker-compose-web.yml martin up -``` -For more information about martin configuration and usage can be found from https://github.com/urbica/martin +If you require more substantial assistance from [kartoza](https://kartoza.com) (because our work and interaction on docker-osm is pro bono), +please consider taking out a [Support Level Agreement](https://kartoza.com/en/shop/product/support) # Credits