kopia lustrzana https://github.com/saubury/mastodon-stream
further cleanup
rodzic
6d80c5897e
commit
32ad2e6ea6
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
version: '3'
|
||||
version: '3.9'
|
||||
services:
|
||||
zookeeper:
|
||||
image: confluentinc/cp-zookeeper:${CONF_VER}
|
||||
|
@ -13,7 +13,7 @@ services:
|
|||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
retries: 30
|
||||
environment:
|
||||
ZOOKEEPER_CLIENT_PORT: 2181
|
||||
ZOOKEEPER_TICK_TIME: 2000
|
||||
|
@ -34,7 +34,7 @@ services:
|
|||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 20
|
||||
retries: 30
|
||||
environment:
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
|
||||
|
@ -67,18 +67,16 @@ services:
|
|||
- "8081:8081"
|
||||
healthcheck:
|
||||
test: nc -z localhost 8081 || exit -1
|
||||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
start_period: 30s
|
||||
interval: 20s
|
||||
timeout: 60s
|
||||
retries: 300
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||
SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:8081
|
||||
|
||||
connect:
|
||||
# image: cnfldemos/cp-server-connect-datagen:0.5.0-6.2.0
|
||||
# image: confluentinc/cp-server-connect:${CONF_VER}
|
||||
build:
|
||||
context: ./kafka-connect
|
||||
dockerfile: Dockerfile
|
||||
|
@ -95,7 +93,7 @@ services:
|
|||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
retries: 30
|
||||
environment:
|
||||
CONNECT_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||
CONNECT_REST_ADVERTISED_HOST_NAME: connect
|
||||
|
@ -130,7 +128,6 @@ services:
|
|||
condition: service_healthy
|
||||
connect:
|
||||
condition: service_healthy
|
||||
# - ksqldb-server
|
||||
ports:
|
||||
- "9021:9021"
|
||||
healthcheck:
|
||||
|
@ -138,7 +135,7 @@ services:
|
|||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
retries: 30
|
||||
environment:
|
||||
CONTROL_CENTER_BOOTSTRAP_SERVERS: 'broker:29092'
|
||||
CONTROL_CENTER_CONNECT_CONNECT-DEFAULT_CLUSTER: 'http://connect:8083'
|
||||
|
@ -180,77 +177,24 @@ services:
|
|||
/usr/bin/mc policy set public minio/mastodon;
|
||||
exit 0;
|
||||
"
|
||||
|
||||
# jupyter:
|
||||
# image: jupyter/scipy-notebook
|
||||
# ports:
|
||||
# - "8888:8888"
|
||||
# volumes:
|
||||
# - ./notebooks:/home/jovyan/
|
||||
# environment:
|
||||
# JUPYTER_ENABLE_LAB: "yes"
|
||||
# JUPYTER_RUNTIME_DIR: "/tmp"
|
||||
# command: "start-notebook.sh --NotebookApp.token='' --NotebookApp.password=''"
|
||||
|
||||
|
||||
|
||||
# ksqldb-server:
|
||||
# image: confluentinc/cp-ksqldb-server:${CONF_VER}
|
||||
# platform: linux/amd64
|
||||
# hostname: ksqldb-server
|
||||
# container_name: ksqldb-server
|
||||
# depends_on:
|
||||
# - broker
|
||||
# - connect
|
||||
# ports:
|
||||
# - "8088:8088"
|
||||
# environment:
|
||||
# KSQL_CONFIG_DIR: "/etc/ksql"
|
||||
# KSQL_BOOTSTRAP_SERVERS: "broker:29092"
|
||||
# KSQL_HOST_NAME: ksqldb-server
|
||||
# KSQL_LISTENERS: "http://0.0.0.0:8088"
|
||||
# KSQL_CACHE_MAX_BYTES_BUFFERING: 0
|
||||
# KSQL_KSQL_SCHEMA_REGISTRY_URL: "http://schema-registry:8081"
|
||||
# KSQL_PRODUCER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringProducerInterceptor"
|
||||
# KSQL_CONSUMER_INTERCEPTOR_CLASSES: "io.confluent.monitoring.clients.interceptor.MonitoringConsumerInterceptor"
|
||||
# KSQL_KSQL_CONNECT_URL: "http://connect:8083"
|
||||
# KSQL_KSQL_LOGGING_PROCESSING_TOPIC_REPLICATION_FACTOR: 1
|
||||
# KSQL_KSQL_LOGGING_PROCESSING_TOPIC_AUTO_CREATE: 'true'
|
||||
# KSQL_KSQL_LOGGING_PROCESSING_STREAM_AUTO_CREATE: 'true'
|
||||
|
||||
# ksqldb-cli:
|
||||
# image: confluentinc/cp-ksqldb-cli:${CONF_VER}
|
||||
# platform: linux/amd64
|
||||
# container_name: ksqldb-cli
|
||||
# depends_on:
|
||||
# - broker
|
||||
# - connect
|
||||
# - ksqldb-server
|
||||
# entrypoint: /bin/sh
|
||||
# tty: true
|
||||
|
||||
|
||||
# elasticsearch:
|
||||
# image: docker.elastic.co/elasticsearch/elasticsearch:${ELST_VER}
|
||||
# platform: linux/amd64
|
||||
# container_name: elasticsearch
|
||||
# ports:
|
||||
# - 9200:9200
|
||||
# environment:
|
||||
# xpack.security.enabled: "false"
|
||||
# ES_JAVA_OPTS: "-Xms1g -Xmx1g"
|
||||
# discovery.type: "single-node"
|
||||
|
||||
|
||||
# kibana:
|
||||
# image: docker.elastic.co/kibana/kibana:${ELST_VER}
|
||||
# platform: linux/amd64
|
||||
# container_name: kibana
|
||||
# hostname: kibana
|
||||
# depends_on:
|
||||
# - elasticsearch
|
||||
# ports:
|
||||
# - 5601:5601
|
||||
# environment:
|
||||
# xpack.security.enabled: "false"
|
||||
# discovery.type: "single-node"
|
||||
jupyter:
|
||||
image: jupyter/scipy-notebook
|
||||
ports:
|
||||
- "8888:8888"
|
||||
healthcheck:
|
||||
test: nc -z localhost 8888 || exit -1
|
||||
start_period: 15s
|
||||
interval: 5s
|
||||
timeout: 10s
|
||||
retries: 10
|
||||
volumes:
|
||||
- ./notebooks:/home/jovyan/
|
||||
user: root
|
||||
environment:
|
||||
JUPYTER_ENABLE_LAB: "yes"
|
||||
JUPYTER_RUNTIME_DIR: "/tmp"
|
||||
NB_USER: simonaubury
|
||||
CHOWN_HOME: 'yes'
|
||||
CHOWN_HOME_OPTS: '-R'
|
||||
command: "start-notebook.sh --allow-root --ip=0.0.0.0 --NotebookApp.token='' --NotebookApp.password=''"
|
||||
|
|
Plik binarny nie jest wyświetlany.
Przed Szerokość: | Wysokość: | Rozmiar: 21 KiB Po Szerokość: | Wysokość: | Rozmiar: 20 KiB |
Plik binarny nie jest wyświetlany.
Przed Szerokość: | Wysokość: | Rozmiar: 61 KiB Po Szerokość: | Wysokość: | Rozmiar: 72 KiB |
|
@ -87,6 +87,22 @@ select
|
|||
, mastodon_text
|
||||
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
|
||||
|
||||
insert into toots
|
||||
select
|
||||
m_id
|
||||
, created_at
|
||||
, created_at_str
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
from read_parquet('20230216/mastodon-topic/partition=0/*.parquet');
|
||||
|
||||
|
||||
create table all_toots
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
# FROM confluentinc/cp-server-connect-base:7.3.1
|
||||
FROM confluentinc/cp-server-connect:7.1.0
|
||||
|
||||
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-s3:10.3.0
|
||||
|
||||
# ENTRYPOINT ["tail", "-f", "/dev/null"]
|
||||
|
|
File diff suppressed because one or more lines are too long
Ładowanie…
Reference in New Issue