kopia lustrzana https://github.com/saubury/mastodon-stream
Incremental dev
rodzic
688528a53f
commit
0cc14823c3
|
@ -0,0 +1,3 @@
|
||||||
|
CONF_VER=7.1.0
|
||||||
|
ELST_VER=7.11.0
|
||||||
|
TZ_SET=Australia/Sydney
|
|
@ -7,3 +7,4 @@ config/mastodon-sink-s3-aws.json
|
||||||
notebooks/demo.ipynb
|
notebooks/demo.ipynb
|
||||||
duckdb/init-s3.sql
|
duckdb/init-s3.sql
|
||||||
data_tmp/*
|
data_tmp/*
|
||||||
|
docker-compose-orig.yml
|
||||||
|
|
|
@ -25,6 +25,8 @@ python mastodonlisten.py --baseURL https://data-folks.masto.host/ --enableKafka
|
||||||
|
|
||||||
confluent-hub install confluentinc/kafka-connect-s3:10.3.0
|
confluent-hub install confluentinc/kafka-connect-s3:10.3.0
|
||||||
|
|
||||||
|
curl -X PUT -H "Content-Type:application/json" localhost:8083/connectors/mastodon-sink-s3/config -d '@./config/mastodon-sink-s3-minio.json'
|
||||||
|
|
||||||
curl -X PUT -H "Content-Type:application/json" localhost:8083/connectors/mastodon-sink-s3/config -d '@./config/mastodon-sink-s3.json'
|
curl -X PUT -H "Content-Type:application/json" localhost:8083/connectors/mastodon-sink-s3/config -d '@./config/mastodon-sink-s3.json'
|
||||||
|
|
||||||
curl -X PUT -H "Content-Type:application/json" localhost:8083/connectors/mastodon-sink-s3-aws/config -d '@./config/mastodon-sink-s3-aws.json'
|
curl -X PUT -H "Content-Type:application/json" localhost:8083/connectors/mastodon-sink-s3-aws/config -d '@./config/mastodon-sink-s3-aws.json'
|
||||||
|
|
|
@ -12,7 +12,7 @@
|
||||||
"name": "created_at",
|
"name": "created_at",
|
||||||
"type": ["null","int"],
|
"type": ["null","int"],
|
||||||
"logicalType": "date",
|
"logicalType": "date",
|
||||||
"default" : "null"
|
"default" : null
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "created_at_str",
|
"name": "created_at_str",
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
{
|
||||||
|
"name": "mastodon-sink-s3",
|
||||||
|
"connector.class": "io.confluent.connect.s3.S3SinkConnector",
|
||||||
|
"topics": "mastodon-topic",
|
||||||
|
"format.class": "io.confluent.connect.s3.format.parquet.ParquetFormat",
|
||||||
|
"flush.size": "10",
|
||||||
|
"s3.bucket.name": "mastodon",
|
||||||
|
"aws.access.key.id": "minio",
|
||||||
|
"aws.secret.access.key": "minio123",
|
||||||
|
"storage.class": "io.confluent.connect.s3.storage.S3Storage",
|
||||||
|
"store.url": "http://minio:9000"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Plik binarny nie jest wyświetlany.
Przed Szerokość: | Wysokość: | Rozmiar: 21 KiB Po Szerokość: | Wysokość: | Rozmiar: 21 KiB |
|
@ -31,3 +31,92 @@ order by 1,2
|
||||||
|
|
||||||
-- select username, bot, count(*) from xx group by 1,2 order by 3 desc;
|
-- select username, bot, count(*) from xx group by 1,2 order by 3 desc;
|
||||||
|
|
||||||
|
as select *
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- old backup
|
||||||
|
-- Create the toots table from the Parquet file '../xx.parquet', keeping
-- only the thirteen columns named below.
create table toots as
select
    m_id,
    created_at,
    created_at_str,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
from read_parquet('../xx.parquet');
|
||||||
|
|
||||||
|
-- Append rows from every file matching the glob '*.parquet' to toots.
-- The target columns are listed explicitly so the load is matched by name
-- and does not silently depend on the physical column order of toots.
insert into toots (
    m_id,
    created_at,
    created_at_str,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
)
select
    m_id,
    created_at,
    created_at_str,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
from read_parquet('*.parquet');
|
||||||
|
|
||||||
|
-- Append rows from every Parquet file under
-- '20230213/mastodon-topic/partition=0/' to toots.
-- The target columns are listed explicitly so the load is matched by name
-- and does not silently depend on the physical column order of toots.
insert into toots (
    m_id,
    created_at,
    created_at_str,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
)
select
    m_id,
    created_at,
    created_at_str,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Create all_toots as the de-duplicated contents of toots: one row per
-- distinct combination of the twelve columns below.
-- NOTE(review): created_at_str is not carried over from toots; confirm
-- that this omission is intentional.
create table all_toots as
select distinct
    m_id,
    created_at,
    app,
    url,
    base_url,
    language,
    favourites,
    username,
    bot,
    tags,
    characters,
    mastodon_text
from toots;
|
||||||
|
|
||||||
|
-- Export the all_toots table to the Parquet file 'all_toots.parquet'.
COPY all_toots TO 'all_toots.parquet' (FORMAT PARQUET);
|
|
@ -0,0 +1,6 @@
|
||||||
|
# FROM confluentinc/cp-server-connect-base:7.3.1
|
||||||
|
FROM confluentinc/cp-server-connect:7.1.0
|
||||||
|
|
||||||
|
RUN confluent-hub install --no-prompt confluentinc/kafka-connect-s3:10.3.0
|
||||||
|
|
||||||
|
# ENTRYPOINT ["tail", "-f", "/dev/null"]
|
File diff suppressed because one or more lines are too long
Ładowanie…
Reference in New Issue