kopia lustrzana https://github.com/saubury/mastodon-stream
cleanup folders
rodzic
d62100d1d8
commit
f8dc638b2c
138
duckdb/go.sql
138
duckdb/go.sql
|
@ -1,138 +0,0 @@
|
|||
-- .read duckdb/go.sql
|
||||
|
||||
/*
|
||||
drop table if exists xx;
|
||||
|
||||
create table xx as
|
||||
select m_id
|
||||
, created_at_str
|
||||
, created_at, ('EPOCH'::TIMESTAMP + INTERVAL (created_at::INT) seconds)::TIMESTAMPTZ as created_tz
|
||||
, app
|
||||
, url
|
||||
, regexp_replace(regexp_replace(url, '^http[s]://', ''), '/.*$', '') as new_url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
FROM read_parquet('s3://mastodon/topics/mastodon-topic/partition=0/*');
|
||||
*/
|
||||
|
||||
select date_part('day', created_tz) as created_day
|
||||
, date_part('hour', created_tz) as created_hour
|
||||
, count(*)
|
||||
from yy
|
||||
group by 1,2
|
||||
order by 1,2
|
||||
;
|
||||
|
||||
-- select username, bot, count(*) from xx group by 1,2 order by 3 desc;
|
||||
|
||||
as select *
|
||||
|
||||
|
||||
|
||||
-- old backup
|
||||
create table toots
|
||||
as
|
||||
select m_id
|
||||
, created_at
|
||||
, created_at_str
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
FROM read_parquet('../xx.parquet');
|
||||
|
||||
insert into toots
|
||||
select
|
||||
m_id
|
||||
, created_at
|
||||
, created_at_str
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
from read_parquet('mastodo*.parquet');
|
||||
|
||||
insert into toots
|
||||
select
|
||||
m_id
|
||||
, created_at
|
||||
, created_at_str
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
|
||||
|
||||
insert into toots
|
||||
select
|
||||
m_id
|
||||
, created_at
|
||||
, created_at_str
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
from read_parquet('20230220/mastodon-topic/partition=0/*.parquet');
|
||||
|
||||
|
||||
create table all_toots
|
||||
as
|
||||
select
|
||||
m_id
|
||||
, created_at
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text
|
||||
from toots
|
||||
group by
|
||||
m_id
|
||||
, created_at
|
||||
, app
|
||||
, url
|
||||
, base_url
|
||||
, language
|
||||
, favourites
|
||||
, username
|
||||
, bot
|
||||
, tags
|
||||
, characters
|
||||
, mastodon_text;
|
||||
|
||||
COPY all_toots TO 'all_toots.parquet' (FORMAT PARQUET);
|
|
@ -1,9 +0,0 @@
|
|||
install 'httpfs';
|
||||
load 'httpfs';
|
||||
|
||||
set s3_endpoint='localhost:9000';
|
||||
set s3_access_key_id='minio';
|
||||
set s3_secret_access_key='minio123';
|
||||
set s3_use_ssl=false;
|
||||
set s3_region='us-east-1';
|
||||
set s3_url_style='path';
|
|
@ -117,7 +117,7 @@
|
|||
"\n",
|
||||
"insert into language\n",
|
||||
"select *\n",
|
||||
"from read_csv('../duckdb/language.csv', AUTO_DETECT=TRUE, header=True);\n",
|
||||
"from read_csv('./language.csv', AUTO_DETECT=TRUE, header=True);\n",
|
||||
"\n",
|
||||
"create table mastodon_toot_raw as\n",
|
||||
"select m_id\n",
|
||||
|
|
Ładowanie…
Reference in New Issue