cleanup folders

main
Simon Aubury 2023-02-21 18:04:56 +11:00
rodzic d62100d1d8
commit f8dc638b2c
4 zmienionych plików z 1 dodań i 148 usunięć

Wyświetl plik

@ -1,138 +0,0 @@
-- .read duckdb/go.sql
-- (The line above is presumably the DuckDB CLI dot-command used to run this script.)
--
-- The block comment below is a disabled statement that rebuilds table xx from
-- the parquet files under s3://mastodon/topics/mastodon-topic/partition=0/.
-- Besides the raw columns it derives:
--   created_tz : created_at cast to INT, added as seconds to 'EPOCH'::TIMESTAMP,
--                then cast to TIMESTAMPTZ
--   new_url    : url with a leading "https://" removed, then everything from
--                the first remaining "/" to the end removed
-- NOTE(review): '^http[s]://' requires the literal "s", so a plain http:// URL
-- keeps its scheme and new_url collapses to "http:". Use '^https?://' if both
-- schemes should be stripped.
/*
drop table if exists xx;
create table xx as
select m_id
, created_at_str
, created_at, ('EPOCH'::TIMESTAMP + INTERVAL (created_at::INT) seconds)::TIMESTAMPTZ as created_tz
, app
, url
, regexp_replace(regexp_replace(url, '^http[s]://', ''), '/.*$', '') as new_url
, base_url
, language
, favourites
, username
, bot
, tags
, characters
, mastodon_text
FROM read_parquet('s3://mastodon/topics/mastodon-topic/partition=0/*');
*/
-- Row counts per (day, hour) bucket of created_tz in table yy, sorted by bucket.
-- Grouping and ordering are spelled out instead of using positional
-- references, so reordering the select list cannot silently change the result.
-- NOTE(review): date_part('day', ...) is the day of the month, so rows from
-- different months fall into the same bucket; confirm the data covers a
-- single month or switch to date_trunc.
select
    date_part('day', created_tz)  as created_day
  , date_part('hour', created_tz) as created_hour
  , count(*)
from yy
group by
    date_part('day', created_tz)
  , date_part('hour', created_tz)
order by
    created_day
  , created_hour
;
-- select username, bot, count(*) from xx group by 1,2 order by 3 desc;

-- NOTE: an orphaned "as select *" fragment used to sit here as live SQL. It
-- had no terminator of its own, so it ran straight into the CREATE TABLE
-- below; the combined statement could not be parsed and toots was never
-- created. It is preserved as a comment only.
-- as select *

-- old backup
-- Create toots from the parquet file one directory up. The column order
-- chosen here is the order the later inserts into toots follow.
create table toots
as
select
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
from read_parquet('../xx.parquet');
-- Append the rows read from the parquet files matching 'mastodo*.parquet'.
-- The target columns are named explicitly so the load does not depend on the
-- physical column order of toots.
insert into toots (
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
)
select
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
from read_parquet('mastodo*.parquet');
-- Append the rows read from the parquet files under
-- 20230213/mastodon-topic/partition=0/.
-- The target columns are named explicitly so the load does not depend on the
-- physical column order of toots.
insert into toots (
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
)
select
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
from read_parquet('20230213/mastodon-topic/partition=0/*.parquet');
-- Append the rows read from the parquet files under
-- 20230220/mastodon-topic/partition=0/.
-- The target columns are named explicitly so the load does not depend on the
-- physical column order of toots.
insert into toots (
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
)
select
    m_id
  , created_at
  , created_at_str
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
from read_parquet('20230220/mastodon-topic/partition=0/*.parquet');
-- Build all_toots with one row per distinct combination of the columns below,
-- collapsing rows of toots that are identical in all of them.
-- created_at_str is not carried over from toots.
create table all_toots as
select distinct
    m_id
  , created_at
  , app
  , url
  , base_url
  , language
  , favourites
  , username
  , bot
  , tags
  , characters
  , mastodon_text
from toots;
-- Export all_toots to the relative path 'all_toots.parquet' in parquet format.
copy all_toots to 'all_toots.parquet' (format parquet);

Wyświetl plik

@ -1,9 +0,0 @@
-- Install and load the httpfs extension, then configure the session's S3
-- settings.
install httpfs;
load httpfs;

-- Endpoint on localhost port 9000, without SSL, using path-style URLs.
set s3_endpoint = 'localhost:9000';
set s3_use_ssl = false;
set s3_url_style = 'path';
set s3_region = 'us-east-1';

-- NOTE(review): credentials are hard-coded in the script; presumably local
-- development defaults. Confirm they are never reused for a real deployment.
set s3_access_key_id = 'minio';
set s3_secret_access_key = 'minio123';

Wyświetl plik

@ -117,7 +117,7 @@
"\n",
"insert into language\n",
"select *\n",
"from read_csv('../duckdb/language.csv', AUTO_DETECT=TRUE, header=True);\n",
"from read_csv('./language.csv', AUTO_DETECT=TRUE, header=True);\n",
"\n",
"create table mastodon_toot_raw as\n",
"select m_id\n",