mastodon-stream/mastodonlisten.py

107 wiersze
2.7 KiB
Python
Czysty Zwykły widok Historia

2023-01-28 11:06:42 +00:00
import mastodon
from mastodon import Mastodon
from bs4 import BeautifulSoup
2023-02-01 22:17:10 +00:00
import argparse
2023-02-02 07:15:56 +00:00
import datetime
2023-01-28 11:06:42 +00:00
from kafkaproducer import kafka_producer
2023-02-01 22:17:10 +00:00
# Module-level settings shared between main() and Listener.on_update().
base_url = ''
enable_kafka = False
quiet = False

# The producer is created unconditionally when this module is loaded; an
# `enable_kafka` guard used to precede this line but is disabled.
topic_name, producer = kafka_producer()
#### Listener for Mastodon events
class Listener(mastodon.StreamListener):
    """Stream listener that summarises every status it receives.

    For each status it builds a flat record of metadata and simple text
    statistics, echoes a one-line summary unless the module-level ``quiet``
    flag is set, and publishes the record through the module-level Kafka
    ``producer`` when ``enable_kafka`` is set.
    """

    def on_update(self, status):
        """Handle one status delivered by the stream.

        Args:
            status: Status object exposing ``content``, ``tags``,
                ``language``, ``account``, ``id``, ``url`` and
                ``favourites_count`` as attributes, and optionally
                ``application``.
        """
        # Parse the content with the HTML parser and keep only its text.
        m_text = BeautifulSoup(status.content, 'html.parser').text
        num_tags = len(status.tags)
        num_chars = len(m_text)
        num_words = len(m_text.split())

        m_lang = status.language
        if m_lang is None:
            m_lang = 'unknown'

        m_user = status.account.username

        # The application attribute may be missing (original note: it is
        # only available on the local stream) or present but None; fall
        # back to an empty name in both cases instead of raising.
        application = getattr(status, 'application', None)
        app = application.get('name') if application else ''

        value_dict = {
            'm_id': status.id,
            # Whole epoch seconds at which this process handled the status
            # (not the status's own creation time). timestamp() replaces
            # strftime('%s'), a platform-specific directive that is not
            # supported everywhere.
            'created_at': int(datetime.datetime.now().timestamp()),
            'app': app,
            'url': status.url,
            'base_url': base_url,
            'language': m_lang,
            'favourites': status.favourites_count,
            'username': m_user,
            'bot': status.account.bot,
            'tags': num_tags,
            'characters': num_chars,
            'words': num_words,
            'mastodon_text': m_text
        }

        if not quiet:
            print(f'{m_user} {m_lang}', m_text[:30])

        if enable_kafka:
            producer.produce(topic = topic_name, value = value_dict)
            # Flush after every record so it is handed off immediately.
            producer.flush()
2023-01-28 11:06:42 +00:00
def main():
    """Parse the command-line flags and stream statuses to a Listener.

    Updates the module-level ``base_url``, ``enable_kafka`` and ``quiet``
    settings that ``Listener.on_update`` reads, then listens on the public
    stream when ``--public`` is given and on the local stream otherwise.
    """
    global base_url
    global enable_kafka
    global quiet

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--enableKafka',
        help='Whether to enable Kafka producer.',
        action='store_true',
        required=False,
        default=False)
    parser.add_argument(
        '--public',
        help='listen to public stream (instead of local).',
        action='store_true',
        required=False,
        default=False)
    parser.add_argument(
        '--quiet',
        help='Do not echo a summary of the toot',
        action='store_true',
        required=False,
        default=False)
    parser.add_argument(
        '--baseURL',
        help='Server URL',
        required=False,
        default='https://mastodon.social')
    args = parser.parse_args()

    base_url = args.baseURL
    enable_kafka = args.enableKafka
    # Without this assignment the --quiet flag is parsed but never applied.
    quiet = args.quiet

    # Named `client` so the local does not shadow the imported `mastodon`
    # module inside this function.
    client = Mastodon(api_base_url=base_url)
    if args.public:
        client.stream_public(Listener())
    else:
        client.stream_local(Listener())
2023-01-28 11:06:42 +00:00
2023-02-01 22:17:10 +00:00
# Start streaming only when run as a script, not when imported.
if __name__ == '__main__':
    main()