kopia lustrzana https://github.com/mkdryden/telegram-stats-bot
Porównaj commity
35 Commity
Autor | SHA1 | Data |
---|---|---|
mkdryden | 9ba83b36c9 | |
Michael DM Dryden | 552f65dcf6 | |
dinosaurtirex | 75560be5dc | |
Michael D. M. Dryden | 2c221294c6 | |
Michael D. M. Dryden | a89659ba7a | |
Michael D. M. Dryden | 7f66b3dadf | |
Michael D. M. Dryden | d5135a5a9c | |
Michael D. M. Dryden | 69e905e1e2 | |
Michael D. M. Dryden | 91c60ad871 | |
Michael D. M. Dryden | fe98e08eb5 | |
Michael D. M. Dryden | f707e29f90 | |
Michael D. M. Dryden | 97af778748 | |
Michael D. M. Dryden | 573359efc1 | |
Michael D. M. Dryden | 0ba771e73d | |
Michael D. M. Dryden | bb1e61e247 | |
Michael DM Dryden | 6aaa7ec9c9 | |
Michael DM Dryden | 0e4a37072c | |
Michael DM Dryden | 325cf33a89 | |
Michael DM Dryden | 01136894ff | |
Michael DM Dryden | 750427a841 | |
Michael DM Dryden | 5fd5c4ee08 | |
Michael DM Dryden | f3b61b640e | |
Michael DM Dryden | a1cdba2296 | |
mkdryden | ca52c7b9f8 | |
Michael DM Dryden | b8b8eff5bf | |
Michael DM Dryden | 7d80fc8ed3 | |
Michael DM Dryden | e8ecfab466 | |
Michael DM Dryden | 205bdd8325 | |
Michael DM Dryden | 85c6a86ed9 | |
Michael DM Dryden | 19da115f93 | |
Michael DM Dryden | ab71a2efae | |
Michael DM Dryden | a247701ab9 | |
Michael DM Dryden | 93feb6c8f8 | |
Michael DM Dryden | bce746cc88 | |
Michael D. M. Dryden | 7b1920967f |
|
@ -1,8 +1,6 @@
|
|||
name: ci
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 10 * * *' # everyday at 10am
|
||||
push:
|
||||
branches:
|
||||
- '**'
|
||||
|
|
|
@ -6,11 +6,86 @@ All notable changes to this project will be documented in this file.
|
|||
The format is based on `Keep a Changelog <https://keepachangelog.com/en/1.0.0/>`_,
|
||||
and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_.
|
||||
|
||||
----------
|
||||
`0.8.1`_ - 2023-11-17
|
||||
----------
|
||||
Fixed
|
||||
-----
|
||||
- Fix SQLAlchemy-related username insertion bug
|
||||
- Fix exception when backup store is disabled
|
||||
|
||||
----------
|
||||
`0.8.0`_ - 2023-11-13
|
||||
----------
|
||||
Added
|
||||
-----
|
||||
- Added a suite of tests for db and stats functions
|
||||
|
||||
Changed
|
||||
-------
|
||||
- Upgraded python-telegram-bot to version 20
|
||||
- Reply to edited messages
|
||||
- Updated to SQLAlchemy 2.0
|
||||
- Updated to pandas 2.1
|
||||
- Updated other dependencies
|
||||
- Separate SQL code from db_init function
|
||||
- Update to psycopg 3.0
|
||||
- Add more protection against bad stats options
|
||||
- Upgrade docker image to python 3.11
|
||||
- Update docker-compose file to use postgres 16
|
||||
- Use SQLAlchemy-Utils to make new db
|
||||
|
||||
Fixed
|
||||
-----
|
||||
- Date selection for word statistics was broken
|
||||
- Incorrect datatype for column in user_events table
|
||||
- Modify docker-compose file to set up host authentication with md5
|
||||
- Increase minimum title plot height
|
||||
|
||||
----------
|
||||
`0.7.0`_ - 2023-01-14
|
||||
----------
|
||||
Fixed
|
||||
-----
|
||||
- Sticker pack names save correctly now
|
||||
- Explicitly add psycopg2-binary as dependency because sqlalchemy extra doesn't seem to work anymore
|
||||
- Try to map user ids to names during json dump import. (#17)
|
||||
|
||||
Added
|
||||
-----
|
||||
- Add script to import data from desktop client json dumps
|
||||
- Add ECDF plot for message counts by user with ``/stats count-dist``
|
||||
|
||||
-------------
|
||||
`0.6.4`_ - 2022-02-27
|
||||
-------------
|
||||
Changed
|
||||
-------
|
||||
- Bumped python-telegram-bot to version 13.11 (#9)
|
||||
|
||||
-------------
|
||||
`0.6.3`_ - 2022-01-13
|
||||
-------------
|
||||
Changed
|
||||
-------
|
||||
- Titles plot uses seconds resolution with -duration option
|
||||
|
||||
Fixed
|
||||
-----
|
||||
- Fix database creation code for immutable SQLAlchemy 1.4 URLs
|
||||
- Titles plot considers time zone correctly for current time. (Prevents negative bars in titles plot with -duration option)
|
||||
|
||||
----------
|
||||
`0.6.2`_ - 2021-11-11
|
||||
----------
|
||||
Changed
|
||||
-------
|
||||
- Switched build backend to poetry-core so that PEP 517 builds don't need full poetry install
|
||||
|
||||
----------
|
||||
`0.6.1`_ - 2021-11-07
|
||||
----------
|
||||
Added
|
||||
Changed
|
||||
-------
|
||||
- Bumped pillow version to 8.3.2 for security reasons
|
||||
|
||||
|
@ -93,7 +168,7 @@ Fixed
|
|||
----------------------
|
||||
- Initial release
|
||||
|
||||
.. _Unreleased: https://github.com/mkdryden/telegram-stats-bot/compare/v0.6.1...HEAD
|
||||
.. _Unreleased: https://github.com/mkdryden/telegram-stats-bot/compare/v0.7.0...HEAD
|
||||
.. _0.1.1: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.1.1
|
||||
.. _0.2.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.2.0
|
||||
.. _0.3.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.3.0
|
||||
|
@ -102,3 +177,8 @@ Fixed
|
|||
.. _0.5.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.5.0
|
||||
.. _0.6.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.0
|
||||
.. _0.6.1: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.1
|
||||
.. _0.6.2: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.2
|
||||
.. _0.6.3: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.6.3
|
||||
.. _0.7.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.7.0
|
||||
.. _0.8.0: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.8.0
|
||||
.. _0.8.1: https://github.com/mkdryden/telegram-stats-bot/releases/tag/v0.8.1
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
FROM python:3.9
|
||||
FROM python:3.11
|
||||
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
COPY . .
|
||||
RUN pip install --no-cache-dir poetry
|
||||
RUN poetry install
|
||||
RUN pip install .
|
||||
|
||||
ENV TZ="America/Toronto" \
|
||||
ENV TZ="America/New_York" \
|
||||
BOT_TOKEN=-1 \
|
||||
CHAT_ID=0 \
|
||||
POSTGRES_USER=postgres \
|
||||
|
@ -14,4 +13,4 @@ ENV TZ="America/Toronto" \
|
|||
POSTGRES_HOST=db \
|
||||
POSTGRES_DB=telegram_bot
|
||||
|
||||
CMD [ "sh", "-c", "poetry run python -m telegram_stats_bot.main --tz=$TZ $BOT_TOKEN $CHAT_ID postgresql://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" ]
|
||||
CMD [ "sh", "-c", "python -m telegram_stats_bot.main --tz=$TZ $BOT_TOKEN $CHAT_ID postgresql+psycopg://$POSTGRES_USER:$POSTGRES_PASSWORD@$POSTGRES_HOST/$POSTGRES_DB" ]
|
||||
|
|
52
README.rst
52
README.rst
|
@ -22,6 +22,9 @@ telegram-stats-bot
|
|||
Telegram-stats-bot is a simple bot that lives in your Telegram group, logging messages to a PostgreSQL database and
|
||||
serving statistical tables and plots to users as Telegram messages.
|
||||
|
||||
*Note: Version 0.8.0 adds a number of behind the scenes improvements and dependency bumps in preparation for a version
|
||||
1.0 release. Stay tuned for a new interface using the inline keyboard bot functionality!*
|
||||
|
||||
.. image:: examples/chat_example.png
|
||||
:alt: Bot conversation example
|
||||
|
||||
|
@ -39,10 +42,14 @@ Table of contents
|
|||
|
||||
- `Setup`_
|
||||
|
||||
- `Importing Data`_
|
||||
|
||||
- `Fetching Stats`_
|
||||
|
||||
- `counts`_
|
||||
|
||||
- `count-dist`_
|
||||
|
||||
- `hours`_
|
||||
|
||||
- `days`_
|
||||
|
@ -126,14 +133,26 @@ The easiest way to install or upgrade is with pip:
|
|||
|
||||
$ pip install telegram-stats-bot --upgrade
|
||||
|
||||
Or you can install the latest git version using `poetry <https://python-poetry.org/>`_ (installed to Python 3.7 or later):
|
||||
This works directly from the git repository as well:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ pip install --upgrade git+https://github.com/mkdryden/telegram-stats-bot
|
||||
|
||||
Or you can install an entire venv for development using `poetry <https://python-poetry.org/>`_:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ git clone https://github.com/mkdryden/telegram-stats-bot.git
|
||||
$ cd telegram-stats-bot
|
||||
$ poetry install
|
||||
|
||||
Installing directly with ``setup.py`` should also work from the Pypi sdist (but why?).
|
||||
|
||||
If you want to be able to run the unit tests, you must install the test dependencies as well,
|
||||
and postgresql must be available in your PATH:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ poetry install --with test
|
||||
|
||||
------
|
||||
Docker
|
||||
|
@ -186,6 +205,26 @@ you've sent a message to trigger the update).
|
|||
You can see if messages are being logged correctly by reviewing the terminal output.
|
||||
You should see a line like ``2020-06-04 02:08:39,212 - __main__ - INFO - 8``, whenever a message is logged.
|
||||
|
||||
--------------
|
||||
Importing Data
|
||||
--------------
|
||||
Data can be imported from JSON dumps from the desktop client.
|
||||
Hit the three dot button from inside the desired group and select "Export chat history".
|
||||
Make sure you select JSON as the output format.
|
||||
You can also limit the date, as desired.
|
||||
The database will be updated and existing messages will remain, so you can use this feature to fill in gaps when the bot was not running.
|
||||
|
||||
To import data, simply call:
|
||||
|
||||
.. code:: shell
|
||||
|
||||
$ python -m telegram_stats_bot.json_dump_parser "/some/path/to/dump.json" "postgresql://telegram:CoolPassword@localhost/telegram_bot" --tz="America/Toronto"
|
||||
|
||||
Where the first argument is the path to the JSON dump, the second is the database connection string, as above, and the optional ``--tz`` argument should be the time zone of the system used to create the JSON dump.
|
||||
|
||||
This can be run without stopping a running bot. Note that it also attempts to set the user-id-to-user-name mapping, so it will add an extra entry for every user in the dump (this currently only affects the user stats related to user name changes).
|
||||
Before you run this, make sure your db string is correct or you might accidentally mess up other databases on the same server.
|
||||
|
||||
--------------
|
||||
Fetching Stats
|
||||
--------------
|
||||
|
@ -224,6 +263,13 @@ counts
|
|||
@WhereAreMyManners 30481 5.1
|
||||
@TheWorstOfTheBest 28705 4.8
|
||||
|
||||
count-dist
|
||||
----------
|
||||
``/stats count-dist`` returns an ECDF plot of the users in the group by message count.
|
||||
|
||||
.. image:: examples/count-dist.png
|
||||
:alt: Example of count-dist plot
|
||||
|
||||
hours
|
||||
-----
|
||||
``/stats hours`` returns a plot of message frequency for the hours of the day.
|
||||
|
|
|
@ -17,13 +17,14 @@ services:
|
|||
POSTGRES_HOST: db
|
||||
POSTGRES_DB: telegram_bot
|
||||
db:
|
||||
image: postgres:12
|
||||
image: postgres:16
|
||||
restart: always
|
||||
volumes:
|
||||
- db-data:/var/lib/postgresql/data
|
||||
environment:
|
||||
POSTGRES_PASSWORD: password
|
||||
POSTGRES_DB: telegram_bot
|
||||
POSTGRES_HOST_AUTH_METHOD: md5 # psycopg seems to have issues with scram-sha-256
|
||||
|
||||
volumes:
|
||||
db-data:
|
Plik binarny nie jest wyświetlany.
Po Szerokość: | Wysokość: | Rozmiar: 17 KiB |
Plik diff jest za duży
Load Diff
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "telegram-stats-bot"
|
||||
version = "0.6.1"
|
||||
version = "0.8.1"
|
||||
description = "A logging and statistics bot for Telegram based on python-telegram-bot."
|
||||
authors = ["Michael DM Dryden <mk.dryden@utoronto.ca>"]
|
||||
repository = "https://github.com/mkdryden/telegram-stats-bot"
|
||||
|
@ -9,19 +9,30 @@ readme = "README.rst"
|
|||
include = ["examples/*", "CHANGELOG.rst"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.8"
|
||||
python-telegram-bot = "^13.6"
|
||||
sqlalchemy = {version = "^1.4", extras = ["postgresql_psycopg2binary"]}
|
||||
SQLAlchemy-Utils = "0.37.6"
|
||||
pandas = "^1.0.4"
|
||||
seaborn = "^0.10.1"
|
||||
numpy = "^1.18.4"
|
||||
matplotlib = "^3.2.1"
|
||||
python = ">=3.9,<3.12"
|
||||
python-telegram-bot = {version = "^20.6", extras = ["job-queue"]}
|
||||
SQLAlchemy = "^2.0.23"
|
||||
SQLAlchemy-Utils = "^0.41"
|
||||
pandas = "^2.1"
|
||||
seaborn = "^0.13"
|
||||
numpy = "^1.26.1"
|
||||
matplotlib = "^3.8"
|
||||
appdirs = "^1.4.4"
|
||||
single-source = "^0.2.0"
|
||||
single-source = "^0.3.0"
|
||||
typer = "^0.9.0"
|
||||
psycopg = {version = "^3.1.12", extras = ["binary"]}
|
||||
pytest = "^7.4.3"
|
||||
|
||||
[tool.poetry.group.test]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^7.4.3"
|
||||
pytest-postgresql = "^5.0.0"
|
||||
random-word = "^1.0.11"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry>=0.12"]
|
||||
build-backend = "poetry.masonry.api"
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
|
|
@ -21,8 +21,8 @@
|
|||
|
||||
import logging
|
||||
|
||||
from sqlalchemy import Column, Table, MetaData
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy import Column, Table, MetaData, text
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.dialects import postgresql
|
||||
from sqlalchemy.types import TIMESTAMP, BigInteger
|
||||
|
||||
|
@ -35,9 +35,7 @@ messages = Table('messages_utc', metadata,
|
|||
Column('from_user', BigInteger),
|
||||
Column('text_index_col', postgresql.TSVECTOR))
|
||||
|
||||
|
||||
def init_dbs(engine: Engine):
|
||||
sql = """
|
||||
db_sql = sql = """
|
||||
create table if not exists messages_utc
|
||||
(
|
||||
message_id bigint,
|
||||
|
@ -75,10 +73,13 @@ def init_dbs(engine: Engine):
|
|||
(
|
||||
message_id bigint,
|
||||
user_id bigint,
|
||||
date timestamp with time zone,
|
||||
date timestamptz,
|
||||
event text
|
||||
);
|
||||
|
||||
-- Migrate wrong column type
|
||||
alter table user_events alter column user_id type bigint using user_id::bigint;
|
||||
|
||||
create index if not exists ix_user_events_message_id
|
||||
on user_events (message_id);
|
||||
|
||||
|
@ -95,5 +96,6 @@ def init_dbs(engine: Engine):
|
|||
|
||||
"""
|
||||
|
||||
with engine.connect() as con:
|
||||
con.execute(sql)
|
||||
|
||||
def init_dbs(con: Connection):
|
||||
con.execute(text(db_sql))
|
||||
|
|
|
@ -23,6 +23,11 @@ import json
|
|||
import typing
|
||||
|
||||
import pandas as pd
|
||||
import sqlalchemy.engine
|
||||
import typer
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from .stats import StatsRunner
|
||||
|
||||
media_dict = {'sticker': 'sticker',
|
||||
'animation': 'animation',
|
||||
|
@ -31,8 +36,8 @@ media_dict = {'sticker': 'sticker',
|
|||
'audio_file': 'audio',
|
||||
'video_message': 'video_note'}
|
||||
|
||||
user_event_cat = pd.Categorical(['left', 'joined'])
|
||||
message_type_cat = pd.Categorical(['migrate_from_group', 'text', 'pinned_message', 'photo', 'sticker',
|
||||
user_event_cat = pd.CategoricalDtype(['left', 'joined'])
|
||||
message_type_cat = pd.CategoricalDtype(['migrate_from_group', 'text', 'pinned_message', 'photo', 'sticker',
|
||||
'new_chat_members', 'left_chat_member', 'animation', 'video',
|
||||
'location', 'new_chat_title', 'voice', 'audio',
|
||||
'new_chat_photo', 'video_note', 'poll'])
|
||||
|
@ -50,9 +55,10 @@ def text_list_parser(text: typing.Union[str, typing.Sequence]) -> str:
|
|||
return out
|
||||
|
||||
|
||||
def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing.List[dict]]:
|
||||
def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing.List[dict], dict]:
|
||||
messages_out = []
|
||||
users_out = []
|
||||
|
||||
for message in df.itertuples():
|
||||
message_dict = {'message_id': message.id,
|
||||
'date': message.date,
|
||||
|
@ -71,16 +77,18 @@ def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing
|
|||
user_event_dict = {}
|
||||
if message.type == 'message':
|
||||
if pd.notnull(message.from_id):
|
||||
message_dict['from_user'] = message.from_id
|
||||
if not message.from_id.startswith('user'):
|
||||
continue
|
||||
message_dict['from_user'] = int(message.from_id[4:]) # remove 'user' from id
|
||||
|
||||
if pd.notnull(message.forwarded_from):
|
||||
try:
|
||||
message_dict['forward_from'] = int(message.forwarded_from)
|
||||
message_dict['forward_from'] = int(message.from_id[4:]) # username is used in forwarded_from
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
if pd.notnull(message.reply_to_message_id):
|
||||
message_dict['reply_to_message'] = message.reply_to_message_id
|
||||
message_dict['reply_to_message'] = int(message.reply_to_message_id)
|
||||
|
||||
if pd.notnull(message.photo):
|
||||
message_dict['type'] = 'photo'
|
||||
|
@ -97,12 +105,11 @@ def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing
|
|||
message_dict['text'] = text_list_parser(message.text)
|
||||
elif pd.notnull(message.poll):
|
||||
message_dict['type'] = 'poll'
|
||||
elif pd.notnull(message.location_information):
|
||||
message_dict['type'] = 'location'
|
||||
|
||||
elif message.type == 'service':
|
||||
if pd.notnull(message.actor_id):
|
||||
message_dict['from_user'] = message.actor_id
|
||||
if message.actor_id.startswith('user'):
|
||||
message_dict['from_user'] = int(message.actor_id[4:])
|
||||
|
||||
if message.action == 'edit_group_title':
|
||||
message_dict['type'] = 'new_chat_title'
|
||||
|
@ -118,12 +125,12 @@ def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing
|
|||
users_out.append({'message_id': message.id,
|
||||
'user_id': i,
|
||||
'date': message.date,
|
||||
'event': 'join'})
|
||||
'event': 'joined'})
|
||||
except TypeError:
|
||||
user_event_dict = {'message_id': message.id,
|
||||
'user_id': message.actor_id,
|
||||
'date': message.date,
|
||||
'event': 'join'}
|
||||
'event': 'joined'}
|
||||
elif message.action == 'remove_members':
|
||||
message_dict['type'] = 'left_chat_member'
|
||||
for i in message.members:
|
||||
|
@ -136,7 +143,15 @@ def convert_messages(df: pd.DataFrame) -> typing.Tuple[typing.List[dict], typing
|
|||
messages_out.append(message_dict)
|
||||
if user_event_dict != {}:
|
||||
users_out.append(user_event_dict)
|
||||
return messages_out, users_out
|
||||
|
||||
user_map = {int(i[4:]): df.loc[df['from_id'] == i, 'from'].iloc[0]
|
||||
for i in df['from_id'].unique()
|
||||
if (df['from_id'] == i).any() and i.startswith('user')}
|
||||
|
||||
# Use long name for both name and long name since we can't fetch usernames
|
||||
user_map = {k: (v, v) for k, v in user_map.items() if v}
|
||||
|
||||
return messages_out, users_out, user_map
|
||||
|
||||
|
||||
def parse_json(path: str):
|
||||
|
@ -144,3 +159,78 @@ def parse_json(path: str):
|
|||
js = json.load(f)
|
||||
chat = js['chats']['list'][1]['messages']
|
||||
df = pd.DataFrame(chat)
|
||||
|
||||
|
||||
def fix_dtypes_m(df: pd.DataFrame, tz: str) -> pd.DataFrame:
|
||||
intcols = ['forward_from_message_id', 'forward_from', 'forward_from_chat',
|
||||
'from_user', 'reply_to_message']
|
||||
df_out = df.copy()
|
||||
df_out.loc[:, intcols] = df_out.loc[:, intcols].astype('Int64')
|
||||
df_out.loc[:, 'date'] = pd.to_datetime(df_out['date'], utc=False).dt.tz_localize(tz=tz,
|
||||
ambiguous=True)
|
||||
df_out.loc[:, 'type'] = df_out.loc[:, 'type'].astype(message_type_cat)
|
||||
return df_out.convert_dtypes()
|
||||
|
||||
|
||||
def fix_dtypes_u(df: pd.DataFrame, tz: str) -> pd.DataFrame:
|
||||
df_out = df.copy()
|
||||
df_out.loc[:, 'date'] = pd.to_datetime(df_out['date'], utc=False).dt.tz_localize(tz=tz,
|
||||
ambiguous=True)
|
||||
df_out.loc[df_out.event == 'join', 'event'] = 'joined'
|
||||
df_out['event'] = df_out.event.astype(user_event_cat)
|
||||
|
||||
return df_out.convert_dtypes()
|
||||
|
||||
|
||||
def update_user_list(users: dict[int, tuple[str, str]], engine: sqlalchemy.engine.Engine, tz: str):
|
||||
stats_runner = StatsRunner(engine, tz)
|
||||
stats_runner.update_user_ids(users)
|
||||
|
||||
def main(json_path: str, db_url: str, tz: str = 'Etc/UTC'):
|
||||
"""
|
||||
Parse backup json file and update database with contents.
|
||||
:param json_path:
|
||||
:param db_url:
|
||||
:param tz:
|
||||
:return:
|
||||
"""
|
||||
with open(json_path, encoding='utf-8') as f:
|
||||
js = json.load(f)
|
||||
|
||||
chat = js['messages']
|
||||
messages, users, user_map = convert_messages(pd.DataFrame(chat))
|
||||
|
||||
df_m = pd.DataFrame(messages).set_index('message_id')
|
||||
df_m = fix_dtypes_m(df_m, tz)
|
||||
df_u = pd.DataFrame(users).set_index('message_id')
|
||||
df_u = fix_dtypes_u(df_u, tz)
|
||||
|
||||
engine = create_engine(db_url, echo=False)
|
||||
|
||||
# Exclude existing messages
|
||||
m_ids = pd.read_sql_table('messages_utc', engine).message_id
|
||||
df_m = df_m.loc[~df_m.index.isin(m_ids)]
|
||||
|
||||
# Map usernames back to numeric ids
|
||||
inverse_user_map = pd.DataFrame(user_map).T.reset_index()
|
||||
df_u = df_u.reset_index().merge(inverse_user_map, how='inner', left_on='user_id', right_on=0) \
|
||||
.loc[:, ['index', 'message_id', 'date', 'event']] \
|
||||
.rename(columns={'index': 'user_id'}) \
|
||||
.set_index('message_id', drop=True)
|
||||
|
||||
# Merge existing user events
|
||||
m_ids = pd.read_sql_table('user_events', engine).set_index('message_id')
|
||||
df_u['user_id'] = df_u['user_id'].astype('Int64')
|
||||
merged = df_u.merge(m_ids, how='outer', left_index=True, right_index=True, suffixes=('', 'y'))
|
||||
merged['user_idy'] = pd.to_numeric(merged['user_idy'], errors='coerce').astype('Int64') # Keep existing valid IDs
|
||||
merged['user_id'] = merged['user_id'].fillna(merged['user_idy'])
|
||||
df_u = merged.loc[:, ['user_id', 'date', 'event']].dropna(how='any')
|
||||
|
||||
df_u.to_sql('user_events', engine, if_exists='replace') # Contains possible updates to existing values
|
||||
df_m.to_sql('messages_utc', engine, if_exists='append')
|
||||
|
||||
update_user_list(user_map, engine, tz)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
typer.run(main)
|
||||
|
|
|
@ -26,8 +26,7 @@ import json
|
|||
import os
|
||||
|
||||
from sqlalchemy import MetaData, Table, Column, create_engine, BigInteger, TIMESTAMP, Text
|
||||
from sqlalchemy_utils import database_exists
|
||||
from sqlalchemy_utils.functions.orm import quote
|
||||
from sqlalchemy_utils import database_exists, create_database
|
||||
|
||||
from .parse import MessageDict
|
||||
from .db import init_dbs
|
||||
|
@ -54,6 +53,11 @@ user_events = Table('user_events', metadata,
|
|||
Column('user_id', BigInteger),
|
||||
Column('date', TIMESTAMP),
|
||||
Column('event', Text))
|
||||
user_names = Table('user_names', metadata,
|
||||
Column('user_id', BigInteger),
|
||||
Column('date', TIMESTAMP),
|
||||
Column('username', Text),
|
||||
Column('display_name', Text))
|
||||
|
||||
|
||||
def date_converter(o):
|
||||
|
@ -74,23 +78,10 @@ class PostgresStore(object):
|
|||
def __init__(self, connection_url: str):
|
||||
self.engine = create_engine(connection_url, echo=False, isolation_level="AUTOCOMMIT")
|
||||
if not database_exists(self.engine.url):
|
||||
text = f"""
|
||||
CREATE DATABASE {quote(self.engine, self.engine.url.database)}
|
||||
ENCODING 'utf-8'
|
||||
TEMPLATE {quote(self.engine, 'template1')}
|
||||
"""
|
||||
create_database(connection_url, template='template1')
|
||||
|
||||
url = copy(self.engine.url)
|
||||
url.database = 'postgres'
|
||||
|
||||
engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
|
||||
result_proxy = engine.execute(text)
|
||||
|
||||
if result_proxy is not None:
|
||||
result_proxy.close()
|
||||
engine.dispose()
|
||||
|
||||
init_dbs(self.engine)
|
||||
with self.engine.connect() as con:
|
||||
init_dbs(con)
|
||||
|
||||
def append_data(self, name: str, data: MessageDict):
|
||||
data['date'] = str(data['date'])
|
||||
|
@ -107,12 +98,12 @@ class PostgresStore(object):
|
|||
data['date'] = str(data['date'])
|
||||
if name == 'messages':
|
||||
update_statement = messages.update()\
|
||||
.where(messages.c.message_id == data['message_id'])
|
||||
.where(messages.c['message_id'] == data['message_id'])
|
||||
with self.engine.connect() as con:
|
||||
_ = con.execute(update_statement, data)
|
||||
elif name == 'user_events':
|
||||
update_statement = user_events.update()\
|
||||
.where(user_events.c.message_id == data['message_id'])
|
||||
.where(user_events.c['message_id'] == data['message_id'])
|
||||
with self.engine.connect() as con:
|
||||
_ = con.execute(update_statement, data)
|
||||
else:
|
||||
|
|
|
@ -28,19 +28,21 @@ import os
|
|||
|
||||
import telegram
|
||||
from telegram.error import BadRequest
|
||||
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext, JobQueue
|
||||
from telegram.update import Update
|
||||
from telegram.ext import Updater, CommandHandler, MessageHandler, CallbackContext, JobQueue, ContextTypes, Application, \
|
||||
filters
|
||||
from telegram import Update
|
||||
import appdirs
|
||||
|
||||
from .parse import parse_message
|
||||
from .log_storage import JSONStore, PostgresStore
|
||||
from .stats import StatsRunner, get_parser, HelpException
|
||||
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
|
||||
logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
level=logging.INFO)
|
||||
logging.getLogger('httpx').setLevel(logging.WARNING) # Mute normal http requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
stats = None
|
||||
|
@ -54,10 +56,11 @@ sticker_idx = None
|
|||
sticker_id = None
|
||||
|
||||
|
||||
def log_message(update: Update, context: CallbackContext):
|
||||
async def log_message(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
||||
if update.edited_message:
|
||||
edited_message, user = parse_message(update.effective_message)
|
||||
bak_store.append_data('edited-messages', edited_message)
|
||||
if bak_store:
|
||||
bak_store.append_data('edited-messages', edited_message)
|
||||
store.update_data('messages', edited_message)
|
||||
return
|
||||
|
||||
|
@ -68,37 +71,36 @@ def log_message(update: Update, context: CallbackContext):
|
|||
message, user = parse_message(update.effective_message)
|
||||
|
||||
if message:
|
||||
bak_store.append_data('messages', message)
|
||||
if bak_store:
|
||||
bak_store.append_data('messages', message)
|
||||
store.append_data('messages', message)
|
||||
if len(user) > 0:
|
||||
for i in user:
|
||||
if i:
|
||||
bak_store.append_data('user_events', i)
|
||||
if bak_store:
|
||||
bak_store.append_data('user_events', i)
|
||||
store.append_data('user_events', i)
|
||||
|
||||
|
||||
def get_chatid(update: Update, context: CallbackContext):
|
||||
context.bot.send_message(chat_id=update.effective_chat.id,
|
||||
text=f"Chat id: {update.effective_chat.id}")
|
||||
async def get_chatid(update: Update, context: ContextTypes.DEFAULT_TYPE):
|
||||
await update.message.reply_text(text=f"Chat id: {update.effective_chat.id}")
|
||||
|
||||
|
||||
def test_can_read_all_group_messages(context: CallbackContext):
|
||||
async def test_can_read_all_group_messages(context: CallbackContext):
|
||||
if not context.bot.can_read_all_group_messages:
|
||||
logger.error("Bot privacy is set to enabled, cannot log messages!!!")
|
||||
|
||||
|
||||
def update_usernames_wrapper(context: CallbackContext):
|
||||
context.dispatcher.run_async(update_usernames, context)
|
||||
|
||||
|
||||
def update_usernames(context: CallbackContext): # context.job.context contains the chat_id
|
||||
async def update_usernames(context: ContextTypes.DEFAULT_TYPE): # context.job.context contains the chat_id
|
||||
user_ids = stats.get_message_user_ids()
|
||||
db_users = stats.get_db_users()
|
||||
tg_users = {user_id: None for user_id in user_ids}
|
||||
to_update = {}
|
||||
for u_id in tg_users:
|
||||
try:
|
||||
user = context.bot.get_chat_member(chat_id=context.job.context, user_id=u_id).user
|
||||
chat_member: telegram.ChatMember = await context.bot.get_chat_member(chat_id=context.job.chat_id,
|
||||
user_id=u_id)
|
||||
user = chat_member.user
|
||||
tg_users[u_id] = user.name, user.full_name
|
||||
if tg_users[u_id] != db_users[u_id]:
|
||||
if tg_users[u_id][1] == db_users[u_id][1]: # Flag these so we don't insert new row
|
||||
|
@ -119,7 +121,7 @@ def update_usernames(context: CallbackContext): # context.job.context contains
|
|||
logger.info("Usernames updated")
|
||||
|
||||
|
||||
def print_stats(update: Update, context: CallbackContext):
|
||||
async def print_stats(update: Update, context: CallbackContext):
|
||||
if update.effective_user.id not in stats.users:
|
||||
return
|
||||
|
||||
|
@ -130,11 +132,11 @@ def print_stats(update: Update, context: CallbackContext):
|
|||
ns = stats_parser.parse_args(shlex.split(" ".join(context.args)))
|
||||
except HelpException as e:
|
||||
text = e.msg
|
||||
send_help(text, context, update)
|
||||
await send_help(text, context, update)
|
||||
return
|
||||
except argparse.ArgumentError as e:
|
||||
text = str(e)
|
||||
send_help(text, context, update)
|
||||
await send_help(text, context, update)
|
||||
return
|
||||
else:
|
||||
args = vars(ns)
|
||||
|
@ -146,7 +148,7 @@ def print_stats(update: Update, context: CallbackContext):
|
|||
uid = args['user']
|
||||
args['user'] = uid, stats.users[uid][0]
|
||||
except KeyError:
|
||||
send_help("unknown userid", context, update)
|
||||
await send_help("unknown userid", context, update)
|
||||
return
|
||||
except KeyError:
|
||||
pass
|
||||
|
@ -162,18 +164,19 @@ def print_stats(update: Update, context: CallbackContext):
|
|||
text, image = func(**args)
|
||||
except HelpException as e:
|
||||
text = e.msg
|
||||
send_help(text, context, update)
|
||||
await send_help(text, context, update)
|
||||
return
|
||||
|
||||
if text:
|
||||
context.bot.send_message(chat_id=update.effective_chat.id,
|
||||
text=text,
|
||||
parse_mode=telegram.ParseMode.MARKDOWN_V2)
|
||||
await update.effective_message.reply_text(text=text,
|
||||
parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
|
||||
|
||||
if image:
|
||||
context.bot.send_photo(chat_id=update.effective_chat.id, photo=image)
|
||||
await update.effective_message.reply_photo(caption='`' + " ".join(context.args) + '`', photo=image,
|
||||
parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
|
||||
|
||||
|
||||
def send_help(text: str, context: CallbackContext, update: Update):
|
||||
async def send_help(text: str, context: CallbackContext, update: Update):
|
||||
"""
|
||||
Send help text to user. Tries to send a direct message if possible.
|
||||
:param text: text to send
|
||||
|
@ -182,13 +185,12 @@ def send_help(text: str, context: CallbackContext, update: Update):
|
|||
:return:
|
||||
"""
|
||||
try:
|
||||
context.bot.send_message(chat_id=update.effective_user.id,
|
||||
text=f"```\n{text}\n```",
|
||||
parse_mode=telegram.ParseMode.MARKDOWN_V2)
|
||||
except telegram.error.Unauthorized: # If user has never chatted with bot
|
||||
context.bot.send_message(chat_id=update.effective_chat.id,
|
||||
text=f"```\n{text}\n```",
|
||||
parse_mode=telegram.ParseMode.MARKDOWN_V2)
|
||||
await context.bot.send_message(chat_id=update.effective_user.id,
|
||||
text=f"```\n{text}\n```",
|
||||
parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
|
||||
except telegram.error.Forbidden: # If user has never chatted with bot
|
||||
await update.message.reply_text(text=f"```\n{text}\n```",
|
||||
parse_mode=telegram.constants.ParseMode.MARKDOWN_V2)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@ -198,37 +200,43 @@ if __name__ == '__main__':
|
|||
parser.add_argument('postgres_url', type=str, help="Sqlalchemy-compatible postgresql url.")
|
||||
parser.add_argument('--json-path', type=str,
|
||||
help="Either full path to backup storage folder or prefix (will be stored in app data dir.",
|
||||
default="chat")
|
||||
default="")
|
||||
parser.add_argument('--tz', type=str,
|
||||
help="tz database time zone string, e.g. Europe/London",
|
||||
default='Etc/UTC')
|
||||
args = parser.parse_args()
|
||||
|
||||
updater = Updater(token=args.token, use_context=True)
|
||||
dispatcher = updater.dispatcher
|
||||
application = Application.builder().token(args.token).build()
|
||||
|
||||
path = args.json_path
|
||||
if not os.path.split(path)[0]: # Empty string for left part of path
|
||||
path = os.path.join(appdirs.user_data_dir('telegram-stats-bot'), path)
|
||||
if args.json_path:
|
||||
path = args.json_path
|
||||
if not os.path.split(path)[0]: # Empty string for left part of path
|
||||
path = os.path.join(appdirs.user_data_dir('telegram-stats-bot'), path)
|
||||
|
||||
os.makedirs(path, exist_ok=True)
|
||||
bak_store = JSONStore(path)
|
||||
else:
|
||||
bak_store = None
|
||||
|
||||
# Use psycopg 3
|
||||
if args.postgres_url.startswith('postgresql://'):
|
||||
args.postgres_url = args.postgres_url.replace('postgresql://', 'postgresql+psycopg://', 1)
|
||||
|
||||
os.makedirs(path, exist_ok=True)
|
||||
bak_store = JSONStore(path)
|
||||
store = PostgresStore(args.postgres_url)
|
||||
stats = StatsRunner(store.engine, tz=args.tz)
|
||||
|
||||
stats_handler = CommandHandler('stats', print_stats, filters=~Filters.update.edited_message, run_async=True)
|
||||
dispatcher.add_handler(stats_handler)
|
||||
stats_handler = CommandHandler('stats', print_stats)
|
||||
application.add_handler(stats_handler)
|
||||
|
||||
chat_id_handler = CommandHandler('chatid', get_chatid, filters=~Filters.update.edited_message)
|
||||
dispatcher.add_handler(chat_id_handler)
|
||||
chat_id_handler = CommandHandler('chatid', get_chatid, filters=~filters.UpdateType.EDITED)
|
||||
application.add_handler(chat_id_handler)
|
||||
|
||||
if args.chat_id != 0:
|
||||
log_handler = MessageHandler(Filters.chat(chat_id=args.chat_id), log_message)
|
||||
dispatcher.add_handler(log_handler)
|
||||
log_handler = MessageHandler(filters.Chat(chat_id=args.chat_id), log_message)
|
||||
application.add_handler(log_handler)
|
||||
|
||||
job_queue: JobQueue = updater.job_queue
|
||||
update_users_job = job_queue.run_repeating(update_usernames_wrapper, interval=3600, first=5, context=args.chat_id)
|
||||
job_queue = application.job_queue
|
||||
update_users_job = job_queue.run_repeating(update_usernames, interval=3600, first=5, chat_id=args.chat_id)
|
||||
test_privacy_job = job_queue.run_once(test_can_read_all_group_messages, 0)
|
||||
|
||||
updater.start_polling()
|
||||
updater.idle()
|
||||
application.run_polling()
|
||||
|
|
|
@ -48,7 +48,7 @@ class MessageDict(TypedDict):
|
|||
type: str
|
||||
|
||||
|
||||
def parse_message(message: telegram.message.Message) -> Tuple[dict, List[dict]]:
|
||||
def parse_message(message: telegram.Message) -> Tuple[dict, List[dict]]:
|
||||
message_dict: MessageDict = {'message_id': message.message_id,
|
||||
'date': message.date,
|
||||
'from_user': None,
|
||||
|
@ -99,7 +99,7 @@ def parse_message(message: telegram.message.Message) -> Tuple[dict, List[dict]]:
|
|||
elif message.sticker:
|
||||
message_dict['type'] = 'sticker'
|
||||
message_dict['file_id'] = message.sticker.file_id
|
||||
message_dict['sticker_set_name']: message.sticker.set_name
|
||||
message_dict['sticker_set_name'] = message.sticker.set_name
|
||||
elif message.video:
|
||||
message_dict['type'] = 'video'
|
||||
elif message.video_note:
|
||||
|
|
|
@ -34,7 +34,7 @@ import numpy as np
|
|||
from matplotlib.figure import Figure
|
||||
from matplotlib.dates import date2num
|
||||
from sqlalchemy.engine import Engine
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy import select, func, text
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
from .utils import escape_markdown, TsStat, random_quote
|
||||
|
@ -45,9 +45,18 @@ sns.set_context('paper')
|
|||
sns.set_style('whitegrid')
|
||||
sns.set_palette("Set2")
|
||||
|
||||
logging.getLogger('matplotlib').setLevel(logging.WARNING) # Mute matplotlib debug messages
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
||||
def output_fig(fig: Figure) -> BytesIO:
    """
    Render a figure into an in-memory PNG buffer.

    :param fig: Figure to save
    :return: Buffer named ``plot.png`` containing the PNG data, rewound to its start
    """
    buffer = BytesIO()
    buffer.name = 'plot.png'  # Give the buffer a file-like name ending in .png

    # Save with a tight bounding box at 200 dpi
    fig.savefig(buffer, format='png', dpi=200, bbox_inches='tight')

    buffer.seek(0)  # Rewind so the caller reads from the first byte
    return buffer
|
||||
|
||||
|
||||
class HelpException(Exception):
|
||||
def __init__(self, msg: str = None):
|
||||
self.msg = msg
|
||||
|
@ -72,6 +81,7 @@ class InternalParser(argparse.ArgumentParser):
|
|||
|
||||
class StatsRunner(object):
|
||||
allowed_methods = {'counts': "get_chat_counts",
|
||||
'count-dist': 'get_chat_ecdf',
|
||||
'hours': "get_counts_by_hour",
|
||||
'days': "get_counts_by_day",
|
||||
'week': "get_week_by_hourday",
|
||||
|
@ -94,8 +104,8 @@ class StatsRunner(object):
|
|||
def get_message_user_ids(self) -> List[int]:
|
||||
"""Returns list of unique user ids from messages in database."""
|
||||
with self.engine.connect() as con:
|
||||
result = con.execute("SELECT DISTINCT from_user FROM messages_utc;")
|
||||
return [user for user, in result.fetchall()]
|
||||
result = con.execute(text("SELECT DISTINCT from_user FROM messages_utc;"))
|
||||
return [user for user, in result.fetchall() if user is not None]
|
||||
|
||||
def get_db_users(self) -> Dict[int, Tuple[str, str]]:
|
||||
"""Returns dictionary mapping user ids to usernames and full names."""
|
||||
|
@ -111,7 +121,7 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
result = con.execute(query)
|
||||
result = con.execute(text(query))
|
||||
result = result.fetchall()
|
||||
|
||||
return {user_id: (username, name) for user_id, username, name in result}
|
||||
|
@ -124,19 +134,19 @@ class StatsRunner(object):
|
|||
for uid in user_dict:
|
||||
username, display_name = user_dict[uid]
|
||||
sql_dict = {'uid': uid, 'username': username, 'display_name': display_name}
|
||||
query = """
|
||||
UPDATE user_names
|
||||
SET username = %(username)s
|
||||
WHERE user_id = %(uid)s AND username IS DISTINCT FROM %(username)s;
|
||||
update_query = """
|
||||
UPDATE user_names
|
||||
SET username = :username
|
||||
WHERE user_id = :uid AND username IS DISTINCT FROM :username;
|
||||
"""
|
||||
insert_query = """
|
||||
INSERT INTO user_names(user_id, date, username, display_name)
|
||||
VALUES (:uid, current_timestamp, :username, :display_name);
|
||||
"""
|
||||
if display_name:
|
||||
query += """\n
|
||||
INSERT INTO user_names(user_id, date, username, display_name)
|
||||
VALUES (%(uid)s, current_timestamp, %(username)s, %(display_name)s);
|
||||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
con.execute(query, sql_dict)
|
||||
con.execute(text(update_query), sql_dict)
|
||||
if display_name:
|
||||
con.execute(text(insert_query), sql_dict)
|
||||
|
||||
def get_chat_counts(self, n: int = 20, lquery: str = None, mtype: str = None, start: str = None, end: str = None) \
|
||||
-> Tuple[Union[str, None], Union[None, BytesIO]]:
|
||||
|
@ -165,25 +175,25 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
query_where = f"WHERE {' AND '.join(query_conditions)}"
|
||||
|
||||
query = f"""
|
||||
SELECT "from_user", COUNT(*) as "count"
|
||||
FROM "messages_utc"
|
||||
{query_where}
|
||||
GROUP BY "from_user"
|
||||
ORDER BY "count" DESC;
|
||||
SELECT "from_user", COUNT(*) as "count"
|
||||
FROM "messages_utc"
|
||||
{query_where}
|
||||
GROUP BY "from_user"
|
||||
ORDER BY "count" DESC;
|
||||
"""
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict, index_col='from_user')
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict, index_col='from_user')
|
||||
|
||||
if len(df) == 0:
|
||||
return "No matching messages", None
|
||||
|
@ -201,9 +211,75 @@ class StatsRunner(object):
|
|||
df.columns = ['User', 'Total Messages', 'Percent']
|
||||
df['User'] = df['User'].str.replace(r'[^\x00-\x7F]|[@]', "", regex=True) # Drop emoji and @
|
||||
|
||||
text = df.iloc[:n].to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
out_text = df.iloc[:n].to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
|
||||
return f"```\n{text}\n```", None
|
||||
return f"```\n{out_text}\n```", None
|
||||
|
||||
def get_chat_ecdf(self, lquery: str = None, mtype: str = None, start: str = None, end: str = None,
                  log: bool = False) -> Tuple[Union[str, None], Union[None, BytesIO]]:
    """
    Get message counts by number of users as an ECDF plot.
    :param lquery: Limit results to lexical query (&, |, !, <n>)
    :param mtype: Limit results to message type (text, sticker, photo, etc.)
    :param start: Start timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
    :param end: End timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
    :param log: Plot with log scale.
    """
    # NOTE(review): the docstring above is kept verbatim; it may be read at runtime to
    # build the command help text - confirm before rewording it.
    params = {}
    conditions = []

    if lquery:
        # The lexical query is quoted by random_quote before being embedded in the SQL
        conditions.append(f"text_index_col @@ to_tsquery( {random_quote(lquery)} )")

    if mtype:
        valid_types = ('text', 'sticker', 'photo', 'animation', 'video', 'voice', 'location', 'video_note',
                       'audio', 'document', 'poll')
        if mtype not in valid_types:
            raise HelpException(f'mtype {mtype} is invalid.')
        # Only a value from the fixed list above can reach the query text
        conditions.append(f"type = '{mtype}'")

    if start:
        params['start_dt'] = pd.to_datetime(start)
        conditions.append("date >= :start_dt")

    if end:
        params['end_dt'] = pd.to_datetime(end)
        conditions.append("date < :end_dt")

    query_where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    query = f"""
             SELECT "from_user", COUNT(*) as "count"
             FROM "messages_utc"
             {query_where}
             GROUP BY "from_user"
             ORDER BY "count" DESC;
             """

    with self.engine.connect() as con:
        counts = pd.read_sql_query(text(query), con, params=params)

    if not len(counts):
        return "No matching messages", None

    fig = Figure(constrained_layout=True)
    axes = fig.subplots()

    # Per-user message counts go on the y axis; the cumulative count is drawn along x
    sns.ecdfplot(counts, y='count', stat='count', log_scale=log, ax=axes)
    axes.set_xlabel('User')
    axes.set_ylabel('Messages')

    title = f"Messages by User for {lquery}" if lquery else "Messages by User"
    axes.set_title(title)

    sns.despine(fig=fig)

    return None, output_fig(fig)
|
||||
|
||||
def get_counts_by_hour(self, user: Tuple[int, str] = None, lquery: str = None, start: str = None, end: str = None) \
|
||||
-> Tuple[Union[str, None], Union[None, BytesIO]]:
|
||||
|
@ -221,15 +297,15 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -244,7 +320,7 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return "No matching messages", None
|
||||
|
@ -267,10 +343,12 @@ class StatsRunner(object):
|
|||
fig = Figure(constrained_layout=True)
|
||||
subplot = fig.subplots()
|
||||
|
||||
sns.stripplot(x='hour', y='messages', data=df, jitter=.4, size=2, ax=subplot, alpha=.5, zorder=0)
|
||||
sns.boxplot(x='hour', y='messages', data=df, whis=1, showfliers=False, whiskerprops={"zorder": 10},
|
||||
boxprops={"zorder": 10},
|
||||
ax=subplot, zorder=10)
|
||||
plot_common_kwargs = dict(x='hour', y='messages', hue='hour', data=df, ax=subplot, legend=False,
|
||||
palette='flare')
|
||||
sns.stripplot(jitter=.4, size=2, alpha=.5, zorder=1, **plot_common_kwargs)
|
||||
sns.boxplot(whis=1, showfliers=False, whiskerprops={"zorder": 10}, boxprops={"zorder": 10}, zorder=10,
|
||||
**plot_common_kwargs)
|
||||
|
||||
subplot.set_ylim(bottom=0, top=df['messages'].quantile(0.999, interpolation='higher'))
|
||||
|
||||
subplot.axvspan(11.5, 23.5, zorder=0, color=(0, 0, 0, 0.05))
|
||||
|
@ -288,10 +366,7 @@ class StatsRunner(object):
|
|||
|
||||
sns.despine(fig=fig)
|
||||
|
||||
bio = BytesIO()
|
||||
bio.name = 'plot.png'
|
||||
fig.savefig(bio, bbox_inches='tight')
|
||||
bio.seek(0)
|
||||
bio = output_fig(fig)
|
||||
|
||||
return None, bio
|
||||
|
||||
|
@ -312,31 +387,31 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
query_where = f"WHERE {' AND '.join(query_conditions)}"
|
||||
|
||||
query = f"""
|
||||
SELECT date_trunc('day', date)
|
||||
as day, count(*) as messages
|
||||
FROM messages_utc
|
||||
{query_where}
|
||||
GROUP BY day
|
||||
ORDER BY day
|
||||
SELECT date_trunc('day', date)
|
||||
as day, count(*) as messages
|
||||
FROM messages_utc
|
||||
{query_where}
|
||||
GROUP BY day
|
||||
ORDER BY day
|
||||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return "No matching messages", None
|
||||
|
@ -373,10 +448,7 @@ class StatsRunner(object):
|
|||
|
||||
sns.despine(fig=fig)
|
||||
|
||||
bio = BytesIO()
|
||||
bio.name = 'plot.png'
|
||||
fig.savefig(bio, bbox_inches='tight')
|
||||
bio.seek(0)
|
||||
bio = output_fig(fig)
|
||||
|
||||
return None, bio
|
||||
|
||||
|
@ -396,30 +468,30 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
query_where = f"WHERE {' AND '.join(query_conditions)}"
|
||||
|
||||
query = f"""
|
||||
SELECT date_trunc('hour', date)
|
||||
as msg_time, count(*) as messages
|
||||
FROM messages_utc
|
||||
{query_where}
|
||||
GROUP BY msg_time
|
||||
ORDER BY msg_time
|
||||
SELECT date_trunc('hour', date)
|
||||
as msg_time, count(*) as messages
|
||||
FROM messages_utc
|
||||
{query_where}
|
||||
GROUP BY msg_time
|
||||
ORDER BY msg_time
|
||||
"""
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return "No matching messages", None
|
||||
|
@ -452,10 +524,7 @@ class StatsRunner(object):
|
|||
else:
|
||||
ax.set_title("Total messages by day and hour")
|
||||
|
||||
bio = BytesIO()
|
||||
bio.name = 'plot.png'
|
||||
fig.savefig(bio, bbox_inches='tight')
|
||||
bio.seek(0)
|
||||
bio = output_fig(fig)
|
||||
|
||||
return None, bio
|
||||
|
||||
|
@ -482,15 +551,15 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -506,7 +575,7 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return "No matching messages", None
|
||||
|
@ -542,10 +611,7 @@ class StatsRunner(object):
|
|||
sns.despine(fig=fig)
|
||||
fig.tight_layout()
|
||||
|
||||
bio = BytesIO()
|
||||
bio.name = 'plot.png'
|
||||
fig.savefig(bio)
|
||||
bio.seek(0)
|
||||
bio = output_fig(fig)
|
||||
|
||||
return None, bio
|
||||
|
||||
|
@ -562,11 +628,11 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -580,7 +646,10 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return "No chat titles in range", None
|
||||
|
||||
df['idx'] = np.arange(len(df))
|
||||
df['diff'] = -df['date'].diff(-1)
|
||||
|
@ -589,14 +658,14 @@ class StatsRunner(object):
|
|||
if end:
|
||||
last = pd.Timestamp(sql_dict['end_dt'], tz=self.tz).tz_convert('utc')
|
||||
else:
|
||||
last = pd.Timestamp(datetime.now(), tz='utc')
|
||||
last = pd.Timestamp(datetime.utcnow(), tz='utc')
|
||||
|
||||
df_end = df['end']
|
||||
df_end.iloc[-1] = last
|
||||
df.loc[:, 'end'] = df_end
|
||||
df.loc[:, 'diff'].iloc[-1] = df.iloc[-1]['end'] - df.iloc[-1]['date']
|
||||
|
||||
fig = Figure(constrained_layout=True, figsize=(12, 0.15 * len(df)))
|
||||
fig = Figure(constrained_layout=True, figsize=(12, 1+0.15 * len(df)))
|
||||
ax = fig.subplots()
|
||||
|
||||
if duration:
|
||||
|
@ -604,7 +673,7 @@ class StatsRunner(object):
|
|||
df = df.reset_index(drop=True)
|
||||
df['idx'] = df.index
|
||||
|
||||
ax.barh(df.idx, df['diff'].dt.days, tick_label=df.new_chat_title)
|
||||
ax.barh(df.idx, df['diff'].dt.days + df['diff'].dt.seconds / 86400, tick_label=df.new_chat_title)
|
||||
|
||||
ax.margins(0.2)
|
||||
ax.set_ylabel("")
|
||||
|
@ -640,10 +709,7 @@ class StatsRunner(object):
|
|||
ax.tick_params(axis='y', which='both', labelleft=False, left=False)
|
||||
sns.despine(fig=fig, left=True)
|
||||
|
||||
bio = BytesIO()
|
||||
bio.name = 'plot.png'
|
||||
fig.savefig(bio, dpi=200)
|
||||
bio.seek(0)
|
||||
bio = output_fig(fig)
|
||||
|
||||
return None, bio
|
||||
|
||||
|
@ -657,36 +723,36 @@ class StatsRunner(object):
|
|||
count_query = """
|
||||
SELECT COUNT(*)
|
||||
FROM "messages_utc"
|
||||
WHERE from_user = %(user)s;
|
||||
WHERE from_user = :user;
|
||||
"""
|
||||
|
||||
days_query = """
|
||||
SELECT EXTRACT(epoch FROM(NOW() - MIN(date))) / 86400 as "days"
|
||||
FROM "messages_utc"
|
||||
WHERE from_user = %(user)s;
|
||||
WHERE from_user = :user;
|
||||
"""
|
||||
|
||||
event_query = """
|
||||
SELECT date, event
|
||||
FROM user_events
|
||||
WHERE user_id = %(user)s
|
||||
WHERE user_id = :user
|
||||
ORDER BY "date";
|
||||
"""
|
||||
|
||||
username_query = """
|
||||
SELECT COUNT(*)
|
||||
FROM "user_names"
|
||||
WHERE user_id = %(user)s;
|
||||
WHERE user_id = :user;
|
||||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
result = con.execute(count_query, sql_dict)
|
||||
result = con.execute(text(count_query), sql_dict)
|
||||
msg_count: int = result.fetchall()[0][0]
|
||||
result = con.execute(days_query, sql_dict)
|
||||
result = con.execute(text(days_query), sql_dict)
|
||||
days: float = result.fetchall()[0][0]
|
||||
result = con.execute(event_query, sql_dict)
|
||||
result = con.execute(text(event_query), sql_dict)
|
||||
events: list = result.fetchall()
|
||||
result = con.execute(username_query, sql_dict)
|
||||
result = con.execute(text(username_query), sql_dict)
|
||||
name_count: int = result.fetchall()[0][0]
|
||||
|
||||
event_text = '\n'.join([f'{event.event} on {pd.to_datetime(event.date).tz_convert(self.tz)}'
|
||||
|
@ -696,13 +762,17 @@ class StatsRunner(object):
|
|||
if event_text:
|
||||
event_text = '\n' + event_text
|
||||
|
||||
text = f"Messages sent: {msg_count}\n" \
|
||||
f"Average messages per day: {msg_count/days:.2f}\n" \
|
||||
f"First message was {days:.2f} days ago.\n" \
|
||||
f"Usernames on record: {name_count}\n" \
|
||||
f"Average username lifetime: {days/name_count:.2f} days\n" + event_text
|
||||
try:
|
||||
out_text = f"Messages sent: {msg_count}\n" \
|
||||
f"Average messages per day: {msg_count / days:.2f}\n" \
|
||||
f"First message was {days:.2f} days ago.\n" \
|
||||
f"Usernames on record: {name_count}\n" \
|
||||
f"Average username lifetime: {days / name_count:.2f} days\n" + event_text
|
||||
except TypeError:
|
||||
return 'No data for user', None
|
||||
|
||||
return f"User {user[1].lstrip('@')}: ```\n{text}\n```", None
|
||||
|
||||
return f"User {user[1].lstrip('@')}: ```\n{out_text}\n```", None
|
||||
|
||||
def get_user_correlation(self, start: str = None, end: str = None, agg: bool = True, c_type: str = None,
|
||||
n: int = 5, thresh: float = 0.05, autouser=None, **kwargs) -> Tuple[str, None]:
|
||||
|
@ -721,11 +791,11 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -758,7 +828,11 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return 'No messages in range', None
|
||||
|
||||
df['msg_time'] = pd.to_datetime(df.msg_time)
|
||||
df['msg_time'] = df.msg_time.dt.tz_convert(self.tz)
|
||||
|
||||
|
@ -801,11 +875,11 @@ class StatsRunner(object):
|
|||
if n > len(me) // 2:
|
||||
n = int(len(me) // 2)
|
||||
|
||||
text = me.to_string(header=False, float_format=lambda x: f"{x:.3f}")
|
||||
split = text.splitlines()
|
||||
text = "\n".join(['HIGHEST CORRELATION:'] + split[:n] + ['\nLOWEST CORRELATION:'] + split[-n:])
|
||||
out_text = me.to_string(header=False, float_format=lambda x: f"{x:.3f}")
|
||||
split = out_text.splitlines()
|
||||
out_text = "\n".join(['HIGHEST CORRELATION:'] + split[:n] + ['\nLOWEST CORRELATION:'] + split[-n:])
|
||||
|
||||
return f"**User Correlations for {escape_markdown(user[1])}**\n```\n{text}\n```", None
|
||||
return f"**User Correlations for {escape_markdown(user[1])}**\n```\n{out_text}\n```", None
|
||||
|
||||
def get_message_deltas(self, lquery: str = None, start: str = None, end: str = None, n: int = 10, thresh: int = 500,
|
||||
autouser=None, **kwargs) -> Tuple[Union[str, None], Union[None, BytesIO]]:
|
||||
|
@ -826,11 +900,11 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -854,7 +928,7 @@ class StatsRunner(object):
|
|||
dense_rank() over (partition by from_user order by date)
|
||||
) as grp
|
||||
from messages_utc
|
||||
where from_user in (%(me)s, %(other)s) {where}
|
||||
where from_user in (:me, :other) {where}
|
||||
order by date
|
||||
) t
|
||||
group by from_user, grp
|
||||
|
@ -867,7 +941,7 @@ class StatsRunner(object):
|
|||
sql_dict['other'] = other
|
||||
|
||||
with self.engine.connect() as con:
|
||||
result = con.execute(query, sql_dict)
|
||||
result = con.execute(text(query), sql_dict)
|
||||
output: Tuple[timedelta, int] = result.fetchall()[0]
|
||||
|
||||
return output
|
||||
|
@ -884,9 +958,9 @@ class StatsRunner(object):
|
|||
if len(me) < 1:
|
||||
return "\n```\nSorry, not enough data, try a bigger date range or decrease -thresh.\n```", None
|
||||
|
||||
text = me.iloc[:n].to_string(header=False, index=True)
|
||||
out_text = me.iloc[:n].to_string(header=False, index=True)
|
||||
|
||||
return f"**Median message delays for {escape_markdown(user[1])} and:**\n```\n{text}\n```", None
|
||||
return f"**Median message delays for {escape_markdown(user[1])} and:**\n```\n{out_text}\n```", None
|
||||
|
||||
def get_type_stats(self, start: str = None, end: str = None, autouser=None, **kwargs) -> Tuple[str, None]:
|
||||
"""
|
||||
|
@ -900,11 +974,11 @@ class StatsRunner(object):
|
|||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
|
@ -921,13 +995,17 @@ class StatsRunner(object):
|
|||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
|
||||
if len(df) == 0:
|
||||
return 'No messages in range', None
|
||||
|
||||
df['Group Percent'] = df['count'] / df['count'].sum() * 100
|
||||
df.columns = ['type', 'Group Count', 'Group Percent']
|
||||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
query = f"""
|
||||
SELECT type, count(*) as user_count
|
||||
|
@ -939,25 +1017,27 @@ class StatsRunner(object):
|
|||
ORDER BY user_count DESC;
|
||||
"""
|
||||
with self.engine.connect() as con:
|
||||
df_u = pd.read_sql_query(query, con, params=sql_dict)
|
||||
df_u = pd.read_sql_query(text(query), con, params=sql_dict)
|
||||
df_u['User Percent'] = df_u['user_count'] / df_u['user_count'].sum() * 100
|
||||
df_u.columns = ['type', 'User Count', 'User Percent']
|
||||
|
||||
df = df.merge(df_u, on="type", how="outer")
|
||||
|
||||
a = list(zip(df.columns.values, ["Total"] + df.iloc[:, 1:].sum().to_list()))
|
||||
df = df.append({key: value for key, value in a}, ignore_index=True)
|
||||
df = pd.concat((df, pd.DataFrame({key: [value] for key, value in a})), ignore_index=True)
|
||||
|
||||
df['Group Count'] = df['Group Count'].astype('Int64')
|
||||
try:
|
||||
df['User Count'] = df['User Count'].astype('Int64')
|
||||
except KeyError:
|
||||
pass
|
||||
text = df.to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
|
||||
out_text = df.to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
|
||||
if user:
|
||||
return f"**Messages by type, {escape_markdown(user[1])} vs group:**\n```\n{text}\n```", None
|
||||
return f"**Messages by type, {escape_markdown(user[1])} vs group:**\n```\n{out_text}\n```", None
|
||||
else:
|
||||
return f"**Messages by type:**\n```\n{text}\n```", None
|
||||
return f"**Messages by type:**\n```\n{out_text}\n```", None
|
||||
|
||||
def get_word_stats(self, n: int = 4, limit: int = 20, start: str = None, end: str = None,
|
||||
user: Tuple[int, str] = None, **kwargs) -> Tuple[str, None]:
|
||||
|
@ -974,37 +1054,40 @@ class StatsRunner(object):
|
|||
if user:
|
||||
q = q.where(messages.c['from_user'] == user[0])
|
||||
if start:
|
||||
q = q.where(messages.c['date'] >= str(pd.to_datetime('2019')))
|
||||
q = q.where(messages.c['date'] >= str(pd.to_datetime(start)))
|
||||
if end:
|
||||
q = q.where(messages.c['date'] < str(pd.to_datetime('2019')))
|
||||
q = q.where(messages.c['date'] < str(pd.to_datetime(end)))
|
||||
|
||||
q = q.scalar_subquery()
|
||||
f = TsStat(q)
|
||||
stmt = select([f.c['word'], f.c['ndoc'], f.c['nentry']]) \
|
||||
stmt = select(f.columns['word'], f.columns['ndoc'], f.columns['nentry']) \
|
||||
.select_from(f)
|
||||
|
||||
if n:
|
||||
stmt = stmt.where(func.length(f.c['word']) >= n)
|
||||
stmt = stmt.where(func.length(f.columns['word']) >= n)
|
||||
|
||||
stmt = stmt.order_by(f.c.nentry.desc(),
|
||||
f.c.ndoc.desc(),
|
||||
f.c.word)
|
||||
stmt = stmt.order_by(f.columns['nentry'].desc(),
|
||||
f.columns['ndoc'].desc(),
|
||||
f.columns['word'])
|
||||
|
||||
if limit:
|
||||
stmt = stmt.limit(limit)\
|
||||
.compile(dialect=postgresql.dialect())
|
||||
stmt = stmt.limit(limit) \
|
||||
.compile(dialect=postgresql.dialect())
|
||||
|
||||
with self.engine.connect() as con:
|
||||
df = pd.read_sql_query(stmt, con)
|
||||
|
||||
if len(df) == 0:
|
||||
return 'No messages in range', None
|
||||
|
||||
df.columns = ['Lexeme', 'Messages', 'Uses']
|
||||
|
||||
text = df.to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
out_text = df.to_string(index=False, header=True, float_format=lambda x: f"{x:.1f}")
|
||||
|
||||
if user:
|
||||
return f"**Most frequently used lexemes, {escape_markdown(user[1].lstrip('@'))}\n```\n{text}\n```", None
|
||||
return f"**Most frequently used lexemes, {escape_markdown(user[1].lstrip('@'))}\n```\n{out_text}\n```", None
|
||||
else:
|
||||
return f"**Most frequently used lexemes, all users:**\n```\n{text}\n```", None
|
||||
return f"**Most frequently used lexemes, all users:**\n```\n{out_text}\n```", None
|
||||
|
||||
def get_random_message(self, lquery: str = None, start: str = None, end: str = None,
|
||||
user: Tuple[int, str] = None, **kwargs) -> Tuple[str, None]:
|
||||
|
@ -1022,41 +1105,41 @@ class StatsRunner(object):
|
|||
|
||||
if user:
|
||||
sql_dict['user'] = user[0]
|
||||
query_conditions.append("from_user = %(user)s")
|
||||
query_conditions.append("from_user = :user")
|
||||
|
||||
if start:
|
||||
sql_dict['start_dt'] = pd.to_datetime(start)
|
||||
query_conditions.append("date >= %(start_dt)s")
|
||||
query_conditions.append("date >= :start_dt")
|
||||
|
||||
if end:
|
||||
sql_dict['end_dt'] = pd.to_datetime(end)
|
||||
query_conditions.append("date < %(end_dt)s")
|
||||
query_conditions.append("date < :end_dt")
|
||||
|
||||
query_where = ""
|
||||
if query_conditions:
|
||||
query_where = f"AND {' AND '.join(query_conditions)}"
|
||||
|
||||
query = f"""
|
||||
SELECT date, from_user, text
|
||||
FROM messages_utc
|
||||
WHERE type = 'text'
|
||||
{query_where}
|
||||
ORDER BY RANDOM()
|
||||
LIMIT 1;
|
||||
SELECT date, from_user, text
|
||||
FROM messages_utc
|
||||
WHERE type = 'text'
|
||||
{query_where}
|
||||
ORDER BY RANDOM()
|
||||
LIMIT 1;
|
||||
"""
|
||||
|
||||
with self.engine.connect() as con:
|
||||
result = con.execute(query, sql_dict)
|
||||
result = con.execute(text(query), sql_dict)
|
||||
try:
|
||||
date, from_user, text = result.fetchall()[0]
|
||||
date, from_user, out_text = result.fetchall()[0]
|
||||
except IndexError:
|
||||
return "No matching messages", None
|
||||
|
||||
return f"*On {escape_markdown(date.strftime('%Y-%m-%d'))}, " \
|
||||
f"{escape_markdown(self.users[from_user][0]).lstrip('@')}" \
|
||||
f" gave these words of wisdom:*\n" \
|
||||
f"{escape_markdown(text)}\n", \
|
||||
None
|
||||
f"{escape_markdown(out_text)}\n", \
|
||||
None
|
||||
|
||||
|
||||
def get_parser(runner: StatsRunner) -> InternalParser:
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
import datetime
|
||||
import secrets
|
||||
from datetime import timedelta, timezone
|
||||
import json
|
||||
|
||||
import pytest
|
||||
from pytest_postgresql import factories
|
||||
from sqlalchemy import create_engine, text, NullPool, Engine
|
||||
from random_word.services.local import Local
|
||||
|
||||
from telegram_stats_bot.db import db_sql
|
||||
from telegram_stats_bot.log_storage import messages, user_names
|
||||
|
||||
|
||||
class RandomWords(Local):
    """Random word source that keeps the word database in memory.

    The word list is read once at construction time from ``self.source``
    (presumably set by the parent ``Local`` initializer -- confirm against
    random_word) and reused for every draw.
    """

    def __init__(self):
        super().__init__()
        with open(self.source) as handle:
            vocabulary = json.load(handle)
        # Only the keys of the JSON mapping are used as candidate words.
        self.words = list(vocabulary.keys())  # Cache loaded file

    def get_random_word(self):
        """
        Pick one cached word at random.

        The parent implementation reloads the json file on every call, which is slow.
        """
        picked = secrets.choice(self.words)
        return picked
|
||||
|
||||
|
||||
def generate_user_names(n_users=10) -> list[dict]:
    """Build ``n_users`` synthetic rows for the user_names table.

    Every row uses the same timestamp (2020-01-01 00:00 UTC), a sequential
    ``user_id`` starting at 0, an ``@``-prefixed random username and a random
    display name.
    """
    word_source = RandomWords()
    epoch = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

    rows = []
    for user_id in range(n_users):
        rows.append({
            'user_id': user_id,
            'date': epoch,
            'username': '@' + word_source.get_random_word(),
            'display_name': word_source.get_random_word(),
        })
    return rows
|
||||
|
||||
|
||||
def generate_message_data(n_rows=5000, n_users=10, n_titles=3) -> list[dict]:
    """Build ``n_rows`` synthetic message rows spaced one hour apart.

    Messages start at 2020-01-01 00:00 UTC, are assigned to users round-robin
    (``message_id % n_users``) and carry ten random words of text. ``n_titles``
    evenly spaced rows are then converted into ``new_chat_title`` events.
    """
    word_source = RandomWords()
    epoch = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc)

    chat = []
    for message_id in range(n_rows):
        body = ' '.join(word_source.get_random_word() for _ in range(10))
        chat.append({
            'message_id': message_id,
            'date': epoch + timedelta(hours=message_id),
            'from_user': message_id % n_users,
            'forward_from_message_id': None,
            'forward_from': None,
            'forward_from_chat': None,
            'caption': None,
            'text': body,
            'sticker_set_name': None,
            'new_chat_title': None,
            'reply_to_message': None,
            'file_id': None,
            'type': 'text',
        })

    # Add new chat titles
    for k in range(n_titles):
        title_row = chat[n_rows // n_titles * k]
        title_row['type'] = 'new_chat_title'
        title_row['new_chat_title'] = word_source.get_random_word()
        title_row['text'] = None
    return chat
|
||||
|
||||
|
||||
# Shared synthetic dataset, generated once at import time. The database loader
# below inserts these tables, and the names are imported by the test modules.
n_users = 10
n_rows = 5000
user_table = generate_user_names(n_users=n_users)
message_table = generate_message_data(n_rows=n_rows, n_users=n_users)
|
||||
|
||||
|
||||
def load_database(**kwargs):
    """Create the schema and insert the synthetic dataset into a database.

    Passed to ``factories.postgresql_proc(load=[...])``; reads the
    ``password``, ``host``, ``port`` and ``dbname`` keyword arguments to build
    the connection URL (the user name is fixed to ``postgres``).

    The engine is disposed once loading finishes so that no pooled connection
    to the freshly loaded database is left open behind this function.
    """
    url = ("postgresql+psycopg://" +
           f"postgres:{kwargs['password']}@{kwargs['host']}:{kwargs['port']}/{kwargs['dbname']}")
    engine = create_engine(url)
    try:
        with engine.connect() as con:
            con.execute(text(db_sql))
            con.execute(user_names.insert(), user_table)
            con.execute(messages.insert(), message_table)
            con.commit()
    finally:
        # Release the connection pool even if schema creation or inserts fail.
        engine.dispose()
|
||||
|
||||
|
||||
# Process-level fixture whose database is populated by load_database.
psql_proc_loaded = factories.postgresql_proc(load=[load_database])

# Client fixture bound (by name) to the process fixture above.
psql_loaded = factories.postgresql("psql_proc_loaded")
|
||||
|
||||
|
||||
@pytest.fixture
def db_connection(psql_loaded) -> Engine:
    """Return a SQLAlchemy engine pointing at the loaded test database.

    The URL is assembled from ``psql_loaded.info`` with an empty password, and
    the engine uses ``NullPool`` with statement echoing disabled.
    """
    info = psql_loaded.info
    url = f'postgresql+psycopg://{info.user}:@{info.host}:{info.port}/{info.dbname}'
    return create_engine(url, echo=False, poolclass=NullPool)
|
|
@ -0,0 +1,9 @@
|
|||
from sqlalchemy import text, inspect
|
||||
from tests.conftest import n_rows
|
||||
|
||||
|
||||
def test_db_load(db_connection):
    """Check that the main postgresql fixture has the expected tables and row count."""
    expected_tables = {'messages_utc', 'user_events', 'user_names'}
    found_tables = set(inspect(db_connection).get_table_names())
    assert found_tables == expected_tables

    with db_connection.connect() as con:
        count_row = con.execute(text("select count(*) from messages_utc")).fetchone()
        assert count_row[0] == n_rows
|
|
@ -0,0 +1,361 @@
|
|||
from io import BytesIO
|
||||
|
||||
from tests.conftest import n_users, n_rows, user_table
|
||||
from telegram_stats_bot.stats import StatsRunner, HelpException
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def sr(db_connection):
    """Provide a StatsRunner built on the ``db_connection`` fixture."""
    runner = StatsRunner(db_connection)
    return runner
|
||||
|
||||
|
||||
def test_get_message_user_ids(sr):
    """The ids reported by the runner must be exactly 0..len(user_table)-1."""
    expected_ids = set(range(len(user_table)))
    reported_ids = set(sr.get_message_user_ids())
    assert reported_ids == expected_ids
|
||||
|
||||
|
||||
def test_get_db_users(sr):
    """Each user returned by the runner must match its row in ``user_table``."""
    for user_id, names in sr.get_db_users().items():
        username, display_name = names
        expected = user_table[user_id]
        assert username == expected['username']
        assert display_name == expected['display_name']
|
||||
|
||||
|
||||
@pytest.mark.usefixtures('sr')
class TestChatCounts:
    """Tests for ``StatsRunner.get_chat_counts`` against the synthetic dataset."""

    def test_basic(self, sr):
        # Smoke test: only checks that the call completes.
        sr.get_chat_counts()

    def test_lquery(self, sr):
        # Smoke test with a lexeme query; the result is not inspected.
        sr.get_chat_counts(lquery='dogdfs')

    def test_mtype_valid(self, sr):
        reply = sr.get_chat_counts(mtype='text')
        # Presumably two framing lines plus one row per user -- verify against get_chat_counts.
        assert reply[0].count('\n') == 2 + n_users

    def test_mtype_empty(self, sr):
        reply = sr.get_chat_counts(mtype='sticker')
        assert reply[0] == 'No matching messages'

    def test_n(self, sr):
        reply = sr.get_chat_counts(n=3)
        assert reply[0].count('\n') == 2 + 3

    def test_start_out_of_bounds(self, sr):
        reply = sr.get_chat_counts(start='2023')
        assert reply[0] == 'No matching messages'

    def test_start_valid(self, sr):
        reply = sr.get_chat_counts(start='2020')
        assert reply[0].count('\n') == 2 + n_users

    def test_end_out_of_bounds(self, sr):
        reply = sr.get_chat_counts(end='2019')
        assert reply[0] == 'No matching messages'

    def test_end_valid(self, sr):
        reply = sr.get_chat_counts(end='2025')
        assert reply[0].count('\n') == 2 + n_users
|
||||
|
||||
|
||||
class TestChatECDF:
    """Tests for ``StatsRunner.get_chat_ecdf``: a BytesIO on success, a message otherwise."""

    def test_basic(self, sr):
        image = sr.get_chat_ecdf()[1]
        assert isinstance(image, BytesIO)

    def test_lquery(self, sr):
        message = sr.get_chat_ecdf(lquery='dogdfskjweadsf')[0]
        assert message == 'No matching messages'

    def test_mtype_valid(self, sr):
        image = sr.get_chat_ecdf(mtype='text')[1]
        assert isinstance(image, BytesIO)

    def test_mtype_empty(self, sr):
        message = sr.get_chat_ecdf(mtype='sticker')[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_chat_ecdf(start='2023')[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        image = sr.get_chat_ecdf(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_chat_ecdf(end='2019')[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        image = sr.get_chat_ecdf(end='2025')[1]
        assert isinstance(image, BytesIO)

    def test_log(self, sr):
        image = sr.get_chat_ecdf(log=True)[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestHours:
    """Tests for ``StatsRunner.get_counts_by_hour``."""

    def test_basic(self, sr):
        image = sr.get_counts_by_hour()[1]
        assert isinstance(image, BytesIO)

    def test_user(self, sr):
        first_user = (0, user_table[0]['username'])
        image = sr.get_counts_by_hour(user=first_user)[1]
        assert isinstance(image, BytesIO)

    def test_lquery(self, sr):
        message = sr.get_counts_by_hour(lquery='dogsadfadsdfs')[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_counts_by_hour(start='2023')[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        image = sr.get_counts_by_hour(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_counts_by_hour(end='2019')[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        image = sr.get_counts_by_hour(end='2025')[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestDays:
    """Tests for ``StatsRunner.get_counts_by_day``."""

    def test_basic(self, sr):
        image = sr.get_counts_by_day()[1]
        assert isinstance(image, BytesIO)

    def test_user(self, sr):
        first_user = (0, user_table[0]['username'])
        image = sr.get_counts_by_day(user=first_user)[1]
        assert isinstance(image, BytesIO)

    def test_lquery(self, sr):
        message = sr.get_counts_by_day(lquery='dogasdfdfs')[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_counts_by_day(start='2023')[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        image = sr.get_counts_by_day(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_counts_by_day(end='2019')[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        image = sr.get_counts_by_day(end='2025')[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestWeek:
    """Tests for ``StatsRunner.get_week_by_hourday``."""

    def test_basic(self, sr):
        image = sr.get_week_by_hourday()[1]
        assert isinstance(image, BytesIO)

    def test_user(self, sr):
        first_user = (0, user_table[0]['username'])
        image = sr.get_week_by_hourday(user=first_user)[1]
        assert isinstance(image, BytesIO)

    def test_lquery(self, sr):
        message = sr.get_week_by_hourday(lquery='dogasdfdfs')[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_week_by_hourday(start='2023')[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        image = sr.get_week_by_hourday(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_week_by_hourday(end='2019')[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        image = sr.get_week_by_hourday(end='2025')[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestHistory:
    """Tests for ``StatsRunner.get_message_history``."""

    def test_basic(self, sr):
        image = sr.get_message_history()[1]
        assert isinstance(image, BytesIO)

    def test_user(self, sr):
        first_user = (0, user_table[0]['username'])
        image = sr.get_message_history(user=first_user)[1]
        assert isinstance(image, BytesIO)

    def test_lquery(self, sr):
        message = sr.get_message_history(lquery='dogasdfdfs')[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_message_history(start='2023')[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        image = sr.get_message_history(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_message_history(end='2019')[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        image = sr.get_message_history(end='2025')[1]
        assert isinstance(image, BytesIO)

    def test_averages(self, sr):
        image = sr.get_message_history(averages=50)[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestTitleHistory:
    """Tests for ``StatsRunner.get_title_history``."""

    def test_basic(self, sr):
        image = sr.get_title_history()[1]
        assert isinstance(image, BytesIO)

    def test_duration(self, sr):
        image = sr.get_title_history(duration=True)[1]
        assert isinstance(image, BytesIO)

    def test_start_out_of_bounds(self, sr):
        message = sr.get_title_history(start='2023')[0]
        assert message == "No chat titles in range"

    def test_start_valid(self, sr):
        image = sr.get_title_history(start='2020')[1]
        assert isinstance(image, BytesIO)

    def test_end_out_of_bounds(self, sr):
        message = sr.get_title_history(end='2019')[0]
        assert message == "No chat titles in range"

    def test_end_valid(self, sr):
        image = sr.get_title_history(end='2025')[1]
        assert isinstance(image, BytesIO)
|
||||
|
||||
|
||||
class TestUserSummary:
    """Tests for ``StatsRunner.get_user_summary``."""

    def test_basic(self, sr):
        # Smoke test: only checks that the call completes for an existing user.
        first_user = (0, user_table[0]['username'])
        sr.get_user_summary(user=first_user)

    def test_user_out_of_bounds(self, sr):
        # The id is one past the last generated user; the username is reused from user 0.
        unknown_user = (len(user_table), user_table[0]['username'])
        message = sr.get_user_summary(user=unknown_user)[0]
        assert message == 'No data for user'
|
||||
|
||||
|
||||
class TestUserCorrelation:
    """Tests for ``StatsRunner.get_user_correlation`` using the first generated user."""

    # (user_id, username) pair for the first row of the synthetic user table.
    first_user = (0, user_table[0]['username'])

    def test_basic(self, sr):
        message = sr.get_user_correlation(user=self.first_user)[0]
        assert message

    def test_start_out_of_bounds(self, sr):
        message = sr.get_user_correlation(start='2023', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_start_valid(self, sr):
        message = sr.get_user_correlation(start='2019', user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_end_out_of_bounds(self, sr):
        message = sr.get_user_correlation(end='2019', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_end_valid(self, sr):
        message = sr.get_user_correlation(end='2025', user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_agg_false(self, sr):
        message = sr.get_user_correlation(agg=False, user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_c_spearman(self, sr):
        message = sr.get_user_correlation(c_type='spearman', user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_n(self, sr):
        message = sr.get_user_correlation(n=10, user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_thresh_valid(self, sr):
        message = sr.get_user_correlation(thresh=0.1, user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_thresh_invalid(self, sr):
        # A threshold of 1.2 is expected to be rejected with HelpException.
        with pytest.raises(HelpException):
            sr.get_user_correlation(thresh=1.2, user=self.first_user)
|
||||
|
||||
|
||||
class TestDeltas:
    """Tests for ``StatsRunner.get_message_deltas``.

    The tests treat the substring 'Sorry' in the returned text as the marker
    for a failed/empty result.
    """

    # (user_id, username) pair for the first row of the synthetic user table.
    first_user = (0, user_table[0]['username'])

    def test_basic(self, sr):
        message = sr.get_message_deltas(user=self.first_user)[0]
        assert 'Sorry' not in message

    def test_lquery(self, sr):
        message = sr.get_message_deltas(lquery='dogsdfsdsdfs', user=self.first_user)[0]
        assert 'Sorry' in message

    def test_start_out_of_bounds(self, sr):
        message = sr.get_message_deltas(start='2023', user=self.first_user)[0]
        assert 'Sorry' in message

    def test_start_valid(self, sr):
        message = sr.get_message_deltas(start='2019', user=self.first_user)[0]
        assert 'Sorry' not in message

    def test_end_out_of_bounds(self, sr):
        message = sr.get_message_deltas(end='2019', user=self.first_user)[0]
        assert 'Sorry' in message

    def test_end_valid(self, sr):
        message = sr.get_message_deltas(end='2025', user=self.first_user)[0]
        assert 'Sorry' not in message

    def test_n(self, sr):
        message = sr.get_message_deltas(n=4, user=self.first_user)[0]
        # Presumably two framing lines plus one row per requested entry -- verify.
        assert message.count('\n') == 2 + 4

    def test_thresh_valid(self, sr):
        message = sr.get_message_deltas(thresh=30, user=self.first_user)[0]
        assert 'Sorry' not in message

    def test_thresh_invalid(self, sr):
        message = sr.get_message_deltas(thresh=3000, user=self.first_user)[0]
        assert 'Sorry' in message
|
||||
|
||||
|
||||
class TestTypeStats:
    """Tests for ``StatsRunner.get_type_stats`` using the first generated user."""

    # (user_id, username) pair for the first row of the synthetic user table.
    first_user = (0, user_table[0]['username'])

    def test_basic(self, sr):
        message = sr.get_type_stats(user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_type_stats(start='2023', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_start_valid(self, sr):
        message = sr.get_type_stats(start='2019', user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_end_out_of_bounds(self, sr):
        message = sr.get_type_stats(end='2019', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_end_valid(self, sr):
        message = sr.get_type_stats(end='2025', user=self.first_user)[0]
        assert message != 'No messages in range'
|
||||
|
||||
|
||||
class TestWordStats:
    """Tests for ``StatsRunner.get_word_stats`` using the first generated user."""

    # (user_id, username) pair for the first row of the synthetic user table.
    first_user = (0, user_table[0]['username'])

    def test_basic(self, sr):
        message = sr.get_word_stats(user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_n(self, sr):
        message = sr.get_word_stats(n=6, user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_limit(self, sr):
        message = sr.get_word_stats(limit=4, user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_word_stats(start='2023', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_start_valid(self, sr):
        message = sr.get_word_stats(start='2019', user=self.first_user)[0]
        assert message != 'No messages in range'

    def test_end_out_of_bounds(self, sr):
        message = sr.get_word_stats(end='2019', user=self.first_user)[0]
        assert message == 'No messages in range'

    def test_end_valid(self, sr):
        message = sr.get_word_stats(end='2025', user=self.first_user)[0]
        assert message != 'No messages in range'
|
||||
|
||||
|
||||
class TestRandom:
    """Tests for ``StatsRunner.get_random_message`` using the first generated user."""

    # (user_id, username) pair for the first row of the synthetic user table.
    first_user = (0, user_table[0]['username'])

    def test_basic(self, sr):
        message = sr.get_random_message(user=self.first_user)[0]
        assert message != 'No matching messages'

    def test_lquery(self, sr):
        message = sr.get_random_message(lquery='sadflkjdsflkj', user=self.first_user)[0]
        assert message == 'No matching messages'

    def test_start_out_of_bounds(self, sr):
        message = sr.get_random_message(start='2023', user=self.first_user)[0]
        assert message == 'No matching messages'

    def test_start_valid(self, sr):
        message = sr.get_random_message(start='2019', user=self.first_user)[0]
        assert message != 'No matching messages'

    def test_end_out_of_bounds(self, sr):
        message = sr.get_random_message(end='2019', user=self.first_user)[0]
        assert message == 'No matching messages'

    def test_end_valid(self, sr):
        message = sr.get_random_message(end='2025', user=self.first_user)[0]
        assert message != 'No matching messages'
|
||||
|
Ładowanie…
Reference in New Issue