Add title history plot (#1)

* stats.py: Add title history plot

* Add title history plot to documentation

* Update CHANGELOG.rst

* stats.py: titles: fix end timezone issue

Co-authored-by: Michael DM Dryden <mk.dryden@utoronto.ca>
pull/2/head
mkdryden 2020-06-16 03:48:39 -04:00 zatwierdzone przez GitHub
rodzic 934c9a3ef6
commit 76481bcf37
Nie znaleziono w bazie danych klucza dla tego podpisu
ID klucza GPG: 4AEE18F83AFDEB23
4 zmienionych plików z 94 dodań i 2 usunięć

Wyświetl plik

@ -13,6 +13,7 @@ and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0
Added
-----
- Message type statistics ``/stats types``
- Group title history plot ``/stats titles``
Fixed
-----

Wyświetl plik

@ -43,7 +43,7 @@ Table of contents
- `Fetching Stats`_
- `counts`_
- `counts`_
- `hours`_
@ -53,6 +53,8 @@ Table of contents
- `history`_
- `titles`_
- `corr`_
- `delta`_
@ -220,6 +222,13 @@ history
.. image:: examples/history.png
:alt: Example of history plot
titles
------
``/stats titles`` returns a plot of group titles over time.
.. image:: examples/titles.png
:alt: Example of title history plot
corr
----
``/stats corr`` returns a list of users with the highest and lowest message time correlations with the requesting user.

BIN
examples/titles.png 100644

Plik binarny nie jest wyświetlany.

Po

Szerokość:  |  Wysokość:  |  Rozmiar: 17 KiB

Wyświetl plik

@ -26,11 +26,13 @@ from io import BytesIO
import argparse
import inspect
import re
from datetime import timedelta
from datetime import timedelta, datetime
import pandas as pd
import seaborn as sns
import numpy as np
from matplotlib.figure import Figure
from matplotlib.dates import date2num
from sqlalchemy.engine import Engine
from .utils import escape_markdown
@ -67,6 +69,7 @@ class StatsRunner(object):
'days': "get_counts_by_day",
'week': "get_week_by_hourday",
'history': "get_message_history",
'titles': 'get_title_history',
'corr': "get_user_correlation",
'delta': "get_message_deltas",
'types': "get_type_stats"}
@ -470,6 +473,85 @@ class StatsRunner(object):
return None, bio
def get_title_history(self, start: str = None, end: str = None) \
-> Tuple[None, BytesIO]:
"""
Make a plot of group titles history over time
:param start: Start timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
:param end: End timestamp (e.g. 2019, 2019-01, 2019-01-01, "2019-01-01 14:21")
"""
query_conditions = []
sql_dict = {}
if start:
sql_dict['start_dt'] = pd.to_datetime(start)
query_conditions.append("date >= %(start_dt)s")
if end:
sql_dict['end_dt'] = pd.to_datetime(end)
query_conditions.append("date < %(end_dt)s")
query_where = ""
if query_conditions:
query_where = f"AND {' AND '.join(query_conditions)}"
query = f"""
SELECT date, new_chat_title
FROM messages_utc
WHERE type = 'new_chat_title' {query_where}
ORDER BY date;
"""
with self.engine.connect() as con:
df = pd.read_sql_query(query, con, params=sql_dict)
df['idx'] = np.arange(len(df))
df['diff'] = df['date'].diff(-1)
df['end'] = df['date'] - df['diff']
if end:
last = pd.Timestamp(sql_dict['end_dt'], tz=self.tz).tz_convert('utc')
else:
last = pd.Timestamp(datetime.now(), tz='utc')
df_end = df['end']
df_end.iloc[-1] = last
df.loc[:, 'end'] = df_end
fig = Figure(constrained_layout=True, figsize=(12, 0.15 * len(df)))
ax = fig.subplots()
x = df.iloc[:-1].end
y = df.iloc[:-1].idx + .5
ax.scatter(x, y, zorder=4, color=sns.color_palette()[1])
titles = list(zip(df.date.apply(date2num),
df.end.apply(date2num) - df.date.apply(date2num)))
for n, i in enumerate(titles):
ax.broken_barh([i], (n, 1))
ax.annotate(df.new_chat_title[n], xy=(i[0] + i[1], n), xycoords='data',
xytext=(10, 0), textcoords='offset points',
horizontalalignment='left', verticalalignment='bottom')
ax.margins(0.2)
ax.set_ylabel("")
ax.set_ylim(-1, (df.idx.max() + 1))
ax.set_xlim(titles[0][0] - 1, None)
ax.set_xlabel("")
ax.set_title("Chat Title History")
ax.grid(False, which='both', axis='y')
ax.tick_params(axis='y', which='both', labelleft=False, left=False)
sns.despine(fig=fig, left=True)
bio = BytesIO()
bio.name = 'plot.png'
fig.savefig(bio, dpi=200)
bio.seek(0)
return None, bio
def get_user_correlation(self, start: str = None, end: str = None, agg: bool = True, c_type: str = None,
n: int = 5, thresh: float = 0.05, autouser=None, **kwargs) -> Tuple[str, None]:
"""