datasette/datasette/views/table.py

import sqlite3
import urllib

import jinja2
from sanic.exceptions import NotFound
from sanic.request import RequestParameters

from datasette.utils import (
    Filters,
    compound_keys_after_sql,
    escape_sqlite,
    filters_should_redirect,
    is_url,
    path_from_row_pks,
    path_with_added_args,
    to_css_class,
    urlsafe_components
)

from .base import BaseView, DatasetteError, ureg


class RowTableShared(BaseView):

    def sortable_columns_for_table(self, name, table, use_rowid):
        table_metadata = self.table_metadata(name, table)
        if "sortable_columns" in table_metadata:
            sortable_columns = set(table_metadata["sortable_columns"])
        else:
            table_info = self.ds.inspect()[name]["tables"].get(table) or {}
            sortable_columns = set(table_info.get("columns", []))
        if use_rowid:
            sortable_columns.add("rowid")
        return sortable_columns

    async def display_columns_and_rows(
        self,
        database,
        table,
        description,
        rows,
        link_column=False,
        expand_foreign_keys=True,
    ):
        "Returns columns, rows for specified table - including fancy foreign key treatment"
        table_metadata = self.table_metadata(database, table)
        info = self.ds.inspect()[database]
        sortable_columns = self.sortable_columns_for_table(database, table, True)
        columns = [
            {"name": r[0], "sortable": r[0] in sortable_columns} for r in description
        ]
        tables = info["tables"]
        table_info = tables.get(table) or {}
        pks = table_info.get("primary_keys") or []

        # Prefetch foreign key resolutions for later expansion:
        fks = {}
        labeled_fks = {}
        if table_info and expand_foreign_keys:
            foreign_keys = table_info["foreign_keys"]["outgoing"]
            for fk in foreign_keys:
                label_column = (
                    # First look in metadata.json definition for this foreign key table:
                    self.table_metadata(database, fk["other_table"]).get("label_column")
                    # Fall back to label_column from .inspect() detection:
                    or tables.get(fk["other_table"], {}).get("label_column")
                )
                if not label_column:
                    # No label for this FK
                    fks[fk["column"]] = fk["other_table"]
                    continue

                ids_to_lookup = set([row[fk["column"]] for row in rows])
                sql = 'select "{other_column}", "{label_column}" from {other_table} where "{other_column}" in ({placeholders})'.format(
                    other_column=fk["other_column"],
                    label_column=label_column,
                    other_table=escape_sqlite(fk["other_table"]),
                    placeholders=", ".join(["?"] * len(ids_to_lookup)),
                )
                try:
                    results = await self.execute(
                        database, sql, list(set(ids_to_lookup))
                    )
                except sqlite3.OperationalError:
                    # Probably hit the timelimit
                    pass
                else:
                    for id, value in results:
                        labeled_fks[(fk["column"], id)] = (fk["other_table"], value)

        cell_rows = []
        for row in rows:
            cells = []
            # Unless we are a view, the first column is a link - either to the rowid
            # or to the simple or compound primary key
            if link_column:
                cells.append(
                    {
                        "column": pks[0] if len(pks) == 1 else "Link",
                        "value": jinja2.Markup(
                            '<a href="/{database}/{table}/{flat_pks_quoted}">{flat_pks}</a>'.format(
                                database=database,
                                table=urllib.parse.quote_plus(table),
                                flat_pks=str(
                                    jinja2.escape(
                                        path_from_row_pks(row, pks, not pks, False)
                                    )
                                ),
                                flat_pks_quoted=path_from_row_pks(row, pks, not pks),
                            )
                        ),
                    }
                )

            for value, column_dict in zip(row, columns):
                column = column_dict["name"]
                if link_column and len(pks) == 1 and column == pks[0]:
                    # If there's a simple primary key, don't repeat the value as it's
                    # already shown in the link column.
                    continue

                if (column, value) in labeled_fks:
                    other_table, label = labeled_fks[(column, value)]
                    display_value = jinja2.Markup(
                        '<a href="/{database}/{table}/{link_id}">{label}</a>&nbsp;<em>{id}</em>'.format(
                            database=database,
                            table=urllib.parse.quote_plus(other_table),
                            link_id=urllib.parse.quote_plus(str(value)),
                            id=str(jinja2.escape(value)),
                            label=str(jinja2.escape(label)),
                        )
                    )
                elif column in fks:
                    display_value = jinja2.Markup(
                        '<a href="/{database}/{table}/{link_id}">{id}</a>'.format(
                            database=database,
                            table=urllib.parse.quote_plus(fks[column]),
                            link_id=urllib.parse.quote_plus(str(value)),
                            id=str(jinja2.escape(value)),
                        )
                    )
                elif value is None:
                    display_value = jinja2.Markup("&nbsp;")
                elif is_url(str(value).strip()):
                    display_value = jinja2.Markup(
                        '<a href="{url}">{url}</a>'.format(
                            url=jinja2.escape(value.strip())
                        )
                    )
                elif column in table_metadata.get("units", {}) and value != "":
                    # Interpret units using pint
                    value = value * ureg(table_metadata["units"][column])
                    # Pint uses floating point which sometimes introduces errors in the compact
                    # representation, which we have to round off to avoid ugliness. In the vast
                    # majority of cases this rounding will be inconsequential. I hope.
                    value = round(value.to_compact(), 6)
                    display_value = jinja2.Markup(
                        "{:~P}".format(value).replace(" ", "&nbsp;")
                    )
                else:
                    display_value = str(value)

                cells.append({"column": column, "value": display_value})
            cell_rows.append(cells)

        if link_column:
            # Add the link column header.
            # If it's a simple primary key, we have to remove and re-add that column name at
            # the beginning of the header row.
            if len(pks) == 1:
                columns = [col for col in columns if col["name"] != pks[0]]

            columns = [
                {"name": pks[0] if len(pks) == 1 else "Link", "sortable": len(pks) == 1}
            ] + columns
        return columns, cell_rows


class TableView(RowTableShared):

    async def data(self, request, name, hash, table):
        table = urllib.parse.unquote_plus(table)
        canned_query = self.ds.get_canned_query(name, table)
        if canned_query is not None:
            return await self.custom_sql(
                request,
                name,
                hash,
                canned_query["sql"],
                editable=False,
                canned_query=table,
            )

        is_view = bool(
            list(
                await self.execute(
                    name,
                    "SELECT count(*) from sqlite_master WHERE type = 'view' and name=:n",
                    {"n": table},
                )
            )[
                0
            ][
                0
            ]
        )
        view_definition = None
        table_definition = None
        if is_view:
            view_definition = list(
                await self.execute(
                    name,
                    'select sql from sqlite_master where name = :n and type="view"',
                    {"n": table},
                )
            )[
                0
            ][
                0
            ]
        else:
            table_definition_rows = list(
                await self.execute(
                    name,
                    'select sql from sqlite_master where name = :n and type="table"',
                    {"n": table},
                )
            )
            if not table_definition_rows:
                raise NotFound("Table not found: {}".format(table))

            table_definition = table_definition_rows[0][0]
        info = self.ds.inspect()
        table_info = info[name]["tables"].get(table) or {}
        pks = table_info.get("primary_keys") or []
        use_rowid = not pks and not is_view
        if use_rowid:
            select = "rowid, *"
            order_by = "rowid"
            order_by_pks = "rowid"
        else:
            select = "*"
            order_by_pks = ", ".join([escape_sqlite(pk) for pk in pks])
            order_by = order_by_pks

        if is_view:
            order_by = ""

        # We roll our own query_string decoder because by default Sanic
        # drops anything with an empty value e.g. ?name__exact=
        args = RequestParameters(
            urllib.parse.parse_qs(request.query_string, keep_blank_values=True)
        )

        # Special args start with _ and do not contain a __
        # That's so if there is a column that starts with _
        # it can still be queried using ?_col__exact=blah
        special_args = {}
        special_args_lists = {}
        other_args = {}
        for key, value in args.items():
            if key.startswith("_") and "__" not in key:
                special_args[key] = value[0]
                special_args_lists[key] = value
            else:
                other_args[key] = value[0]

        # Handle ?_filter_column and redirect, if present
        redirect_params = filters_should_redirect(special_args)
        if redirect_params:
            return self.redirect(
                request,
                path_with_added_args(request, redirect_params),
                forward_querystring=False,
            )

        # Spot ?_sort_by_desc and redirect to _sort_desc=(_sort)
        if "_sort_by_desc" in special_args:
            return self.redirect(
                request,
                path_with_added_args(
                    request,
                    {
                        "_sort_desc": special_args.get("_sort"),
                        "_sort_by_desc": None,
                        "_sort": None,
                    },
                ),
                forward_querystring=False,
            )

        table_metadata = self.table_metadata(name, table)
        units = table_metadata.get("units", {})
        filters = Filters(sorted(other_args.items()), units, ureg)
        where_clauses, params = filters.build_where_clauses()

        # _search support:
        fts_table = info[name]["tables"].get(table, {}).get("fts_table")
        search_args = dict(
            pair for pair in special_args.items() if pair[0].startswith("_search")
        )
        search_descriptions = []
        search = ""
        if fts_table and search_args:
            if "_search" in search_args:
                # Simple ?_search=xxx
                search = search_args["_search"]
                where_clauses.append(
                    "rowid in (select rowid from [{fts_table}] where [{fts_table}] match :search)".format(
                        fts_table=fts_table
                    )
                )
                search_descriptions.append('search matches "{}"'.format(search))
                params["search"] = search
            else:
                # More complex: search against specific columns
                valid_columns = set(info[name]["tables"][fts_table]["columns"])
                for i, (key, search_text) in enumerate(search_args.items()):
                    search_col = key.split("_search_", 1)[1]
                    if search_col not in valid_columns:
                        raise DatasetteError("Cannot search by that column", status=400)

                    where_clauses.append(
                        "rowid in (select rowid from [{fts_table}] where [{search_col}] match :search_{i})".format(
                            fts_table=fts_table, search_col=search_col, i=i
                        )
                    )
                    search_descriptions.append(
                        'search column "{}" matches "{}"'.format(
                            search_col, search_text
                        )
                    )
                    params["search_{}".format(i)] = search_text

        table_rows_count = None
        sortable_columns = set()
        if not is_view:
            table_rows_count = table_info["count"]
            sortable_columns = self.sortable_columns_for_table(name, table, use_rowid)

        # Allow for custom sort order
        sort = special_args.get("_sort")
        if sort:
            if sort not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort))

            order_by = escape_sqlite(sort)
        sort_desc = special_args.get("_sort_desc")
        if sort_desc:
            if sort_desc not in sortable_columns:
                raise DatasetteError("Cannot sort table by {}".format(sort_desc))

            if sort:
                raise DatasetteError("Cannot use _sort and _sort_desc at the same time")

            order_by = "{} desc".format(escape_sqlite(sort_desc))

        from_sql = "from {table_name} {where}".format(
            table_name=escape_sqlite(table),
            where=(
                "where {} ".format(" and ".join(where_clauses))
            ) if where_clauses else "",
        )
        count_sql = "select count(*) {}".format(from_sql)

        _next = special_args.get("_next")
        offset = ""
        if _next:
            if is_view:
                # _next is an offset
                offset = " offset {}".format(int(_next))
            else:
                components = urlsafe_components(_next)
                # If a sort order is applied, the first of these is the sort value
                if sort or sort_desc:
                    sort_value = components[0]
                    # Special case for if non-urlencoded first token was $null
                    if _next.split(",")[0] == "$null":
                        sort_value = None
                    components = components[1:]

                # Figure out the SQL for next-based-on-primary-key first
                next_by_pk_clauses = []
                if use_rowid:
                    next_by_pk_clauses.append("rowid > :p{}".format(len(params)))
                    params["p{}".format(len(params))] = components[0]
                else:
                    # Apply the tie-breaker based on primary keys
                    if len(components) == len(pks):
                        param_len = len(params)
                        next_by_pk_clauses.append(
                            compound_keys_after_sql(pks, param_len)
                        )
                        for i, pk_value in enumerate(components):
                            params["p{}".format(param_len + i)] = pk_value

                # Now add the sort SQL, which may incorporate next_by_pk_clauses
                if sort or sort_desc:
                    if sort_value is None:
                        if sort_desc:
                            # Just items where column is null ordered by pk
                            where_clauses.append(
                                "({column} is null and {next_clauses})".format(
                                    column=escape_sqlite(sort_desc),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                        else:
                            where_clauses.append(
                                "({column} is not null or ({column} is null and {next_clauses}))".format(
                                    column=escape_sqlite(sort),
                                    next_clauses=" and ".join(next_by_pk_clauses),
                                )
                            )
                    else:
                        where_clauses.append(
                            "({column} {op} :p{p}{extra_desc_only} or ({column} = :p{p} and {next_clauses}))".format(
                                column=escape_sqlite(sort or sort_desc),
                                op=">" if sort else "<",
                                p=len(params),
                                extra_desc_only="" if sort else " or {column2} is null".format(
                                    column2=escape_sqlite(sort or sort_desc)
                                ),
                                next_clauses=" and ".join(next_by_pk_clauses),
                            )
                        )
                        params["p{}".format(len(params))] = sort_value
                    order_by = "{}, {}".format(order_by, order_by_pks)
                else:
                    where_clauses.extend(next_by_pk_clauses)

        where_clause = ""
        if where_clauses:
            where_clause = "where {} ".format(" and ".join(where_clauses))

        if order_by:
            order_by = "order by {} ".format(order_by)

        # _group_count=col1&_group_count=col2
        group_count = special_args_lists.get("_group_count") or []
        if group_count:
            sql = 'select {group_cols}, count(*) as "count" from {table_name} {where} group by {group_cols} order by "count" desc limit 100'.format(
                group_cols=", ".join(
                    '"{}"'.format(group_count_col) for group_count_col in group_count
                ),
                table_name=escape_sqlite(table),
                where=where_clause,
            )
            return await self.custom_sql(request, name, hash, sql, editable=True)

        extra_args = {}
        # Handle ?_page_size=500
        page_size = request.raw_args.get("_size")
        if page_size:
            if page_size == "max":
                page_size = self.max_returned_rows
            try:
                page_size = int(page_size)
                if page_size < 0:
                    raise ValueError

            except ValueError:
                raise DatasetteError("_size must be a positive integer", status=400)

            if page_size > self.max_returned_rows:
                raise DatasetteError(
                    "_size must be <= {}".format(self.max_returned_rows), status=400
                )

            extra_args["page_size"] = page_size
        else:
            page_size = self.page_size

        sql = "select {select} from {table_name} {where}{order_by}limit {limit}{offset}".format(
            select=select,
            table_name=escape_sqlite(table),
            where=where_clause,
            order_by=order_by,
            limit=page_size + 1,
            offset=offset,
        )

        if request.raw_args.get("_timelimit"):
            extra_args["custom_time_limit"] = int(request.raw_args["_timelimit"])

        rows, truncated, description = await self.execute(
            name, sql, params, truncate=True, **extra_args
        )

        # facets support
        try:
            facets = request.args["_facet"]
        except KeyError:
            facets = table_metadata.get("facets", [])
        facet_results = {}
        for column in facets:
            facet_sql = """
                select {col} as value, count(*) as count
                {from_sql}
                group by {col} order by count desc limit 20
            """.format(
                col=escape_sqlite(column), from_sql=from_sql
            )
            try:
                facet_rows = await self.execute(
                    name, facet_sql, params, truncate=False, custom_time_limit=200
                )
                facet_results[column] = [
                    {
                        "value": row["value"],
                        "count": row["count"],
                        "toggle_url": urllib.parse.urljoin(
                            request.url,
                            path_with_added_args(request, {column: row["value"]}),
                        ),
                    }
                    for row in facet_rows
                ]
            except sqlite3.OperationalError:
                # Hit time limit
                pass

        columns = [r[0] for r in description]
        rows = list(rows)

        filter_columns = columns[:]
        if use_rowid and filter_columns[0] == "rowid":
            filter_columns = filter_columns[1:]

        # Pagination next link
        next_value = None
        next_url = None
        if len(rows) > page_size and page_size > 0:
            if is_view:
                next_value = int(_next or 0) + page_size
            else:
                next_value = path_from_row_pks(rows[-2], pks, use_rowid)
            # If there's a sort or sort_desc, add that value as a prefix
            if (sort or sort_desc) and not is_view:
                prefix = rows[-2][sort or sort_desc]
                if prefix is None:
                    prefix = "$null"
                else:
                    prefix = urllib.parse.quote_plus(str(prefix))
                next_value = "{},{}".format(prefix, next_value)
                added_args = {"_next": next_value}
                if sort:
                    added_args["_sort"] = sort
                else:
                    added_args["_sort_desc"] = sort_desc
            else:
                added_args = {"_next": next_value}
            next_url = urllib.parse.urljoin(
                request.url, path_with_added_args(request, added_args)
            )
            rows = rows[:page_size]

        # Number of filtered rows in whole set:
        filtered_table_rows_count = None
        if count_sql:
            try:
                count_rows = list(await self.execute(name, count_sql, params))
                filtered_table_rows_count = count_rows[0][0]
            except sqlite3.OperationalError:
                # Almost certainly hit the timeout
                pass

        # human_description_en combines filters AND search, if provided
        human_description_en = filters.human_description_en(extra=search_descriptions)

        if sort or sort_desc:
            sorted_by = "sorted by {}{}".format(
                (sort or sort_desc), " descending" if sort_desc else ""
            )
            human_description_en = " ".join(
                [b for b in [human_description_en, sorted_by] if b]
            )

        async def extra_template():
            display_columns, display_rows = await self.display_columns_and_rows(
                name,
                table,
                description,
                rows,
                link_column=not is_view,
                expand_foreign_keys=True,
            )
            metadata = self.ds.metadata.get("databases", {}).get(name, {}).get(
                "tables", {}
            ).get(
                table, {}
            )
            self.ds.update_with_inherited_metadata(metadata)
            return {
                "database_hash": hash,
                "supports_search": bool(fts_table),
                "search": search or "",
                "use_rowid": use_rowid,
                "filters": filters,
                "display_columns": display_columns,
                "filter_columns": filter_columns,
                "display_rows": display_rows,
                "is_sortable": any(c["sortable"] for c in display_columns),
                "path_with_added_args": path_with_added_args,
                "request": request,
                "sort": sort,
                "sort_desc": sort_desc,
                "disable_sort": is_view,
                "custom_rows_and_columns_templates": [
                    "_rows_and_columns-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns-table-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns.html",
                ],
                "metadata": metadata,
            }

        return {
            "database": name,
            "table": table,
            "is_view": is_view,
            "view_definition": view_definition,
            "table_definition": table_definition,
            "human_description_en": human_description_en,
            "rows": rows[:page_size],
            "truncated": truncated,
            "table_rows_count": table_rows_count,
            "filtered_table_rows_count": filtered_table_rows_count,
            "columns": columns,
            "primary_keys": pks,
            "units": units,
            "query": {"sql": sql, "params": params},
            "facet_results": facet_results,
            "next": next_value and str(next_value) or None,
            "next_url": next_url,
        }, extra_template, (
            "table-{}-{}.html".format(to_css_class(name), to_css_class(table)),
            "table.html",
        )


class RowView(RowTableShared):

    async def data(self, request, name, hash, table, pk_path):
        table = urllib.parse.unquote_plus(table)
        pk_values = urlsafe_components(pk_path)
        info = self.ds.inspect()[name]
        table_info = info["tables"].get(table) or {}
        pks = table_info.get("primary_keys") or []
        use_rowid = not pks
        select = "*"
        if use_rowid:
            select = "rowid, *"
            pks = ["rowid"]
        wheres = ['"{}"=:p{}'.format(pk, i) for i, pk in enumerate(pks)]
        sql = 'select {} from "{}" where {}'.format(select, table, " AND ".join(wheres))
        params = {}
        for i, pk_value in enumerate(pk_values):
            params["p{}".format(i)] = pk_value
        # rows, truncated, description = await self.execute(name, sql, params, truncate=True)
        rows, truncated, description = await self.execute(
            name, sql, params, truncate=True
        )
        columns = [r[0] for r in description]
        rows = list(rows)
        if not rows:
            raise NotFound("Record not found: {}".format(pk_values))

        async def template_data():
            display_columns, display_rows = await self.display_columns_and_rows(
                name,
                table,
                description,
                rows,
                link_column=False,
                expand_foreign_keys=True,
            )
            for column in display_columns:
                column["sortable"] = False
            return {
                "database_hash": hash,
                "foreign_key_tables": await self.foreign_key_tables(
                    name, table, pk_values
                ),
                "display_columns": display_columns,
                "display_rows": display_rows,
                "custom_rows_and_columns_templates": [
                    "_rows_and_columns-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns-row-{}-{}.html".format(
                        to_css_class(name), to_css_class(table)
                    ),
                    "_rows_and_columns.html",
                ],
                "metadata": self.ds.metadata.get("databases", {}).get(name, {}).get(
                    "tables", {}
                ).get(
                    table, {}
                ),
            }

        data = {
            "database": name,
            "table": table,
            "rows": rows,
            "columns": columns,
            "primary_keys": pks,
            "primary_key_values": pk_values,
            "units": self.table_metadata(name, table).get("units", {}),
        }

        if "foreign_key_tables" in (request.raw_args.get("_extras") or "").split(","):
            data["foreign_key_tables"] = await self.foreign_key_tables(
                name, table, pk_values
            )

        return data, template_data, (
            "row-{}-{}.html".format(to_css_class(name), to_css_class(table)), "row.html"
        )

    async def foreign_key_tables(self, name, table, pk_values):
        if len(pk_values) != 1:
            return []

        table_info = self.ds.inspect()[name]["tables"].get(table)
        if not table_info:
            return []

        foreign_keys = table_info["foreign_keys"]["incoming"]
        if len(foreign_keys) == 0:
            return []

        sql = "select " + ", ".join(
            [
                '(select count(*) from {table} where "{column}"=:id)'.format(
                    table=escape_sqlite(fk["other_table"]), column=fk["other_column"]
                )
                for fk in foreign_keys
            ]
        )
        try:
            rows = list(await self.execute(name, sql, {"id": pk_values[0]}))
        except sqlite3.OperationalError:
            # Almost certainly hit the timeout
            return []

        foreign_table_counts = dict(
            zip(
                [(fk["other_table"], fk["other_column"]) for fk in foreign_keys],
                list(rows[0]),
            )
        )
        foreign_key_tables = []
        for fk in foreign_keys:
            count = foreign_table_counts.get(
                (fk["other_table"], fk["other_column"])
            ) or 0
            foreign_key_tables.append({**fk, **{"count": count}})
        return foreign_key_tables