Source code for camcops_server.cc_modules.cc_sqlalchemy

"""
camcops_server/cc_modules/cc_sqlalchemy.py

===============================================================================

    Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CamCOPS.

    CamCOPS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CamCOPS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**SQLAlchemy helper functions and constants.**

We define our metadata ``Base`` here, and things like our index naming
convention and MySQL table formats.

A few random notes:

- SQLAlchemy will automatically warn about clashing columns:

  .. :code-block:: python

    from sqlalchemy import Column, Integer
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class Thing(Base):
        __tablename__ = "thing"
        a = Column("a", Integer, primary_key=True)
        b = Column("b", Integer)
        c = Column("b", Integer)  # produces a warning:

  .. code-block:: none

    SAWarning: On class 'Thing', Column object 'b' named directly multiple
    times, only one will be used: b, c. Consider using orm.synonym instead

"""

from io import StringIO
import logging
import sqlite3
from typing import Any

from cardinal_pythonlib.logs import BraceStyleAdapter
from cardinal_pythonlib.sqlalchemy.dialect import (
    get_dialect_from_name,
    SqlaDialectName,
)
from cardinal_pythonlib.sqlalchemy.dump import dump_ddl
from cardinal_pythonlib.sqlalchemy.session import (
    make_sqlite_url,
    SQLITE_MEMORY_URL,
)
from pendulum import DateTime as Pendulum

from sqlalchemy.engine import create_engine
from sqlalchemy.engine.base import Engine
from sqlalchemy.ext.mutable import Mutable
from sqlalchemy.orm import DeclarativeBaseNoMeta
from sqlalchemy.schema import CreateTable
from sqlalchemy.sql.schema import MetaData, Table

from camcops_server.cc_modules.cc_cache import cache_region_static, fkg

log = BraceStyleAdapter(logging.getLogger(__name__))


# =============================================================================
# Naming convention; metadata; Base
# =============================================================================
# https://alembic.readthedocs.org/en/latest/naming.html
# https://docs.sqlalchemy.org/en/latest/core/constraints.html#configuring-constraint-naming-conventions  # noqa

MYSQL_MAX_IDENTIFIER_LENGTH = 64
LONG_COLUMN_NAME_WARNING_LIMIT = 30

NAMING_CONVENTION = {
    # - Note that constraint names must be unique in the DATABASE, not the
    #   table;
    #   https://dev.mysql.com/doc/refman/5.6/en/create-table-foreign-keys.html
    # - Index names only have to be unique for the table;
    #   https://stackoverflow.com/questions/30653452/do-index-names-have-to-be-unique-across-entire-database-in-mysql  # noqa
    # INDEX:
    "ix": "ix_%(column_0_label)s",
    # UNIQUE CONSTRAINT:
    "uq": "uq_%(table_name)s_%(column_0_name)s",
    # "uq": "uq_%(column_0_name)s",
    # CHECK CONSTRAINT:
    # "ck": "ck_%(table_name)s_%(constraint_name)s",  # too long for MySQL
    # ... https://groups.google.com/forum/#!topic/sqlalchemy/SIT4D8S9dUg
    # "ck": "ck_%(table_name)s_%(column_0_name)s",
    # Problem 2018-09-14:
    # - constraints must be unique across database
    # - MySQL only accepts 64 characters for constraint name
    # - using "%(column_0_name)" means that explicit constrant names are
    #   ignored
    # - using "%(constraint_name)" means that all constraints have to be named
    #   explicitly (very tedious)
    # - so truncate?
    #   https://docs.python.org/3/library/stdtypes.html#old-string-formatting
    #   https://www.python.org/dev/peps/pep-0237/
    # - The main problem is BOOL columns, e.g.
    #   cpft_lps_discharge.management_specialling_behavioural_disturbance
    # - Example:
    #   longthing = "abcdefghijklmnopqrstuvwxyz"
    #   d = {"thing": longthing}
    #   "hello %(thing).10s world" % d  # LEFT TRUNCATE
    #   # ... gives 'hello abcdefghij world'
    # "ck": "ck_%(table_name).30s_%(column_0_name).30s",
    # 3 for "ck_" leaves 61; 30 for table, 1 for "_", 30 for column
    # ... no...
    # "obs_contamination_bodily_waste_*"
    "ck": "ck_%(table_name)s_%(column_0_name)s",  # unique but maybe too long
    # FOREIGN KEY:
    # "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",  # too long for MySQL sometimes!  # noqa
    "fk": "fk_%(table_name)s_%(column_0_name)s",
    # "fk": "fk_%(column_0_name)s",
    # PRIMARY KEY:
    "pk": "pk_%(table_name)s",
}


# The base of all our model classes:
[docs]class Base(DeclarativeBaseNoMeta): metadata = MetaData(naming_convention=NAMING_CONVENTION) # Special options: __table_args__ = { # ------------------------------------------------------------------------- # MySQL special options # ------------------------------------------------------------------------- # SQLAlchemy __table_args__: # https://docs.sqlalchemy.org/en/latest/orm/extensions/declarative/table_config.html # noqa # SQLAlchemy sends keyword arguments like 'mysql_keyword_name' to be # rendered as KEYWORD_NAME in the CREATE TABLE statement: # https://docs.sqlalchemy.org/en/latest/dialects/mysql.html # Engine: InnoDB "mysql_engine": "InnoDB", # Barracuda: COMPRESSED or DYNAMIC # https://dev.mysql.com/doc/refman/5.7/en/innodb-row-format-dynamic.html # https://xenforo.com/community/threads/anyone-running-their-innodb-tables-with-row_format-compressed.99606/ # noqa # We shouldn't compress everything by default; performance hit. "mysql_row_format": "DYNAMIC", # SEE server_troubleshooting.rst FOR BUG DISCUSSION "mysql_charset": "utf8mb4 COLLATE utf8mb4_unicode_ci", # Character set # REPLACED # 'mysql_charset': 'utf8mb4', # https://dev.mysql.com/doc/refman/5.5/en/charset-unicode-utf8mb4.html # Collation # Which collation for MySQL? See # - https://stackoverflow.com/questions/766809/whats-the-difference-between-utf8-general-ci-and-utf8-unicode-ci # noqa # REPLACED # 'mysql_collate': 'utf8mb4_unicode_ci' # Note that COLLATION rules are, from least to greatest precedence: # Server collation # Connection-specific collation # Database collation # Table collation # Column collation # Query collation (using CAST or CONVERT) # - https://stackoverflow.com/questions/24356090/difference-between-database-table-column-collation # noqa # Therefore, we can set the table collation for all our tables, and not # worry about the column collation, e.g. Text(collation=...). # # To check a MySQL database, and connection/server settings: # SHOW VARIABLES LIKE '%character%'; # SHOW VARIABLES LIKE '%collation%'; # To check tables: # SHOW TABLE STATUS WHERE NAME LIKE 'my_tablename'\G # ... note use of \G to produce long-form output! # To check columns: # SHOW FULL COLUMNS FROM my_tablename; # # ONE THING IN PARTICULAR TO BEWARE: utf8mb4_unicode_ci produces # CASE-INSENSITIVE COMPARISON. For example: # SELECT 'a' = 'A'; -- produces 1 # SELECT 'a' = 'B'; -- produces 0 # SELECT BINARY 'a' = BINARY 'A'; -- produces 0 # This is a PROBLEM FOR PASSWORD FIELDS IF WE INTEND TO DO # DATABASE-LEVEL COMPARISONS WITH THEM. In that case we must ensure a # different collation is set; specifically, use # # utf8mb4_bin # # and see also # SHOW COLLATION WHERE `Collation` LIKE 'utf8mb4%'; # and # https://dev.mysql.com/doc/refman/5.6/en/charset-binary-collations.html # # To check, run # SHOW FULL COLUMNS FROM _security_users; } def __init_subclass__(cls, **kwargs: Any) -> None: cls.extend_columns(**kwargs) super().__init_subclass__(**kwargs) @classmethod def extend_columns(cls, **kwargs: Any) -> None: pass
# MySQL things we can't set via SQLAlchemy, but would like to be set: # - max_allowed_packet: should be at least 32M # - innodb_strict_mode: should be 1, but less of a concern with SQLAlchemy # MySQL things we don't care about too much: # - innodb_file_per_table: desirable, but up to the user. # ============================================================================= # Convenience functions # =============================================================================
[docs]def make_memory_sqlite_engine(echo: bool = False) -> Engine: """ Create an SQLAlchemy :class:`Engine` for an in-memory SQLite database. """ return create_engine(SQLITE_MEMORY_URL, echo=echo)
[docs]def make_file_sqlite_engine(filename: str, echo: bool = False) -> Engine: """ Create an SQLAlchemy :class:`Engine` for an on-disk SQLite database. """ return create_engine(make_sqlite_url(filename), echo=echo)
[docs]def sql_from_sqlite_database(connection: sqlite3.Connection) -> str: """ Returns SQL to describe an SQLite database. Args: connection: connection to SQLite database via ``sqlite3`` module Returns: the SQL """ with StringIO() as f: # noinspection PyTypeChecker for line in connection.iterdump(): f.write(line + "\n") f.flush() return f.getvalue()
[docs]@cache_region_static.cache_on_arguments(function_key_generator=fkg) def get_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> str: """ Returns the DDL (data definition language; SQL ``CREATE TABLE`` commands) for our SQLAlchemy metadata. Args: dialect_name: SQLAlchemy dialect name """ metadata = Base.metadata # type: MetaData with StringIO() as f: dump_ddl(metadata, dialect_name=dialect_name, fileobj=f) f.flush() text = f.getvalue() return text
[docs]def log_all_ddl(dialect_name: str = SqlaDialectName.MYSQL) -> None: """ Send the DDL for our SQLAlchemy metadata to the Python log. Args: dialect_name: SQLAlchemy dialect name """ text = get_all_ddl(dialect_name) log.info(text) log.info("DDL length: {} characters", len(text))
[docs]@cache_region_static.cache_on_arguments(function_key_generator=fkg) def get_table_ddl( table: Table, dialect_name: str = SqlaDialectName.MYSQL ) -> str: """ Returns the DDL (data definition language; SQL ``CREATE TABLE`` commands) for a specific table. Args: table: Table to dump. dialect_name: SQLAlchemy dialect name. https://stackoverflow.com/questions/2128717/sqlalchemy-printing-raw-sql-from-create """ dialect = get_dialect_from_name(dialect_name) return str(CreateTable(table).compile(dialect=dialect))
[docs]def assert_constraint_name_ok(table_name: str, column_name: str) -> None: """ Checks that the automatically generated name of a constraint isn't too long for specific databases. Args: table_name: table name column_name: column name Raises: AssertionError, if something will break """ d = {"table_name": table_name, "column_0_name": column_name} anticipated_name = NAMING_CONVENTION["ck"] % d if len(anticipated_name) > MYSQL_MAX_IDENTIFIER_LENGTH: raise AssertionError( f"Constraint name too long for table {table_name!r}, column " f"{column_name!r}; will be {anticipated_name!r} " f"of length {len(anticipated_name)}" )
# ============================================================================= # Database engine hacks # =============================================================================
[docs]def hack_pendulum_into_pymysql() -> None: """ Hack in support for :class:`pendulum.DateTime` into the ``pymysql`` database interface. See https://pendulum.eustace.io/docs/#limitations. """ try: # noinspection PyUnresolvedReferences from pymysql.converters import encoders, escape_datetime encoders[Pendulum] = escape_datetime except ImportError: pass
hack_pendulum_into_pymysql()
[docs]class MutableDict(Mutable, dict): """ Source: https://docs.sqlalchemy.org/en/14/orm/extensions/mutable.html """
[docs] @classmethod def coerce(cls, key: str, value: Any) -> Any: """ Convert plain dictionaries to MutableDict. """ if not isinstance(value, MutableDict): if isinstance(value, dict): return MutableDict(value) # this call will raise ValueError return Mutable.coerce(key, value) else: return value
def __setitem__(self, key: str, value: Any) -> None: """ Detect dictionary set events and emit change events. """ dict.__setitem__(self, key, value) self.changed() def __delitem__(self, key: str) -> None: """ Detect dictionary del events and emit change events. """ dict.__delitem__(self, key) self.changed()