"""
camcops_server/cc_modules/cc_xml.py
===============================================================================
Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
This file is part of CamCOPS.
CamCOPS is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CamCOPS is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
**XML helper functions/classes.**
"""
import base64
import datetime
import logging
from typing import Any, List, Optional, TYPE_CHECKING, Union
import xml.sax.saxutils
from cardinal_pythonlib.logs import BraceStyleAdapter
from cardinal_pythonlib.reprfunc import auto_repr
from cardinal_pythonlib.sqlalchemy.orm_inspect import gen_columns
import pendulum # avoid name confusion with Date
from pendulum import DateTime as Pendulum
from semantic_version.base import Version
from sqlalchemy.sql.schema import Column
from sqlalchemy.sql.type_api import TypeEngine
from camcops_server.cc_modules.cc_simpleobjects import XmlSimpleValue
from camcops_server.cc_modules.cc_sqla_coltypes import gen_camcops_blob_columns
if TYPE_CHECKING:
from camcops_server.cc_modules.cc_request import (
CamcopsRequest,
)
from camcops_server.cc_modules.cc_summaryelement import (
SummaryElement,
)
log = BraceStyleAdapter(logging.getLogger(__name__))
# =============================================================================
# Constants
# =============================================================================
XML_NAME_SNOMED_CODES = "snomed_ct_codes"
XML_NAMESPACES = [
' xmlns:xsi="https://www.w3.org/2001/XMLSchema-instance"'
# ' xmlns:dt="https://www.w3.org/2001/XMLSchema-datatypes"'
]
XML_IGNORE_NAMESPACES = [
'xmlns:mc="https://schemas.openxmlformats.org/markup-compatibility/2006"',
'xmlns:ignore="https://camcops.readthedocs.org/ignore"',
# ... actual URL unimportant
'mc:Ignorable="ignore"',
]
# http://www.w3.org/TR/xmlschema-1/
# http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
[docs]class XmlDataTypes(object):
"""
Constants representing standard XML data types.
"""
BASE64BINARY = "base64Binary"
BOOLEAN = "boolean"
DATE = "date"
DATETIME = "dateTime"
DOUBLE = "double"
INTEGER = "integer"
STRING = "string"
TIME = "time"
# =============================================================================
# XML element
# =============================================================================
[docs]class XmlElement(object):
"""
Represents XML data in a tree.
"""
[docs] def __init__(
self,
name: str,
value: Any = None,
datatype: str = None,
comment: str = None,
literal: str = None,
) -> None:
"""
Args:
name: name of this XML element
value: value of this element: may be a raw value or a list of
:class:`camcops_server.cc_modules.cc_xml.XmlElement` objects
(default: ``None``)
datatype: data type of this element (default: ``None``)
comment: description of this element (default: ``None``)
literal: literal XML; overrides all other options
"""
# Special: boolean requires lower case "true"/"false" (or 0/1)
if datatype == XmlDataTypes.BOOLEAN and value is not None:
value = str(value).lower()
self.name = name
self.value = value
self.datatype = datatype
self.comment = comment
self.literal = literal
def __repr__(self) -> str:
"""
Shows just this element.
"""
return auto_repr(self, with_addr=True)
[docs]class XmlLiteral(XmlElement):
"""
Represents literal XML.
"""
[docs] def __init__(self, literal: str) -> None:
super().__init__(name="", literal=literal)
# =============================================================================
# Some literals
# =============================================================================
XML_COMMENT_ANCILLARY = XmlLiteral("<!-- Ancillary records -->")
XML_COMMENT_ANONYMOUS = XmlLiteral("<!-- Anonymous task; no patient info -->")
XML_COMMENT_BLOBS = XmlLiteral("<!-- Associated BLOBs -->")
XML_COMMENT_CALCULATED = XmlLiteral("<!-- Calculated fields -->")
XML_COMMENT_PATIENT = XmlLiteral("<!-- Associated patient details -->")
XML_COMMENT_SNOMED_CT = XmlLiteral("<!-- SNOMED-CT codes -->")
XML_COMMENT_SPECIAL_NOTES = XmlLiteral("<!-- Any special notes added -->")
XML_COMMENT_STORED = XmlLiteral("<!-- Stored fields -->")
# =============================================================================
# XML processing
# =============================================================================
# The xml.etree.ElementTree and lxml libraries can both do this sort of thing.
# However, they do look quite fiddly and we only want to create something
# simple. Therefore, let's roll our own:
[docs]def make_xml_branches_from_columns(
obj, skip_fields: List[str] = None
) -> List[XmlElement]:
"""
Returns a list of XML branches, each an
:class:`camcops_server.cc_modules.cc_xml.XmlElement`, from an SQLAlchemy
ORM object, using the list of SQLAlchemy Column objects that
define/describe its fields.
Args:
obj: the SQLAlchemy ORM object
skip_fields: database column names to skip
"""
skip_fields = skip_fields or [] # type: List[str]
branches = [] # type: List[XmlElement]
for attrname, column in gen_columns(obj):
# log.debug("make_xml_branches_from_columns: {!r}", attrname)
colname = column.name
if colname in skip_fields:
continue
branches.append(
XmlElement(
name=colname,
value=getattr(obj, attrname),
datatype=get_xml_datatype_from_sqla_column(column),
comment=column.comment,
)
)
return branches
[docs]def make_xml_branches_from_summaries(
summaries: List["SummaryElement"],
skip_fields: List[str] = None,
sort_by_name: bool = True,
) -> List[XmlElement]:
"""
Returns a list of XML branches, each an
:class:`camcops_server.cc_modules.cc_xml.XmlElement`, from a list of
summary data provided by a task.
Args:
summaries: list of :class:`SummaryElement` objects
skip_fields: summary element names to skip
sort_by_name: sort branches by element name?
"""
skip_fields = skip_fields or []
branches = []
for s in summaries:
name = s.name
if name in skip_fields:
continue
branches.append(
XmlElement(
name=name,
value=s.value,
datatype=get_xml_datatype_from_sqla_column_type(s.coltype),
comment=s.comment,
)
)
if sort_by_name:
branches.sort(key=lambda el: el.name)
return branches
[docs]def make_xml_branches_from_blobs(
req: "CamcopsRequest", obj, skip_fields: List[str] = None
) -> List[XmlElement]:
"""
Return XML branches from those attributes of an SQLAlchemy ORM object
(e.g. task) that represent BLOBs.
Args:
req: the :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
obj: the SQLAlchemy ORM object
skip_fields: database column names to skip
Returns:
a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects
"""
skip_fields = skip_fields or [] # type: List[str]
branches = [] # type: List[XmlElement]
for id_attrname, column in gen_camcops_blob_columns(obj):
colname = column.name
if colname in skip_fields:
continue
relationship_attr = column.blob_relationship_attr_name
blob = getattr(obj, relationship_attr)
branches.append(
XmlElement(
name=relationship_attr,
value=None if blob is None else blob.get_xml_element(req),
comment=column.comment,
)
)
return branches
[docs]def get_xml_datatype_from_sqla_column_type(coltype: TypeEngine) -> str:
"""
Returns the XML schema datatype from an SQLAlchemy column type,
such as ``Integer``. Compare :func:`get_xml_datatype_from_sqla_column`.
"""
# http://www.xml.dvint.com/docs/SchemaDataTypesQR-2.pdf
# http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
pt = coltype.python_type
# pt is a *type*, not an *instance* of that type, so we use issubclass:
# Watch the order. Move from more specific to less specific.
# For example, issubclass(bool, int) == True, so do bool first.
if issubclass(pt, datetime.datetime) or issubclass(pt, Pendulum):
return XmlDataTypes.DATETIME
if issubclass(pt, datetime.date) or issubclass(pt, pendulum.Date):
return XmlDataTypes.DATE
if issubclass(pt, datetime.time) or issubclass(pt, pendulum.Time):
return XmlDataTypes.TIME
if issubclass(pt, bool):
return XmlDataTypes.BOOLEAN
if issubclass(pt, int):
return XmlDataTypes.INTEGER
if issubclass(pt, float):
return XmlDataTypes.DOUBLE
if issubclass(pt, str) or issubclass(pt, Version):
return XmlDataTypes.STRING
# BLOBs are handled separately.
raise NotImplementedError(
f"Don't know XML type for SQLAlchemy type {coltype!r} with Python "
f"type {pt!r}"
)
[docs]def get_xml_datatype_from_sqla_column(column: Column) -> Optional[str]:
"""
Returns the XML schema datatype from an SQLAlchemy Column, such as
``Integer()``. Compare :func:`get_xml_datatype_from_sqla_column_type`.
"""
coltype = column.type # type: TypeEngine
return get_xml_datatype_from_sqla_column_type(coltype)
[docs]def get_xml_blob_element(
name: str, blobdata: Optional[bytes], comment: str = None
) -> XmlElement:
"""
Returns an XmlElement representing a base-64-encoded BLOB.
Args:
name: XML element name
blobdata: the raw binary, or ``None``
comment: XML comment
"""
if blobdata:
# blobdata is raw binary
b64bytes = base64.b64encode(blobdata)
b64str = b64bytes.decode("ascii")
value = b64str
else:
value = None
return XmlElement(
name=name,
value=value,
datatype=XmlDataTypes.BASE64BINARY,
comment=comment,
)
# http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#base64Binary
[docs]def xml_escape_value(value: str) -> str:
"""
Escape a value for XML.
"""
# http://stackoverflow.com/questions/1091945/
# https://wiki.python.org/moin/EscapingXml
return xml.sax.saxutils.escape(value)
[docs]def xml_quote_attribute(attr: str) -> str:
"""
Escapes and quotes an attribute for XML.
More stringent than value escaping.
"""
return xml.sax.saxutils.quoteattr(attr)
[docs]def get_xml_tree(
element: Union[
XmlElement, XmlSimpleValue, List[Union[XmlElement, XmlSimpleValue]]
],
level: int = 0,
indent_spaces: int = 4,
eol: str = "\n",
include_comments: bool = False,
) -> str:
# noinspection HttpUrlsUsage
"""
Returns an :class:`camcops_server.cc_modules.cc_xml.XmlElement` as text.
Args:
element: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
level: starting level/depth (used for recursion)
indent_spaces: number of spaces to indent formatted XML
eol: end-of-line string
include_comments: include comments describing each field?
We will represent NULL values with ``xsi:nil``, but this requires a
namespace:
- https://stackoverflow.com/questions/774192
- http://books.xmlschemata.org/relaxng/relax-CHP-11-SECT-1.html
Comments:
- http://blog.galasoft.ch/posts/2010/02/quick-tip-commenting-out-properties-in-xaml/
- https://stackoverflow.com/questions/2073140/
Regarding newlines:
- We do nothing special, i.e. newlines are provided in raw format.
- However, some browsers may fail to display them correctly (i.e. they look
like they're missing) -- e.g. Firefox, Chrome -- see
https://stackoverflow.com/questions/2004386. Just try saving and
inspecting the results with a text editor, or use the browser's "View
Source" function (which, for Chrome, shows both newlines and line numbers
too).
""" # noqa
xmltext = ""
prefix = " " * level * indent_spaces
if isinstance(element, XmlElement):
if element.literal:
# A user-inserted piece of XML. Insert, but indent.
xmltext += prefix + element.literal + eol
else:
# Attributes
namespaces = []
if level == 0: # root
# Apply namespace to root element (will inherit):
namespaces.extend(XML_NAMESPACES)
if include_comments:
namespaces.extend(XML_IGNORE_NAMESPACES)
namespace = " ".join(namespaces)
if element.datatype:
dt = f' xsi:type="{element.datatype}"'
else:
# log.warning("XmlElement has no datatype: {!r}", element)
dt = ""
cmt = ""
if include_comments and element.comment:
cmt = f" ignore:comment={xml_quote_attribute(element.comment)}"
attributes = f"{namespace}{dt}{cmt}"
# Assemble
if element.value is None:
# NULL handling
xmltext += (
f"{prefix}<{element.name}{attributes} "
f'xsi:nil="true"/>{eol}'
)
else:
complex_value = isinstance(
element.value, XmlElement
) or isinstance(element.value, list)
value_to_recurse = (
element.value
if complex_value
else XmlSimpleValue(element.value)
)
# ... XmlSimpleValue is a marker that subsequently
# distinguishes things that were part of an XmlElement from
# user-inserted raw XML.
nl = eol if complex_value else ""
pr2 = prefix if complex_value else ""
v = get_xml_tree(
value_to_recurse,
level=level + 1,
indent_spaces=indent_spaces,
eol=eol,
include_comments=include_comments,
)
xmltext += (
f"{prefix}<{element.name}{attributes}>{nl}"
f"{v}{pr2}</{element.name}>{eol}"
)
elif isinstance(element, list):
for subelement in element:
xmltext += get_xml_tree(
subelement,
level,
indent_spaces=indent_spaces,
eol=eol,
include_comments=include_comments,
)
# recursive
elif isinstance(element, XmlSimpleValue):
# The lowest-level thing a value. No extra indent.
xmltext += xml_escape_value(str(element.value))
else:
raise ValueError(f"Bad value to get_xml_tree: {element!r}")
return xmltext
[docs]def get_xml_document(
root: XmlElement,
indent_spaces: int = 4,
eol: str = "\n",
include_comments: bool = False,
) -> str:
"""
Returns an entire XML document as text, given the root
:class:`camcops_server.cc_modules.cc_xml.XmlElement`.
Args:
root: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
indent_spaces: number of spaces to indent formatted XML
eol: end-of-line string
include_comments: include comments describing each field?
"""
if not isinstance(root, XmlElement):
raise AssertionError(
"get_xml_document: root not an XmlElement; "
"XML requires a single root"
)
return xml_header(eol) + get_xml_tree(
root,
indent_spaces=indent_spaces,
eol=eol,
include_comments=include_comments,
)