15.1.201. tablet_qt/lib/idpolicy.cpp

/*
    Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CamCOPS.

    CamCOPS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CamCOPS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
*/

#include "idpolicy.h"

#include <QDebug>
#include <QObject>
#include <QRegularExpression>
// ... replacing QRegExp; https://doc.qt.io/qt-6.5/qregexp.html#details
#include "common/preprocessor_aid.h"  // IWYU pragma: keep
#include "dbobjects/patient.h"
#include "lib/convert.h"

// ============================================================================
// Constants
// ============================================================================

// Integer codes for the tokenized form of an ID policy.
// Zero means "unrecognized word"; negative values are the fixed vocabulary;
// strictly positive values are ID number types (token n <=> ID number n).

// Sentinel for a word that is not part of the policy language.
constexpr int BAD_TOKEN = 0;

// Grouping.
constexpr int TOKEN_LPAREN = -1;
constexpr int TOKEN_RPAREN = -2;

// Boolean operators.
constexpr int TOKEN_AND = -3;
constexpr int TOKEN_OR = -4;
constexpr int TOKEN_NOT = -5;

// ID number tokens that do not name one specific ID number type.
constexpr int TOKEN_ANY_IDNUM = -6;
constexpr int TOKEN_OTHER_IDNUM = -7;

// Other patient attributes.
constexpr int TOKEN_FORENAME = -8;
constexpr int TOKEN_SURNAME = -9;
constexpr int TOKEN_SEX = -10;
constexpr int TOKEN_DOB = -11;
constexpr int TOKEN_ADDRESS = -12;
constexpr int TOKEN_GP = -13;
constexpr int TOKEN_OTHER_DETAILS = -14;
constexpr int TOKEN_EMAIL = -15;

// Tokens for ID numbers are from 1 upwards.

// Regular expression matching a single policy token: optional leading
// whitespace, then (captured as group 1) either a run of word characters or
// one parenthesis. It is applied repeatedly to walk through the policy text.
const QString TOKENIZE_RE_STR(
    // http://stackoverflow.com/questions/6162600/
    // http://stackoverflow.com/questions/20508534/c-multiline-string-raw-literal
    // https://doc.qt.io/qt-6.5/qregularexpression.html#details

    "\\s*"  // discard leading whitespace
    "("  // start capture group
    "\\w+"  // one or more word characters
    "|"  // alternator
    "\\("  // left parenthesis
    "|"  // alternator
    "\\)"  // right parenthesis
    ")"  // end capture group, and we can have lots of them

    // In full:
    // \s*(\w+|\(|\))
);

// ============================================================================
// Static data
// ============================================================================

// Builds the map from each fixed (non-ID-number) token to the word used for
// it in policy text. Used once, to initialize IdPolicy::s_token_to_name.
QMap<int, QString> makeTokenToNameDict()
{
    // function to create static data

    QMap<int, QString> token_to_name;

    // Everything except ID numbers:

    token_to_name[TOKEN_LPAREN] = "(";
    token_to_name[TOKEN_RPAREN] = ")";
    token_to_name[TOKEN_AND] = "and";
    token_to_name[TOKEN_OR] = "or";
    token_to_name[TOKEN_NOT] = "not";

    // NOTE(review): TOKEN_ANY_IDNUM, TOKEN_OTHER_IDNUM, TOKEN_FORENAME and
    // TOKEN_OTHER_DETAILS are declared in this file but have no entry here.
    // Without one, their policy words are unknown to nameToToken() -- e.g.
    // "forename" and "anyidnum", both used by TABLET_ID_POLICY. The name
    // constants for them are not visible in this file (presumably declared
    // in dbobjects/patient.h); add the four entries once confirmed.

    token_to_name[TOKEN_SURNAME] = SURNAME_FIELD;
    token_to_name[TOKEN_SEX] = SEX_FIELD;
    token_to_name[TOKEN_DOB] = DOB_FIELD;
    token_to_name[TOKEN_EMAIL] = EMAIL_FIELD;
    token_to_name[TOKEN_ADDRESS] = ADDRESS_FIELD;
    token_to_name[TOKEN_GP] = GP_FIELD;

    return token_to_name;
}

// Lookup tables shared by all IdPolicy objects.
// s_token_to_name: fixed token -> policy word, built by makeTokenToNameDict().
// s_name_to_token: the same map reversed via convert::reverseMap(), used when
// parsing. It is initialized from s_token_to_name, so it must stay defined
// AFTER it (objects in one translation unit initialize in definition order).
const QMap<int, QString> IdPolicy::s_token_to_name = makeTokenToNameDict();
const QMap<QString, int> IdPolicy::s_name_to_token
    = convert::reverseMap(IdPolicy::s_token_to_name);

// ============================================================================
// IdPolicy class
// ============================================================================

// Creates a policy from its textual form.
// The text is kept verbatim (returned by original() and quoted in syntax
// error messages) and is parsed straight away, which sets the validity flag.
IdPolicy::IdPolicy(const QString& policy_text) :
    m_policy_text(policy_text)
{
    tokenize(policy_text);
}

int IdPolicy::nameToToken(const QString& name) const
    // One of our pre-cached tokens?
    if (s_name_to_token.contains(name)) {
        return s_name_to_token[name];
    // An ID number token?
    if (name.startsWith(IDNUM_FIELD_PREFIX)) {
        const QString number
            = name.right(name.length() - IDNUM_FIELD_PREFIX.length());
        bool ok = false;
        const int which_idnum = number.toInt(&ok);
        if (ok) {
            return which_idnum;
    // Failed.
    return BAD_TOKEN;

// Converts a token back to its policy word.
// Positive tokens are formatted with IDNUM_FIELD_FORMAT; fixed tokens are
// looked up in s_token_to_name. An unknown token logs a warning and yields
// the literal string "BAD_TOKEN".
QString IdPolicy::tokenToName(const int token) const
{
    if (token > 0) {
        // ID number token
        return IDNUM_FIELD_FORMAT.arg(token);
    }
    if (s_token_to_name.contains(token)) {
        return s_token_to_name[token];
    }
    qWarning() << Q_FUNC_INFO << "Bad token!";
    return "BAD_TOKEN";
}

// Parses policy text into m_tokens and sets m_valid.
// The policy is marked invalid if any word is unknown, or if a trial
// evaluation against an all-false attribute set reports a syntax error.
void IdPolicy::tokenize(QString policy_text)
{
    m_valid = true;
    m_tokens.clear();  // start afresh if we are called more than once

    // Single line, whitespace trimmed, newlines/tabs etc. removed
    policy_text = policy_text.simplified();

    // QString::split() splits using the regex to match BOUNDARIES.
    // Our regex matches CONTENT, so we use QRegularExpression functions.
    // https://dangelog.wordpress.com/2012/04/07/qregularexpression/
    const QRegularExpression re(TOKENIZE_RE_STR);
    QRegularExpressionMatchIterator it = re.globalMatch(policy_text);
    QStringList words;
    while (it.hasNext()) {
        const QRegularExpressionMatch match = it.next();
        const QString word = match.captured(1);
        words << word;
    }
    for (const QString& word : words) {
        // Policy words are case-insensitive.
        const QString element = word.toLower();
        const int token = nameToToken(element);
        if (token == BAD_TOKEN) {
            reportSyntaxError(QString("unknown word: %1").arg(word));
            invalidate();
            return;
        }
        m_tokens.append(token);
    }
    // check syntax:
    // Evaluate once with every known attribute absent; we only care whether
    // the evaluation reports a syntax error, not about its truth value.
    AttributesType blank_attributes;
    for (const QString& name : s_name_to_token.keys()) {
        blank_attributes[name] = false;
    }
    if (idPolicyChunk(m_tokens, blank_attributes) == ChunkValue::SyntaxError) {
        invalidate();
    }
}

void IdPolicy::invalidate()
    m_valid = false;

// Returns the stored policy text (m_policy_text).
QString IdPolicy::original() const
{
    return m_policy_text;
}

// Returns a normalized rendering of the policy (see stringify()), or a
// translated "[Invalid policy]" marker if the policy is not valid.
QString IdPolicy::pretty() const
{
    if (!m_valid) {
        return QObject::tr("[Invalid policy]");
    }
    return stringify(m_tokens);
}

// Renders a token sequence as text.
// Tokens are separated by single spaces, except that no space is placed
// directly after "(" or directly before ")". AND/OR are written in upper
// case; everything else in lower case.
QString IdPolicy::stringify(const QVector<int>& tokens) const
{
    QString policy;
    for (int i = 0; i < tokens.length(); ++i) {
        const int token = tokens.at(i);
        if (i > 0 && token != TOKEN_RPAREN
            && tokens.at(i - 1) != TOKEN_LPAREN) {
            policy += " ";
        }
        const QString element = tokenToName(token);
        const bool upper = (token == TOKEN_AND || token == TOKEN_OR);
        policy += upper ? element.toUpper() : element.toLower();
    }
    return policy;
}

void IdPolicy::reportSyntaxError(const QString& msg) const
    qWarning().nospace() << "Syntax error in policy (" << msg
                         << "); policy text is: " << m_policy_text;

bool IdPolicy::complies(const AttributesType& attributes) const
    // A duff policy doesn't match anything:
    if (!m_valid) {
        return false;
    // A valid empty policy matches anything:
    if (m_tokens.empty()) {
        return true;
    // Parse the whole policy:
    ChunkValue value = idPolicyChunk(m_tokens, attributes);
    // ... which recurses for sub-chunks if required.
    return value == ChunkValue::True;

// Evaluates a token sequence of the form
//     content [operator content [operator content ...]]
// against the given attributes.
// Operators are applied strictly left to right to a running value; there is
// no AND/OR precedence (use parentheses to group). Returns True/False, or
// SyntaxError if the policy is invalid, the sequence is malformed or
// incomplete, or any piece of content evaluates to Unknown/SyntaxError.
IdPolicy::ChunkValue IdPolicy::idPolicyChunk(
    const QVector<int>& tokens, const AttributesType& attributes
) const
{
    // qDebug() << Q_FUNC_INFO << stringify(tokens);
    if (!m_valid) {
        return ChunkValue::SyntaxError;
    }
    bool want_content = true;  // alternates: content, operator, content, ...
    bool processing_and = false;
    bool processing_or = false;
    int index = 0;  // advanced by idPolicyContent() / idPolicyOp()
    ChunkValue value = ChunkValue::Unknown;
    while (index < tokens.length()) {
        if (want_content) {
            // We want content (a field token)
            ChunkValue nextchunk = idPolicyContent(tokens, attributes, index);
            if (nextchunk == ChunkValue::Unknown
                || nextchunk == ChunkValue::SyntaxError) {
                return ChunkValue::SyntaxError;
            }
            if (value == ChunkValue::Unknown) {
                // First element in this chunk
                value = nextchunk;
            } else if (processing_and) {
                if (nextchunk != ChunkValue::True) {
                    value = ChunkValue::False;
                }
            } else if (processing_or) {
                if (nextchunk == ChunkValue::True) {
                    value = ChunkValue::True;
                }
            } else {
                reportSyntaxError("invalid expression");
                return ChunkValue::SyntaxError;
            }
            processing_and = false;
            processing_or = false;
        } else {
            // We want an operator
            OperatorValue op = idPolicyOp(tokens, index);
            switch (op) {
                case OperatorValue::And:
                    processing_and = true;
                    break;
                case OperatorValue::Or:
                    processing_or = true;
                    break;
                case OperatorValue::None:
                    reportSyntaxError("missing operator");
                    return ChunkValue::SyntaxError;
            }
        }
        want_content = !want_content;
    }
    // Nothing evaluated at all, or we ended on a dangling operator:
    if (value == ChunkValue::Unknown || want_content) {
        reportSyntaxError("policy incomplete");
        return ChunkValue::SyntaxError;
    }
    return value;
}

// Evaluates one piece of content starting at tokens[index], and advances
// `index` past everything it consumed. Content is one of:
//   - a parenthesized sub-policy, evaluated recursively by idPolicyChunk();
//   - NOT followed by another piece of content (the result is inverted);
//   - a single attribute / ID number token, via idPolicyElement().
// Returns SyntaxError if there is no token left, the content starts with
// AND/OR/')', or a left parenthesis is unmatched.
IdPolicy::ChunkValue IdPolicy::idPolicyContent(
    const QVector<int>& tokens, const AttributesType& attributes, int& index
) const
{
    // qDebug() << Q_FUNC_INFO << "tokens =" << stringify(tokens)
    //          << "; index =" << index;
    if (index >= tokens.length()) {
        reportSyntaxError("policy incomplete; missing content at end");
        return ChunkValue::SyntaxError;
    }
    int token = tokens.at(index++);
    switch (token) {
        case TOKEN_RPAREN:
        case TOKEN_AND:
        case TOKEN_OR:
            reportSyntaxError("chunk can't start with AND/OR/')'");
            return ChunkValue::SyntaxError;
        case TOKEN_LPAREN: {
            // The recursive bit.
            int subchunkstart = index;  // one past the opening bracket
            // Find closing parenthesis:
            int depth = 1;
            while (depth > 0) {
                if (index >= tokens.length()) {
                    reportSyntaxError("unmatched left parenthesis");
                    return ChunkValue::SyntaxError;
                }
                int subtoken = tokens.at(index++);
                if (subtoken == TOKEN_LPAREN) {
                    depth += 1;
                } else if (subtoken == TOKEN_RPAREN) {
                    depth -= 1;
                }
            }
            // At this point, subchunkstart points one past the opening
            // parenthesis, and index points one past the closing
            // parenthesis. We want to exclude the closing parenthesis too.
            int subchunklen = index - subchunkstart - 1;
            QVector<int> subchunk = tokens.mid(subchunkstart, subchunklen);
            return idPolicyChunk(subchunk, attributes);
        }
        case TOKEN_NOT: {
            ChunkValue nextchunk = idPolicyContent(tokens, attributes, index);
            switch (nextchunk) {
                case ChunkValue::SyntaxError:
                case ChunkValue::Unknown:
                    return nextchunk;
                case ChunkValue::False:  // invert
                    return ChunkValue::True;
                case ChunkValue::True:  // invert
                    return ChunkValue::False;
            }
            // Defensive: only reachable if nextchunk holds a value that is
            // none of the cases above. Don't fall through into "default".
            return ChunkValue::SyntaxError;
        }
        default:
            // meaningful token
            return idPolicyElement(attributes, token);
    }
}

    IdPolicy::OperatorValue
    IdPolicy::idPolicyOp(const QVector<int>& tokens, int& index) const
{
    // Reads the operator expected at tokens[index] and advances `index` past
    // it. Returns And/Or for TOKEN_AND/TOKEN_OR, and None for any other token
    // or if there is no token left (the latter also logs a syntax error).

    // qDebug() << Q_FUNC_INFO << "tokens =" << stringify(tokens)
    //          << "; index =" << index;
    if (index >= tokens.length()) {
        reportSyntaxError("policy incomplete; missing operator at end");
        return OperatorValue::None;
    }
    const int token = tokens.at(index++);
    switch (token) {
        case TOKEN_AND:
            return OperatorValue::And;
        case TOKEN_OR:
            return OperatorValue::Or;
        default:
            return OperatorValue::None;
    }
}

// Evaluates a single attribute or ID number token against `attributes`.
// For a fixed (non-positive) token, the attribute must be present as a key:
// the result is True/False from its flag, or Unknown (with a warning) if the
// key is missing. For an ID number (positive) token, a missing key simply
// means the patient lacks that ID number, so the result is False.
IdPolicy::ChunkValue IdPolicy::idPolicyElement(
    const AttributesType& attributes, const int token
) const
{
    const QString name = tokenToName(token);
    if (token <= 0) {
        // Not an ID number token
        if (!attributes.contains(name)) {
            qWarning() << "Policy contains element" << name
                       << "but patient information is unaware of that "
                          "attribute";
            return ChunkValue::Unknown;
        }
        return attributes[name] ? ChunkValue::True : ChunkValue::False;
    }
    // ID number token
    if (attributes.contains(name)) {
        return attributes[name] ? ChunkValue::True : ChunkValue::False;
    }
    // But if it's absent, that's just a missing ID, not a syntax error:
    return ChunkValue::False;
}

// Returns the ID number types that the policy names explicitly, i.e. every
// positive token, in order of appearance (repeats are kept).
QVector<int> IdPolicy::specificallyMentionedIdNums() const
{
    QVector<int> mentioned;
    for (const int t : m_tokens) {
        if (t > 0) {
            // Positive tokens are ID number types.
            mentioned.append(t);
        }
    }
    return mentioned;
}

// ============================================================================
// Tablet ID policy
// ============================================================================

// The ID policy built into the tablet app: sex is always required, plus
// either the full forename/surname/DOB triple or at least one ID number.
// NOTE(review): this is a namespace-scope object, so it is constructed during
// static initialization; presumably its constructor parses the text using the
// static lookup maps defined earlier in this file -- confirm, and keep this
// definition after them.
const IdPolicy
    TABLET_ID_POLICY("sex AND ((forename AND surname AND dob) OR anyidnum)");

// Rationale for the two alternatives:
// ... clinical environment: forename/surname/dob/sex, and we can await an
//     ID number later
// ... research environment: sex and one ID number for pseudonymised
//     applications