Source code for camcops_server.cc_modules.cc_proquint

"""
camcops_server/cc_modules/cc_proquint.py

===============================================================================

    Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CamCOPS.

    CamCOPS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CamCOPS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

Convert integers into Pronounceable Quintuplets (proquints)
https://arxiv.org/html/0901.4016

Based on https://github.com/dsw/proquint, which has the following licence:

--8<---------------------------------------------------------------------------

Copyright (c) 2009 Daniel S. Wilkerson
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
    Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

    Neither the name of Daniel S. Wilkerson nor the names of its
    contributors may be used to endorse or promote products derived
    from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--8<---------------------------------------------------------------------------


"""
import uuid

# Alphabets. A letter's position within its string is the value it encodes.
CONSONANTS = "bdfghjklmnprstvz"
VOWELS = "aiou"

# Number of bits carried by each kind of letter.
SIZE_OF_CONSONANT = 4
SIZE_OF_VOWEL = 2

# Reverse mappings (letter -> value), derived from the alphabets above so the
# two directions can never disagree.
LOOKUP_CONSONANTS = {
    letter: value for value, letter in enumerate(CONSONANTS)
}
LOOKUP_VOWELS = {letter: value for value, letter in enumerate(VOWELS)}

# Combined mapping used by the checksum code, which treats consonants and
# vowels alike.
LOOKUP_TABLE = {**LOOKUP_CONSONANTS, **LOOKUP_VOWELS}


class InvalidProquintException(Exception):
    """
    Raised when a string cannot be decoded as a proquint, e.g. because its
    check character does not match or it contains invalid characters.
    """
def proquint_from_uuid(uuid_obj: uuid.UUID) -> str:
    """
    Encode a UUID as a proquint string.

    The UUID is treated as its 128-bit integer value and passed to
    :func:`proquint_from_int`.

    Args:
        uuid_obj: the UUID to encode

    Returns:
        proquint string identifier
    """
    uuid_size_in_bits = 128
    uuid_as_int = uuid_obj.int
    return proquint_from_int(uuid_as_int, uuid_size_in_bits)
def proquint_from_int(int_value: int, size_in_bits: int) -> str:
    """
    Convert integer value into proquint, with a trailing check character.

    .. code-block:: none

        >>> proquint_from_int(0x493b05ee, 32)
        hohur-bilov-j

        0x493b05ee in binary is:
        0100 1001 0011 1011 - 0000 0101 1110 1110

        grouped into alternating 4 and 2 bit values:

        cons vo cons vo cons - cons vo cons vo cons
        0100 10 0100 11 1011 - 0000 01 0111 10 1110

           h  o    h  u    r -    b  i    l  o    v

        The final "-j" is the check character computed over all of the
        preceding letters.

    Args:
        int_value:
            integer value to encode; must satisfy
            ``0 <= int_value < 2 ** size_in_bits``
        size_in_bits:
            size of integer in bits (must be a non-negative multiple of 16)

    Returns:
        proquint string identifier

    Raises:
        ValueError:
            if ``size_in_bits`` is negative or not a multiple of 16, or if
            ``int_value`` does not fit in ``size_in_bits`` unsigned bits
    """
    if size_in_bits % 16 != 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must be a multiple of 16"
        )
    if size_in_bits < 0:
        raise ValueError(
            f"size_in_bits ({size_in_bits}) must not be negative"
        )
    # Out-of-range values would otherwise be silently truncated, producing a
    # proquint that does not decode back to the original integer.
    if not 0 <= int_value < (1 << size_in_bits):
        raise ValueError(
            f"int_value ({int_value}) does not fit in {size_in_bits} "
            f"unsigned bits"
        )

    # Peel off 16-bit words starting from the least significant end, then
    # reverse so that the most significant word comes first.
    words = []
    for _ in range(size_in_bits // 16):
        words.append(_proquint_from_int16(int_value & 0xFFFF))
        int_value >>= 16
    words.reverse()

    words.append(_generate_check_character("".join(words)))

    return "-".join(words)
def _generate_check_character(proquint: str) -> str:
    """
    Compute the Luhn mod 16 check character for a proquint (given without
    hyphens and without any existing check character).

    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm

    Each letter has a code point: consonants ``bdfghjklmnprstvz`` map to
    0x0-0xf and vowels ``aiou`` map to 0x0-0x3.

    Working leftwards from the last letter, every other code point is
    doubled (beginning with the last letter itself). The hexadecimal
    "digits" of each resulting value are then added up.

    .. code-block:: none

        Example (all in hex): hohur-bilov

        Character   h  o  h  u  r    b  i  l  o  v
        Code point  4  2  4  3  b    0  1  7  2  e
        Double         4     6       0     e     1c
        Reduce      4  4  4  6  b    0  1  e  2  1+c
        Sum         4  4  4  6  b    0  1  e  2  d

        Total sum = 4 + 4 + 4 + 6 + b + 0 + 1 + e + 2 + d = 0x3b
        Next multiple of 0x10 is 0x40
        Check character code = 0x40 - 0x3b = 0x5
        So check character is 'j'

    Args:
        proquint: proquint letters to protect

    Returns:
        the single consonant to append as a check character
    """
    luhn_remainder = _generate_luhn_mod_16_remainder(proquint, 2)
    # Amount needed to bring the total up to the next multiple of 16
    # (zero if it is already a multiple).
    return CONSONANTS[-luhn_remainder % 16]


def _proquint_from_int16(int16_value: int) -> str:
    """
    Convert 16-bit integer into a five-letter proquint word
    (consonant-vowel-consonant-vowel-consonant).
    """
    # Alphabet and bit width of each letter, ordered from the least
    # significant bits (last letter) to the most significant (first letter).
    fields_from_low_bits = (
        (CONSONANTS, SIZE_OF_CONSONANT),
        (VOWELS, SIZE_OF_VOWEL),
        (CONSONANTS, SIZE_OF_CONSONANT),
        (VOWELS, SIZE_OF_VOWEL),
        (CONSONANTS, SIZE_OF_CONSONANT),
    )

    remaining_bits = int16_value
    letters_last_first = []
    for alphabet, width in fields_from_low_bits:
        low_bits_mask = (1 << width) - 1
        letters_last_first.append(alphabet[remaining_bits & low_bits_mask])
        remaining_bits >>= width

    return "".join(reversed(letters_last_first))
def uuid_from_proquint(proquint: str) -> uuid.UUID:
    """
    Decode a proquint string into a UUID.

    The proquint is decoded to an integer by :func:`int_from_proquint` and
    that integer is used as the UUID's integer value.

    Args:
        proquint: string to decode

    Returns:
        the corresponding UUID
    """
    return uuid.UUID(int=int_from_proquint(proquint))
def int_from_proquint(proquint: str) -> int:
    """
    Convert proquint string into integer.

    .. code-block:: none

        >>> hex(int_from_proquint('hohur-bilov-j'))
        0x493b05ee

        h    o  h    u  r   - b    i  l    o  v
        0x4 0x2 0x4 0x3 0xb - 0x0 0x1 0x7 0x2 0xe

        0100 10 0100 11 1011 - 0000 01 0111 10 1110

        0100 1001 0011 1011 - 0000 0101 1110 1110
        0x4  0x9  0x3  0xb  - 0x0  0x5  0xe  0xe

    Args:
        proquint:
            string to decode: hyphen-separated five-letter words followed by
            a single check character

    Returns:
        converted integer value

    Raises:
        InvalidProquintException:
            if the string contains characters outside the proquint alphabet,
            fails the check-character test, is not made of five-letter words
            plus a one-letter check character, or has a consonant/vowel in
            the wrong position
    """
    letters_per_word = 5

    words = proquint.split("-")

    # The checksum helper indexes the lookup table directly, so a character
    # outside the alphabet surfaces as KeyError; report it as an invalid
    # proquint rather than leaking the KeyError to the caller.
    try:
        checksum_ok = _is_valid_proquint("".join(words))
    except KeyError:
        raise InvalidProquintException(
            f"'{proquint}' contains invalid or transposed characters"
        ) from None

    if not checksum_ok:
        raise InvalidProquintException(
            f"'{proquint}' is not valid (check character mismatch)"
        )

    # Remove check character
    check_word = words.pop()

    # Without this, malformed input such as an empty string or a proquint
    # with its hyphens missing would be decoded silently to a wrong value.
    if len(check_word) != 1 or any(
        len(word) != letters_per_word for word in words
    ):
        raise InvalidProquintException(
            f"'{proquint}' is not valid (expected five-letter words "
            f"followed by a single check character)"
        )

    int_value = 0

    for word in words:
        for (i, c) in enumerate(word):
            # Letters alternate consonant, vowel, consonant, ...
            if i & 1:
                lookup_table = LOOKUP_VOWELS
                shift = SIZE_OF_VOWEL
            else:
                lookup_table = LOOKUP_CONSONANTS
                shift = SIZE_OF_CONSONANT

            value = lookup_table.get(c)

            if value is None:
                # A valid letter of the wrong kind for this position
                raise InvalidProquintException(
                    f"'{proquint}' contains invalid or transposed characters"
                )

            int_value = (int_value << shift) | value

    return int_value
def _is_valid_proquint(proquint: str) -> bool:
    """
    Does the proquint validate?

    ``proquint`` is given without hyphens and with its check character as
    the final letter. It validates when the Luhn mod 16 remainder, computed
    with the check character left undoubled, is zero.
    """
    remainder = _generate_luhn_mod_16_remainder(proquint, 1)
    return remainder == 0


def _generate_luhn_mod_16_remainder(proquint: str, start_factor: int) -> int:
    """
    Part of the checksum calculations; see
    :func:`_generate_check_character`. For a valid sequence, the overall
    remainder should be 0. See
    https://en.wikipedia.org/wiki/Luhn_mod_N_algorithm.

    Args:
        proquint:
            letters to process (no hyphens)
        start_factor:
            multiplier applied to the last letter; after each letter the
            multiplier becomes 1 if it was 2, and 2 otherwise

    Returns:
        the sum of the hexadecimal digits of the weighted code points,
        modulo 16
    """
    multiplier = start_factor
    digit_total = 0

    # Work from the last letter towards the first.
    for letter in reversed(proquint):
        # Split the weighted code point into its two hexadecimal digits.
        high_digit, low_digit = divmod(LOOKUP_TABLE[letter] * multiplier, 16)
        digit_total += high_digit + low_digit
        multiplier = 1 if multiplier == 2 else 2

    return digit_total % 16