"""Per-character escaping utility and its tests.

``PerCharacterEscaper`` lives in ``selfie_lib/PerCharacterEscaper.py`` and is
re-exported from the package root by ``selfie_lib/__init__.py``; the test class
at the bottom belongs in ``tests/PerCharacterEscaper_test.py``.
"""

from typing import Dict, List

import pytest


class PerCharacterEscaper:
    """Escape and unescape individual characters using one escape character.

    Each code point in ``escaped_code_points`` is written as the escape
    character followed by the code point at the same index in
    ``escaped_by_code_points``. Unescaping reverses that mapping; an escape
    character followed by a character that is not in
    ``escaped_by_code_points`` simply yields that character unchanged.
    """

    def __init__(
        self,
        escape_code_point: int,
        escaped_code_points: List[int],
        escaped_by_code_points: List[int],
    ):
        """Build the lookup tables for one escape policy.

        Args:
            escape_code_point: Code point of the escape character.
            escaped_code_points: Code points that must be escaped.
            escaped_by_code_points: Replacement code points, index-aligned
                with ``escaped_code_points``.

        Raises:
            ValueError: If the two lists differ in length.
        """
        if len(escaped_code_points) != len(escaped_by_code_points):
            raise ValueError(
                "escaped_code_points and escaped_by_code_points must have the "
                "same length."
            )
        self.__escape_code_point = escape_code_point
        self.__escape_char = chr(escape_code_point)
        # Tables are built once so per-character lookups are O(1), and so the
        # caller's lists are never aliased (self_escape passes one list twice).
        # setdefault keeps the first occurrence, matching list.index().
        self.__escape_map: Dict[str, str] = {}
        self.__unescape_map: Dict[str, str] = {}
        for escaped, escaped_by in zip(escaped_code_points, escaped_by_code_points):
            self.__escape_map.setdefault(
                chr(escaped), self.__escape_char + chr(escaped_by)
            )
            self.__unescape_map.setdefault(chr(escaped_by), chr(escaped))

    def __first_offset_needing_escape(self, input_string: str) -> int:
        """Return the index of the first character to escape, or -1 if none."""
        escape_map = self.__escape_map
        for offset, char in enumerate(input_string):
            if char in escape_map:
                return offset
        return -1

    def escape(self, input_string: str) -> str:
        """Return ``input_string`` with every escapable character escaped.

        The very same string object is returned when nothing needs escaping.
        """
        first = self.__first_offset_needing_escape(input_string)
        if first == -1:
            return input_string
        escape_map = self.__escape_map
        parts = [input_string[:first]]
        parts.extend(escape_map.get(char, char) for char in input_string[first:])
        return "".join(parts)

    def unescape(self, input_string: str) -> str:
        """Reverse :meth:`escape`.

        The very same string object is returned when the input contains no
        escape character.

        Raises:
            ValueError: If an escape character is not followed by another
                character, i.e. the input ends with a dangling escape.
        """
        escape_char = self.__escape_char
        first = input_string.find(escape_char)
        if first == -1:
            return input_string
        unescape_map = self.__unescape_map
        parts = [input_string[:first]]
        chars = iter(input_string[first:])
        for char in chars:
            if char != escape_char:
                parts.append(char)
                continue
            # Consume the character after the escape. Detecting the dangling
            # escape here (rather than by inspecting the string's suffix)
            # also catches odd-length runs of trailing escape characters.
            escaped_by = next(chars, None)
            if escaped_by is None:
                raise ValueError(
                    f"Escape character '{escape_char}' can't be the last "
                    "character in a string."
                )
            parts.append(unescape_map.get(escaped_by, escaped_by))
        return "".join(parts)

    @classmethod
    def self_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Create an escaper where each character is escaped by itself.

        The first character of ``escape_policy`` is the escape character;
        every character in the policy (including the first) is escaped by
        prefixing it with the escape character.

        Raises:
            ValueError: If ``escape_policy`` is empty.
        """
        if not escape_policy:
            raise ValueError("Escape policy string must not be empty.")
        code_points = [ord(c) for c in escape_policy]
        return cls(code_points[0], code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Create an escaper from ``(escaped, escaped_by)`` character pairs.

        Characters at even indices of ``escape_policy`` are escaped by the
        character that follows them; the first character is the escape
        character.

        Raises:
            ValueError: If ``escape_policy`` is empty or has an odd length.
        """
        if not escape_policy:
            raise ValueError("Escape policy string must not be empty.")
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        return cls(code_points[0], code_points[0::2], code_points[1::2])


class TestPerCharacterEscaper:
    """Behavioral tests for :class:`PerCharacterEscaper`."""

    def test_performance_optimization_self(self):
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # 'is' checks identity: unchanged input must come back as the same object.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # '==' checks value equality.
        assert (
            escaper.escape("1") == "`1"
        ), "Escaping '1' should prepend the escape character"
        assert (
            escaper.escape("`") == "``"
        ), "Escaping the escape character should duplicate it"
        assert (
            escaper.escape("abc123`def") == "abc`1`2`3``def"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'"
        assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_performance_optimization_specific(self):
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        # 'is' checks identity: unchanged input must come back as the same object.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # '==' checks value equality.
        assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'"
        assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'"
        assert (
            escaper.escape("abc123`def") == "abc`b`c`d`adef"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'"
        assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_corner_cases_self(self):
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        # An odd run of escape characters still ends with a dangling escape.
        with pytest.raises(ValueError):
            escaper.unescape("```")
        assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'"

    def test_corner_cases_specific(self):
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'"

    def test_roundtrip(self):
        escaper = PerCharacterEscaper.self_escape("`<>")

        def roundtrip(text):
            assert (
                escaper.unescape(escaper.escape(text)) == text
            ), f"Roundtrip of '{text}' did not return the original string"

        roundtrip("")
        roundtrip("~`/")