From cbcff7e4e4e5cf6fbcbc33f5ab097a292f1e5953 Mon Sep 17 00:00:00 2001 From: Edwin Date: Wed, 6 Mar 2024 20:23:10 -0800 Subject: [PATCH 1/6] Push PerCharacterEscaper --- .../selfie_lib/PerCharacterEscaper.py | 81 +++++++++++++++++++ python/selfie-lib/selfie_lib/__init__.py | 2 +- .../tests/PerCharacterEscaper_test.py | 54 +++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 python/selfie-lib/selfie_lib/PerCharacterEscaper.py create mode 100644 python/selfie-lib/tests/PerCharacterEscaper_test.py diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py new file mode 100644 index 00000000..42181d93 --- /dev/null +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -0,0 +1,81 @@ +class PerCharacterEscaper: + def __init__(self, escape_code_point, escaped_code_points, escaped_by_code_points): + self.escape_code_point = escape_code_point + self.escaped_code_points = escaped_code_points + self.escaped_by_code_points = escaped_by_code_points + + @staticmethod + def _first_offset_needing_escape(input_string, escaped_code_points, escape_code_point=None): + length = len(input_string) + for offset in range(length): + codepoint = ord(input_string[offset]) + if escape_code_point is not None and codepoint == escape_code_point: + return offset + if codepoint in escaped_code_points: + return offset + return -1 + + def escape(self, input_string): + no_escapes = self._first_offset_needing_escape(input_string, self.escaped_code_points) + if no_escapes == -1: + return input_string + else: + result = [] + result.append(input_string[:no_escapes]) + for char in input_string[no_escapes:]: + codepoint = ord(char) + if codepoint in self.escaped_code_points: + idx = self.escaped_code_points.index(codepoint) + result.append(chr(self.escape_code_point)) + result.append(chr(self.escaped_by_code_points[idx])) + else: + result.append(char) + return ''.join(result) + + def unescape(self, input_string): + if input_string.endswith(chr(self.escape_code_point)) and not input_string.endswith(chr(self.escape_code_point)*2): + raise ValueError("Escape character '{}' can't be the last character in a string.".format(chr(self.escape_code_point))) + + no_escapes = self._first_offset_needing_escape(input_string, [self.escape_code_point], self.escape_code_point) + if no_escapes == -1: + return input_string + else: + result = [] + result.append(input_string[:no_escapes]) + skip_next = False + for i in range(no_escapes, len(input_string)): + if skip_next: + skip_next = False + continue + codepoint = ord(input_string[i]) + if codepoint == self.escape_code_point and (i + 1) < len(input_string): + next_codepoint = ord(input_string[i + 1]) + if next_codepoint in self.escaped_by_code_points: + idx = self.escaped_by_code_points.index(next_codepoint) + result.append(chr(self.escaped_code_points[idx])) + skip_next = True + else: + result.append(input_string[i + 1]) + skip_next = True + else: + result.append(chr(codepoint)) + return ''.join(result) + + + @classmethod + def self_escape(cls, escape_policy): + code_points = [ord(c) for c in escape_policy] + escape_code_point = code_points[0] + return cls(escape_code_point, code_points, code_points) + + + @classmethod + def specified_escape(cls, escape_policy): + code_points = [ord(c) for c in escape_policy] + if len(code_points) % 2 != 0: + raise ValueError("Escape policy string must have an even number of characters.") + escape_code_point = code_points[0] + escaped_code_points = code_points[0::2] + escaped_by_code_points = code_points[1::2] + return cls(escape_code_point, escaped_code_points, escaped_by_code_points) + diff --git a/python/selfie-lib/selfie_lib/__init__.py b/python/selfie-lib/selfie_lib/__init__.py index e2859a5d..6cfb94ba 100644 --- a/python/selfie-lib/selfie_lib/__init__.py +++ b/python/selfie-lib/selfie_lib/__init__.py @@ -1,3 +1,3 @@ from .LineReader import LineReader as LineReader from .Slice import Slice as Slice - +from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper diff --git a/python/selfie-lib/tests/PerCharacterEscaper_test.py b/python/selfie-lib/tests/PerCharacterEscaper_test.py new file mode 100644 index 00000000..66ee0789 --- /dev/null +++ b/python/selfie-lib/tests/PerCharacterEscaper_test.py @@ -0,0 +1,54 @@ +import pytest + +from selfie_lib import PerCharacterEscaper + +class TestPerCharacterEscaper: + def test_performance_optimization_self(self): + escaper = PerCharacterEscaper.self_escape("`123") + abc = "abc" + # Using 'is' to check for the exact same object might not behave as in Kotlin, use == for equality in Python + assert escaper.escape(abc) == abc + assert escaper.unescape(abc) == abc + + assert escaper.escape("1") == "`1" + assert escaper.escape("`") == "``" + assert escaper.escape("abc123`def") == "abc`1`2`3``def" + + assert escaper.unescape("`1") == "1" + assert escaper.unescape("``") == "`" + assert escaper.unescape("abc`1`2`3``def") == "abc123`def" + + def test_performance_optimization_specific(self): + escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") + abc = "abc" + assert escaper.escape(abc) == abc + assert escaper.unescape(abc) == abc + + assert escaper.escape("1") == "`b" + assert escaper.escape("`") == "`a" + assert escaper.escape("abc123`def") == "abc`b`c`d`adef" + + assert escaper.unescape("`b") == "1" + assert escaper.unescape("`a") == "`" + assert escaper.unescape("abc`1`2`3``def") == "abc123`def" + + def test_corner_cases_self(self): + escaper = PerCharacterEscaper.self_escape("`123") + with pytest.raises(ValueError) as excinfo: + escaper.unescape("`") + assert str(excinfo.value) == "Escape character '`' can't be the last character in a string." + assert escaper.unescape("`a") == "a" + + def test_corner_cases_specific(self): + escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") + with pytest.raises(ValueError) as excinfo: + escaper.unescape("`") + assert str(excinfo.value) == "Escape character '`' can't be the last character in a string." + assert escaper.unescape("`e") == "e" + + def test_roundtrip(self): + escaper = PerCharacterEscaper.self_escape("`<>") + def roundtrip(str): + assert escaper.unescape(escaper.escape(str)) == str + roundtrip("") + roundtrip("~`/") From b6fd88ef1aa454b6934b0d271c90b1b98f38a3f8 Mon Sep 17 00:00:00 2001 From: Edwin Date: Wed, 6 Mar 2024 20:36:52 -0800 Subject: [PATCH 2/6] fix the ruff format --- .../selfie_lib/PerCharacterEscaper.py | 37 ++++++++++++------- python/selfie-lib/selfie_lib/__init__.py | 1 - .../tests/PerCharacterEscaper_test.py | 15 ++++++-- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py index 42181d93..fa2b347c 100644 --- a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -5,7 +5,9 @@ def __init__(self, escape_code_point, escaped_code_points, escaped_by_code_point self.escaped_by_code_points = escaped_by_code_points @staticmethod - def _first_offset_needing_escape(input_string, escaped_code_points, escape_code_point=None): + def _first_offset_needing_escape( + input_string, escaped_code_points, escape_code_point=None + ): length = len(input_string) for offset in range(length): codepoint = ord(input_string[offset]) @@ -16,7 +18,9 @@ def _first_offset_needing_escape(input_string, escaped_code_points, escape_code_ return -1 def escape(self, input_string): - no_escapes = self._first_offset_needing_escape(input_string, self.escaped_code_points) + no_escapes = self._first_offset_needing_escape( + input_string, self.escaped_code_points + ) if no_escapes == -1: return input_string else: @@ -30,13 +34,21 @@ def escape(self, input_string): result.append(chr(self.escaped_by_code_points[idx])) else: result.append(char) - return ''.join(result) + return "".join(result) def unescape(self, input_string): - if input_string.endswith(chr(self.escape_code_point)) and not input_string.endswith(chr(self.escape_code_point)*2): - raise ValueError("Escape character '{}' can't be the last character in a string.".format(chr(self.escape_code_point))) - - no_escapes = self._first_offset_needing_escape(input_string, [self.escape_code_point], self.escape_code_point) + if input_string.endswith( + chr(self.escape_code_point) + ) and not input_string.endswith(chr(self.escape_code_point) * 2): + raise ValueError( + "Escape character '{}' can't be the last character in a string.".format( + chr(self.escape_code_point) + ) + ) + + no_escapes = self._first_offset_needing_escape( + input_string, [self.escape_code_point], self.escape_code_point + ) if no_escapes == -1: return input_string else: @@ -59,8 +71,7 @@ def unescape(self, input_string): skip_next = True else: result.append(chr(codepoint)) - return ''.join(result) - + return "".join(result) @classmethod def self_escape(cls, escape_policy): @@ -68,14 +79,14 @@ def self_escape(cls, escape_policy): escape_code_point = code_points[0] return cls(escape_code_point, code_points, code_points) - @classmethod def specified_escape(cls, escape_policy): code_points = [ord(c) for c in escape_policy] - if len(code_points) % 2 != 0: - raise ValueError("Escape policy string must have an even number of characters.") + if len(code_points) % 2 != 0: + raise ValueError( + "Escape policy string must have an even number of characters." + ) escape_code_point = code_points[0] escaped_code_points = code_points[0::2] escaped_by_code_points = code_points[1::2] return cls(escape_code_point, escaped_code_points, escaped_by_code_points) - diff --git a/python/selfie-lib/selfie_lib/__init__.py b/python/selfie-lib/selfie_lib/__init__.py index 9c2d3c08..6cfb94ba 100644 --- a/python/selfie-lib/selfie_lib/__init__.py +++ b/python/selfie-lib/selfie_lib/__init__.py @@ -1,4 +1,3 @@ from .LineReader import LineReader as LineReader from .Slice import Slice as Slice from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper - diff --git a/python/selfie-lib/tests/PerCharacterEscaper_test.py b/python/selfie-lib/tests/PerCharacterEscaper_test.py index 66ee0789..8a397656 100644 --- a/python/selfie-lib/tests/PerCharacterEscaper_test.py +++ b/python/selfie-lib/tests/PerCharacterEscaper_test.py @@ -2,10 +2,11 @@ from selfie_lib import PerCharacterEscaper + class TestPerCharacterEscaper: def test_performance_optimization_self(self): escaper = PerCharacterEscaper.self_escape("`123") - abc = "abc" + abc = "abc" # Using 'is' to check for the exact same object might not behave as in Kotlin, use == for equality in Python assert escaper.escape(abc) == abc assert escaper.unescape(abc) == abc @@ -36,19 +37,27 @@ def test_corner_cases_self(self): escaper = PerCharacterEscaper.self_escape("`123") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert str(excinfo.value) == "Escape character '`' can't be the last character in a string." + assert ( + str(excinfo.value) + == "Escape character '`' can't be the last character in a string." + ) assert escaper.unescape("`a") == "a" def test_corner_cases_specific(self): escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert str(excinfo.value) == "Escape character '`' can't be the last character in a string." + assert ( + str(excinfo.value) + == "Escape character '`' can't be the last character in a string." + ) assert escaper.unescape("`e") == "e" def test_roundtrip(self): escaper = PerCharacterEscaper.self_escape("`<>") + def roundtrip(str): assert escaper.unescape(escaper.escape(str)) == str + roundtrip("") roundtrip("~`/") From a5b7af8531dab86cdc995ebda9cf907aa4d15c24 Mon Sep 17 00:00:00 2001 From: Edwin Date: Thu, 7 Mar 2024 11:16:37 -0800 Subject: [PATCH 3/6] fix is vs == and added the private keywords and remove the static for __first_offset_needing_escape --- .../selfie_lib/PerCharacterEscaper.py | 64 ++++++++----------- .../tests/PerCharacterEscaper_test.py | 57 ++++++++--------- 2 files changed, 53 insertions(+), 68 deletions(-) diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py index fa2b347c..5835f2ee 100644 --- a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -1,26 +1,23 @@ +from typing import List + class PerCharacterEscaper: - def __init__(self, escape_code_point, escaped_code_points, escaped_by_code_points): - self.escape_code_point = escape_code_point - self.escaped_code_points = escaped_code_points - self.escaped_by_code_points = escaped_by_code_points + def __init__(self, escape_code_point: int, escaped_code_points: List[int], escaped_by_code_points: List[int]): + self.__escape_code_point = escape_code_point + self.__escaped_code_points = escaped_code_points + self.__escaped_by_code_points = escaped_by_code_points - @staticmethod - def _first_offset_needing_escape( - input_string, escaped_code_points, escape_code_point=None - ): + def __first_offset_needing_escape(self, input_string: str, escape_code_point: int = None) -> int: length = len(input_string) for offset in range(length): codepoint = ord(input_string[offset]) - if escape_code_point is not None and codepoint == escape_code_point: - return offset - if codepoint in escaped_code_points: + if escape_code_point is None: + escape_code_point = self.__escape_code_point + if codepoint == escape_code_point or codepoint in self.__escaped_code_points: return offset return -1 - def escape(self, input_string): - no_escapes = self._first_offset_needing_escape( - input_string, self.escaped_code_points - ) + def escape(self, input_string: str) -> str: + no_escapes = self.__first_offset_needing_escape(input_string) if no_escapes == -1: return input_string else: @@ -28,27 +25,19 @@ def escape(self, input_string): result.append(input_string[:no_escapes]) for char in input_string[no_escapes:]: codepoint = ord(char) - if codepoint in self.escaped_code_points: - idx = self.escaped_code_points.index(codepoint) - result.append(chr(self.escape_code_point)) - result.append(chr(self.escaped_by_code_points[idx])) + if codepoint in self.__escaped_code_points: + idx = self.__escaped_code_points.index(codepoint) + result.append(chr(self.__escape_code_point)) + result.append(chr(self.__escaped_by_code_points[idx])) else: result.append(char) return "".join(result) - def unescape(self, input_string): - if input_string.endswith( - chr(self.escape_code_point) - ) and not input_string.endswith(chr(self.escape_code_point) * 2): - raise ValueError( - "Escape character '{}' can't be the last character in a string.".format( - chr(self.escape_code_point) - ) - ) + def unescape(self, input_string: str) -> str: + if input_string.endswith(chr(self.__escape_code_point)) and not input_string.endswith(chr(self.__escape_code_point) * 2): + raise ValueError(f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string.") - no_escapes = self._first_offset_needing_escape( - input_string, [self.escape_code_point], self.escape_code_point - ) + no_escapes = self.__first_offset_needing_escape(input_string, self.__escape_code_point) if no_escapes == -1: return input_string else: @@ -60,11 +49,11 @@ def unescape(self, input_string): skip_next = False continue codepoint = ord(input_string[i]) - if codepoint == self.escape_code_point and (i + 1) < len(input_string): + if codepoint == self.__escape_code_point and (i + 1) < len(input_string): next_codepoint = ord(input_string[i + 1]) - if next_codepoint in self.escaped_by_code_points: - idx = self.escaped_by_code_points.index(next_codepoint) - result.append(chr(self.escaped_code_points[idx])) + if next_codepoint in self.__escaped_by_code_points: + idx = self.__escaped_by_code_points.index(next_codepoint) + result.append(chr(self.__escaped_code_points[idx])) skip_next = True else: result.append(input_string[i + 1]) @@ -73,6 +62,7 @@ def unescape(self, input_string): result.append(chr(codepoint)) return "".join(result) + @classmethod def self_escape(cls, escape_policy): code_points = [ord(c) for c in escape_policy] @@ -83,9 +73,7 @@ def self_escape(cls, escape_policy): def specified_escape(cls, escape_policy): code_points = [ord(c) for c in escape_policy] if len(code_points) % 2 != 0: - raise ValueError( - "Escape policy string must have an even number of characters." - ) + raise ValueError("Escape policy string must have an even number of characters.") escape_code_point = code_points[0] escaped_code_points = code_points[0::2] escaped_by_code_points = code_points[1::2] diff --git a/python/selfie-lib/tests/PerCharacterEscaper_test.py b/python/selfie-lib/tests/PerCharacterEscaper_test.py index 8a397656..a123a245 100644 --- a/python/selfie-lib/tests/PerCharacterEscaper_test.py +++ b/python/selfie-lib/tests/PerCharacterEscaper_test.py @@ -2,62 +2,59 @@ from selfie_lib import PerCharacterEscaper - class TestPerCharacterEscaper: def test_performance_optimization_self(self): escaper = PerCharacterEscaper.self_escape("`123") abc = "abc" - # Using 'is' to check for the exact same object might not behave as in Kotlin, use == for equality in Python - assert escaper.escape(abc) == abc - assert escaper.unescape(abc) == abc + # Correct use of 'is' for checking object identity. + assert escaper.escape(abc) is abc, "Escape should return the original object when no change is made" + assert escaper.unescape(abc) is abc, "Unescape should return the original object when no change is made" - assert escaper.escape("1") == "`1" - assert escaper.escape("`") == "``" - assert escaper.escape("abc123`def") == "abc`1`2`3``def" + # Use '==' for checking value equality. + assert escaper.escape("1") == "`1", "Escaping '1' should prepend the escape character" + assert escaper.escape("`") == "``", "Escaping the escape character should duplicate it" + assert escaper.escape("abc123`def") == "abc`1`2`3``def", "Escaping 'abc123`def' did not produce the expected result" - assert escaper.unescape("`1") == "1" - assert escaper.unescape("``") == "`" - assert escaper.unescape("abc`1`2`3``def") == "abc123`def" + assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'" + assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'" + assert escaper.unescape("abc`1`2`3``def") == "abc123`def", "Unescaping 'abc`1`2`3``def' did not produce the expected result" def test_performance_optimization_specific(self): escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") abc = "abc" - assert escaper.escape(abc) == abc - assert escaper.unescape(abc) == abc + # Correct use of 'is' for object identity. + assert escaper.escape(abc) is abc, "Escape should return the original object when no change is made" + assert escaper.unescape(abc) is abc, "Unescape should return the original object when no change is made" - assert escaper.escape("1") == "`b" - assert escaper.escape("`") == "`a" - assert escaper.escape("abc123`def") == "abc`b`c`d`adef" + # Use '==' for value equality. + assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'" + assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'" + assert escaper.escape("abc123`def") == "abc`b`c`d`adef", "Escaping 'abc123`def' did not produce the expected result" - assert escaper.unescape("`b") == "1" - assert escaper.unescape("`a") == "`" - assert escaper.unescape("abc`1`2`3``def") == "abc123`def" + assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'" + assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'" + assert escaper.unescape("abc`1`2`3``def") == "abc123`def", "Unescaping 'abc`1`2`3``def' did not produce the expected result" def test_corner_cases_self(self): escaper = PerCharacterEscaper.self_escape("`123") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert ( - str(excinfo.value) - == "Escape character '`' can't be the last character in a string." - ) - assert escaper.unescape("`a") == "a" + assert str(excinfo.value) == "Escape character '`' can't be the last character in a string.", \ + "Unescaping a string ending with a single escape character should raise ValueError" + assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'" def test_corner_cases_specific(self): escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert ( - str(excinfo.value) - == "Escape character '`' can't be the last character in a string." - ) - assert escaper.unescape("`e") == "e" + assert str(excinfo.value) == "Escape character '`' can't be the last character in a string.", \ + "Unescaping a string ending with a single escape character should raise ValueError" + assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'" def test_roundtrip(self): escaper = PerCharacterEscaper.self_escape("`<>") - def roundtrip(str): - assert escaper.unescape(escaper.escape(str)) == str + assert escaper.unescape(escaper.escape(str)) == str, f"Roundtrip of '{str}' did not return the original string" roundtrip("") roundtrip("~`/") From 34db5b81b67b05aabdc8009d50da464d10772010 Mon Sep 17 00:00:00 2001 From: Edwin Date: Thu, 7 Mar 2024 11:20:07 -0800 Subject: [PATCH 4/6] fix is vs == and added the private keywords and remove the static for __first_offset_needing_escape --- .../selfie_lib/PerCharacterEscaper.py | 44 ++++++++++---- .../tests/PerCharacterEscaper_test.py | 58 ++++++++++++++----- 2 files changed, 75 insertions(+), 27 deletions(-) diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py index 5835f2ee..277df098 100644 --- a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -1,18 +1,29 @@ -from typing import List +from typing import List, Optional + class PerCharacterEscaper: - def __init__(self, escape_code_point: int, escaped_code_points: List[int], escaped_by_code_points: List[int]): + def __init__( + self, + escape_code_point: int, + escaped_code_points: List[int], + escaped_by_code_points: List[int], + ): self.__escape_code_point = escape_code_point self.__escaped_code_points = escaped_code_points self.__escaped_by_code_points = escaped_by_code_points - def __first_offset_needing_escape(self, input_string: str, escape_code_point: int = None) -> int: + def __first_offset_needing_escape( + self, input_string: str, escape_code_point: Optional[int] = None + ) -> int: + if escape_code_point is None: + escape_code_point = self.__escape_code_point length = len(input_string) for offset in range(length): codepoint = ord(input_string[offset]) - if escape_code_point is None: - escape_code_point = self.__escape_code_point - if codepoint == escape_code_point or codepoint in self.__escaped_code_points: + if ( + codepoint == escape_code_point + or codepoint in self.__escaped_code_points + ): return offset return -1 @@ -34,10 +45,16 @@ def escape(self, input_string: str) -> str: return "".join(result) def unescape(self, input_string: str) -> str: - if input_string.endswith(chr(self.__escape_code_point)) and not input_string.endswith(chr(self.__escape_code_point) * 2): - raise ValueError(f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string.") + if input_string.endswith( + chr(self.__escape_code_point) + ) and not input_string.endswith(chr(self.__escape_code_point) * 2): + raise ValueError( + f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string." + ) - no_escapes = self.__first_offset_needing_escape(input_string, self.__escape_code_point) + no_escapes = self.__first_offset_needing_escape( + input_string, self.__escape_code_point + ) if no_escapes == -1: return input_string else: @@ -49,7 +66,9 @@ def unescape(self, input_string: str) -> str: skip_next = False continue codepoint = ord(input_string[i]) - if codepoint == self.__escape_code_point and (i + 1) < len(input_string): + if codepoint == self.__escape_code_point and (i + 1) < len( + input_string + ): next_codepoint = ord(input_string[i + 1]) if next_codepoint in self.__escaped_by_code_points: idx = self.__escaped_by_code_points.index(next_codepoint) @@ -62,7 +81,6 @@ def unescape(self, input_string: str) -> str: result.append(chr(codepoint)) return "".join(result) - @classmethod def self_escape(cls, escape_policy): code_points = [ord(c) for c in escape_policy] @@ -73,7 +91,9 @@ def self_escape(cls, escape_policy): def specified_escape(cls, escape_policy): code_points = [ord(c) for c in escape_policy] if len(code_points) % 2 != 0: - raise ValueError("Escape policy string must have an even number of characters.") + raise ValueError( + "Escape policy string must have an even number of characters." + ) escape_code_point = code_points[0] escaped_code_points = code_points[0::2] escaped_by_code_points = code_points[1::2] diff --git a/python/selfie-lib/tests/PerCharacterEscaper_test.py b/python/selfie-lib/tests/PerCharacterEscaper_test.py index a123a245..87f8fec5 100644 --- a/python/selfie-lib/tests/PerCharacterEscaper_test.py +++ b/python/selfie-lib/tests/PerCharacterEscaper_test.py @@ -2,59 +2,87 @@ from selfie_lib import PerCharacterEscaper + class TestPerCharacterEscaper: def test_performance_optimization_self(self): escaper = PerCharacterEscaper.self_escape("`123") abc = "abc" # Correct use of 'is' for checking object identity. - assert escaper.escape(abc) is abc, "Escape should return the original object when no change is made" - assert escaper.unescape(abc) is abc, "Unescape should return the original object when no change is made" + assert ( + escaper.escape(abc) is abc + ), "Escape should return the original object when no change is made" + assert ( + escaper.unescape(abc) is abc + ), "Unescape should return the original object when no change is made" # Use '==' for checking value equality. - assert escaper.escape("1") == "`1", "Escaping '1' should prepend the escape character" - assert escaper.escape("`") == "``", "Escaping the escape character should duplicate it" - assert escaper.escape("abc123`def") == "abc`1`2`3``def", "Escaping 'abc123`def' did not produce the expected result" + assert ( + escaper.escape("1") == "`1" + ), "Escaping '1' should prepend the escape character" + assert ( + escaper.escape("`") == "``" + ), "Escaping the escape character should duplicate it" + assert ( + escaper.escape("abc123`def") == "abc`1`2`3``def" + ), "Escaping 'abc123`def' did not produce the expected result" assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'" assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'" - assert escaper.unescape("abc`1`2`3``def") == "abc123`def", "Unescaping 'abc`1`2`3``def' did not produce the expected result" + assert ( + escaper.unescape("abc`1`2`3``def") == "abc123`def" + ), "Unescaping 'abc`1`2`3``def' did not produce the expected result" def test_performance_optimization_specific(self): escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") abc = "abc" # Correct use of 'is' for object identity. - assert escaper.escape(abc) is abc, "Escape should return the original object when no change is made" - assert escaper.unescape(abc) is abc, "Unescape should return the original object when no change is made" + assert ( + escaper.escape(abc) is abc + ), "Escape should return the original object when no change is made" + assert ( + escaper.unescape(abc) is abc + ), "Unescape should return the original object when no change is made" # Use '==' for value equality. assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'" assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'" - assert escaper.escape("abc123`def") == "abc`b`c`d`adef", "Escaping 'abc123`def' did not produce the expected result" + assert ( + escaper.escape("abc123`def") == "abc`b`c`d`adef" + ), "Escaping 'abc123`def' did not produce the expected result" assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'" assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'" - assert escaper.unescape("abc`1`2`3``def") == "abc123`def", "Unescaping 'abc`1`2`3``def' did not produce the expected result" + assert ( + escaper.unescape("abc`1`2`3``def") == "abc123`def" + ), "Unescaping 'abc`1`2`3``def' did not produce the expected result" def test_corner_cases_self(self): escaper = PerCharacterEscaper.self_escape("`123") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert str(excinfo.value) == "Escape character '`' can't be the last character in a string.", \ - "Unescaping a string ending with a single escape character should raise ValueError" + assert ( + str(excinfo.value) + == "Escape character '`' can't be the last character in a string." + ), "Unescaping a string ending with a single escape character should raise ValueError" assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'" def test_corner_cases_specific(self): escaper = PerCharacterEscaper.specified_escape("`a1b2c3d") with pytest.raises(ValueError) as excinfo: escaper.unescape("`") - assert str(excinfo.value) == "Escape character '`' can't be the last character in a string.", \ - "Unescaping a string ending with a single escape character should raise ValueError" + assert ( + str(excinfo.value) + == "Escape character '`' can't be the last character in a string." + ), "Unescaping a string ending with a single escape character should raise ValueError" assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'" def test_roundtrip(self): escaper = PerCharacterEscaper.self_escape("`<>") + def roundtrip(str): - assert escaper.unescape(escaper.escape(str)) == str, f"Roundtrip of '{str}' did not return the original string" + assert ( + escaper.unescape(escaper.escape(str)) == str + ), f"Roundtrip of '{str}' did not return the original string" roundtrip("") roundtrip("~`/") From 236bd6b409af7682c944c6f50394bb23f1d0cabe Mon Sep 17 00:00:00 2001 From: Edwin Date: Fri, 8 Mar 2024 11:08:46 -0800 Subject: [PATCH 5/6] Make changes to PerCharacterEscaper, uses __escape_code_point --- .../selfie_lib/PerCharacterEscaper.py | 76 +++++++++---------- 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py index 277df098..59085c6c 100644 --- a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -1,4 +1,4 @@ -from typing import List, Optional +from typing import List class PerCharacterEscaper: @@ -12,16 +12,12 @@ def __init__( self.__escaped_code_points = escaped_code_points self.__escaped_by_code_points = escaped_by_code_points - def __first_offset_needing_escape( - self, input_string: str, escape_code_point: Optional[int] = None - ) -> int: - if escape_code_point is None: - escape_code_point = self.__escape_code_point + def __first_offset_needing_escape(self, input_string: str) -> int: length = len(input_string) for offset in range(length): codepoint = ord(input_string[offset]) if ( - codepoint == escape_code_point + codepoint == self.__escape_code_point or codepoint in self.__escaped_code_points ): return offset @@ -45,41 +41,43 @@ def escape(self, input_string: str) -> str: return "".join(result) def unescape(self, input_string: str) -> str: - if input_string.endswith( - chr(self.__escape_code_point) - ) and not input_string.endswith(chr(self.__escape_code_point) * 2): - raise ValueError( - f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string." - ) - - no_escapes = self.__first_offset_needing_escape( - input_string, self.__escape_code_point - ) - if no_escapes == -1: + if not input_string: return input_string - else: - result = [] - result.append(input_string[:no_escapes]) - skip_next = False - for i in range(no_escapes, len(input_string)): - if skip_next: - skip_next = False - continue - codepoint = ord(input_string[i]) - if codepoint == self.__escape_code_point and (i + 1) < len( - input_string - ): - next_codepoint = ord(input_string[i + 1]) - if next_codepoint in self.__escaped_by_code_points: - idx = self.__escaped_by_code_points.index(next_codepoint) - result.append(chr(self.__escaped_code_points[idx])) - skip_next = True + + result = [] + i = 0 + + while i < len(input_string): + if ord(input_string[i]) == self.__escape_code_point: + if i + 1 < len(input_string): + next_char = input_string[i + 1] + next_codepoint = ord(next_char) + + if next_codepoint == self.__escape_code_point: + result.append(chr(next_codepoint)) + i += 2 else: - result.append(input_string[i + 1]) - skip_next = True + try: + idx = self.__escaped_by_code_points.index(next_codepoint) + result.append(chr(self.__escaped_code_points[idx])) + i += 2 + continue + except ValueError: + result.append(next_char) + i += 2 else: - result.append(chr(codepoint)) - return "".join(result) + raise ValueError( + f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string." + ) + else: + result.append(input_string[i]) + i += 1 + + processed_string = "".join(result) + if processed_string == input_string: + return input_string + else: + return processed_string @classmethod def self_escape(cls, escape_policy): From 45a7e26635e0326848429eb1610a84590d00901b Mon Sep 17 00:00:00 2001 From: Edwin Date: Fri, 8 Mar 2024 12:05:23 -0800 Subject: [PATCH 6/6] Change the unescape function to the old logic --- .../selfie_lib/PerCharacterEscaper.py | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py index 59085c6c..3f9d33df 100644 --- a/python/selfie-lib/selfie_lib/PerCharacterEscaper.py +++ b/python/selfie-lib/selfie_lib/PerCharacterEscaper.py @@ -41,43 +41,40 @@ def escape(self, input_string: str) -> str: return "".join(result) def unescape(self, input_string: str) -> str: - if not input_string: - return input_string - - result = [] - i = 0 - - while i < len(input_string): - if ord(input_string[i]) == self.__escape_code_point: - if i + 1 < len(input_string): - next_char = input_string[i + 1] - next_codepoint = ord(next_char) - - if next_codepoint == self.__escape_code_point: - result.append(chr(next_codepoint)) - i += 2 - else: - try: - idx = self.__escaped_by_code_points.index(next_codepoint) - result.append(chr(self.__escaped_code_points[idx])) - i += 2 - continue - except ValueError: - result.append(next_char) - i += 2 - else: - raise ValueError( - f"Escape character '{chr(self.__escape_code_point)}' can't be the last character in a string." - ) - else: - result.append(input_string[i]) - i += 1 + if input_string.endswith( + chr(self.__escape_code_point) + ) and not input_string.endswith(chr(self.__escape_code_point) * 2): + raise ValueError( + "Escape character '{}' can't be the last character in a string.".format( + chr(self.__escape_code_point) + ) + ) - processed_string = "".join(result) - if processed_string == input_string: + no_escapes = self.__first_offset_needing_escape(input_string) + if no_escapes == -1: return input_string else: - return processed_string + result = [input_string[:no_escapes]] + skip_next = False + for i in range(no_escapes, len(input_string)): + if skip_next: + skip_next = False + continue + codepoint = ord(input_string[i]) + if codepoint == self.__escape_code_point and (i + 1) < len( + input_string + ): + next_codepoint = ord(input_string[i + 1]) + if next_codepoint in self.__escaped_by_code_points: + idx = self.__escaped_by_code_points.index(next_codepoint) + result.append(chr(self.__escaped_code_points[idx])) + skip_next = True + else: + result.append(input_string[i + 1]) + skip_next = True + else: + result.append(chr(codepoint)) + return "".join(result) @classmethod def self_escape(cls, escape_policy):