Skip to content
This repository was archived by the owner on Apr 4, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions python/selfie-lib/selfie_lib/PerCharacterEscaper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from typing import List


class PerCharacterEscaper:
    """Escape and unescape individual code points of a string.

    An escaper is described by three values:

    * ``escape_code_point`` - the code point that introduces an escape
      sequence in escaped text.
    * ``escaped_code_points`` - the code points that must not appear
      verbatim in escaped text.
    * ``escaped_by_code_points`` - for each entry of
      ``escaped_code_points`` (same index), the code point written after
      the escape character to represent it.

    Use :meth:`self_escape` or :meth:`specified_escape` to build an
    instance from a compact policy string.
    """

    def __init__(
        self,
        escape_code_point: int,
        escaped_code_points: List[int],
        escaped_by_code_points: List[int],
    ):
        """Store the escape policy.

        Args:
            escape_code_point: Code point that starts an escape sequence.
            escaped_code_points: Code points replaced by an escape sequence
                when escaping.
            escaped_by_code_points: Replacement code points, index-aligned
                with ``escaped_code_points``.

        NOTE: ``escape`` only rewrites code points listed in
        ``escaped_code_points``. Both factory methods put the escape code
        point itself in that list; a caller using this constructor directly
        must do the same for escaped text to round-trip.
        """
        self.__escape_code_point = escape_code_point
        self.__escaped_code_points = escaped_code_points
        self.__escaped_by_code_points = escaped_by_code_points

    def __first_offset_needing_escape(self, input_string: str) -> int:
        """Return the index of the first character that ``escape`` has to
        look at (the escape character or any to-be-escaped character),
        or -1 when the string contains none."""
        for offset, char in enumerate(input_string):
            codepoint = ord(char)
            if (
                codepoint == self.__escape_code_point
                or codepoint in self.__escaped_code_points
            ):
                return offset
        return -1

    def escape(self, input_string: str) -> str:
        """Return ``input_string`` with every to-be-escaped code point
        replaced by the escape character followed by its replacement.

        When nothing needs escaping, the very same string object is
        returned (no copy is made).
        """
        first = self.__first_offset_needing_escape(input_string)
        if first == -1:
            return input_string

        escape_char = chr(self.__escape_code_point)
        # Everything before `first` is known to be clean; copy it in one go.
        result = [input_string[:first]]
        for char in input_string[first:]:
            codepoint = ord(char)
            if codepoint in self.__escaped_code_points:
                idx = self.__escaped_code_points.index(codepoint)
                result.append(escape_char)
                result.append(chr(self.__escaped_by_code_points[idx]))
            else:
                result.append(char)
        return "".join(result)

    def unescape(self, input_string: str) -> str:
        """Reverse :meth:`escape`.

        Each escape character consumes the character after it. If that
        character is a known replacement, the code point it stands for is
        emitted; otherwise the character itself is emitted unchanged.

        When the input contains no escape character, the very same string
        object is returned.

        Raises:
            ValueError: If an escape character has nothing after it, i.e.
                the string ends in the middle of an escape sequence. This
                covers any odd-length run of trailing escape characters,
                not just a single one.
        """
        escape_char = chr(self.__escape_code_point)
        # Only the escape character matters when decoding, so a plain
        # substring search is enough for the fast path.
        first = input_string.find(escape_char)
        if first == -1:
            return input_string

        length = len(input_string)
        result = [input_string[:first]]
        i = first
        while i < length:
            char = input_string[i]
            if char != escape_char:
                result.append(char)
                i += 1
                continue

            if i + 1 >= length:
                # Dangling escape: detected here, while pairing characters,
                # so that inputs such as three trailing escape characters
                # are rejected instead of leaking a raw escape character.
                raise ValueError(
                    "Escape character '{}' can't be the last character in a string.".format(
                        escape_char
                    )
                )

            next_char = input_string[i + 1]
            next_codepoint = ord(next_char)
            if next_codepoint in self.__escaped_by_code_points:
                idx = self.__escaped_by_code_points.index(next_codepoint)
                result.append(chr(self.__escaped_code_points[idx]))
            else:
                # Unknown escape sequence: keep the escaped character as-is.
                result.append(next_char)
            i += 2  # skip the escape character and the character it consumed
        return "".join(result)

    @classmethod
    def self_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper where every character of ``escape_policy`` is
        escaped by prefixing it with the escape character.

        The first character of ``escape_policy`` is the escape character.
        For example, with the policy ``"`123"`` the text ``"1"`` escapes to
        ``"`1"`` and a backtick escapes to two backticks.

        Raises:
            IndexError: If ``escape_policy`` is empty.
        """
        code_points = [ord(c) for c in escape_policy]
        escape_code_point = code_points[0]
        return cls(escape_code_point, code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper from pairs of (character, replacement).

        ``escape_policy`` is read two characters at a time: the first of
        each pair is the character to escape, the second is what follows
        the escape character in escaped text. The very first character is
        the escape character. For example, with the policy ``"`a1b2c3d"``
        the text ``"1"`` escapes to ``"`b"`` and a backtick to ``"`a"``.

        Raises:
            ValueError: If ``escape_policy`` has an odd number of characters.
            IndexError: If ``escape_policy`` is empty.
        """
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        escape_code_point = code_points[0]
        escaped_code_points = code_points[0::2]
        escaped_by_code_points = code_points[1::2]
        return cls(escape_code_point, escaped_code_points, escaped_by_code_points)
1 change: 1 addition & 0 deletions python/selfie-lib/selfie_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .LineReader import LineReader as LineReader
from .Slice import Slice as Slice
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
88 changes: 88 additions & 0 deletions python/selfie-lib/tests/PerCharacterEscaper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import pytest

from selfie_lib import PerCharacterEscaper


class TestPerCharacterEscaper:
    """Behavioral tests for ``PerCharacterEscaper``."""

    def test_performance_optimization_self(self):
        """Self-escape policy: untouched input is returned as the same object,
        and escape/unescape produce the expected text."""
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # Identity ('is') is intentional: no copy may be made when nothing changes.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # Value equality for everything that is actually rewritten.
        assert (
            escaper.escape("1") == "`1"
        ), "Escaping '1' should prepend the escape character"
        assert (
            escaper.escape("`") == "``"
        ), "Escaping the escape character should duplicate it"
        assert (
            escaper.escape("abc123`def") == "abc`1`2`3``def"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`1") == "1", "Unescaping '`1' should produce '1'"
        assert escaper.unescape("``") == "`", "Unescaping '``' should produce '`'"
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_performance_optimization_specific(self):
        """Specified-escape policy: same checks as above with explicit
        replacement characters."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        # Identity ('is') is intentional: no copy may be made when nothing changes.
        assert (
            escaper.escape(abc) is abc
        ), "Escape should return the original object when no change is made"
        assert (
            escaper.unescape(abc) is abc
        ), "Unescape should return the original object when no change is made"

        # Value equality for everything that is actually rewritten.
        assert escaper.escape("1") == "`b", "Escaping '1' should produce '`b'"
        assert escaper.escape("`") == "`a", "Escaping '`' should produce '`a'"
        assert (
            escaper.escape("abc123`def") == "abc`b`c`d`adef"
        ), "Escaping 'abc123`def' did not produce the expected result"

        assert escaper.unescape("`b") == "1", "Unescaping '`b' should produce '1'"
        assert escaper.unescape("`a") == "`", "Unescaping '`a' should produce '`'"
        # The true inverse of the escape() call above.
        assert (
            escaper.unescape("abc`b`c`d`adef") == "abc123`def"
        ), "Unescaping 'abc`b`c`d`adef' did not produce the expected result"
        # Escape sequences that are not part of the policy pass the escaped
        # character through unchanged.
        assert (
            escaper.unescape("abc`1`2`3``def") == "abc123`def"
        ), "Unescaping 'abc`1`2`3``def' did not produce the expected result"

    def test_corner_cases_self(self):
        """A lone trailing escape character is rejected; an unknown escape
        sequence yields the escaped character."""
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`a") == "a", "Unescaping '`a' should produce 'a'"

    def test_corner_cases_specific(self):
        """Same corner cases as above for a specified-escape policy."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        ), "Unescaping a string ending with a single escape character should raise ValueError"
        assert escaper.unescape("`e") == "e", "Unescaping '`e' should produce 'e'"

    def test_roundtrip(self):
        """escape() followed by unescape() must return the original text."""
        escaper = PerCharacterEscaper.self_escape("`<>")

        # Parameter is named `text` so the builtin `str` is not shadowed.
        def roundtrip(text):
            assert (
                escaper.unescape(escaper.escape(text)) == text
            ), f"Roundtrip of '{text}' did not return the original string"

        roundtrip("")
        roundtrip("<local>~`/")