Skip to content
This repository was archived by the owner on Apr 4, 2024. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions python/selfie-lib/selfie_lib/PerCharacterEscaper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
class PerCharacterEscaper:
    """Escape and unescape strings one code point at a time.

    Every code point listed in ``escaped_code_points`` is written as the
    escape character followed by the code point at the same index in
    ``escaped_by_code_points``; ``unescape`` reverses that mapping.

    Instances are normally built with :meth:`self_escape` or
    :meth:`specified_escape`, both of which put the escape character itself
    at index 0 of ``escaped_code_points`` so that it is escaped too.
    """

    def __init__(self, escape_code_point, escaped_code_points, escaped_by_code_points):
        """Store the escape configuration.

        Args:
            escape_code_point: Integer code point of the escape character.
            escaped_code_points: Sequence of integer code points that must be
                escaped.
            escaped_by_code_points: Sequence of integer code points, parallel
                to ``escaped_code_points``, written after the escape character
                in place of the escaped code point.
        """
        self.escape_code_point = escape_code_point
        self.escaped_code_points = escaped_code_points
        self.escaped_by_code_points = escaped_by_code_points

    @staticmethod
    def _first_offset_needing_escape(
        input_string, escaped_code_points, escape_code_point=None
    ):
        """Return the offset of the first character that needs processing.

        A character matches when its code point equals ``escape_code_point``
        (if one is given) or is contained in ``escaped_code_points``.
        Returns ``-1`` when no character matches.
        """
        for offset, char in enumerate(input_string):
            codepoint = ord(char)
            if escape_code_point is not None and codepoint == escape_code_point:
                return offset
            if codepoint in escaped_code_points:
                return offset
        return -1

    def escape(self, input_string: str) -> str:
        """Return ``input_string`` with every escaped code point replaced.

        When nothing needs escaping the input object itself is returned, so
        the common case allocates nothing.
        """
        first = self._first_offset_needing_escape(
            input_string, self.escaped_code_points
        )
        if first == -1:
            return input_string

        escape_char = chr(self.escape_code_point)
        # Everything before the first hit is copied through in one slice.
        pieces = [input_string[:first]]
        for char in input_string[first:]:
            codepoint = ord(char)
            if codepoint in self.escaped_code_points:
                idx = self.escaped_code_points.index(codepoint)
                pieces.append(escape_char)
                pieces.append(chr(self.escaped_by_code_points[idx]))
            else:
                pieces.append(char)
        return "".join(pieces)

    def unescape(self, input_string: str) -> str:
        """Reverse :meth:`escape`.

        An escape character followed by a code point from
        ``escaped_by_code_points`` becomes the matching entry of
        ``escaped_code_points``; followed by any other character it yields
        that character unchanged. When the input contains no escape character
        the input object itself is returned.

        Raises:
            ValueError: If an escape character is the last character of the
                input with nothing after it to escape. This is detected while
                scanning, so an odd run of trailing escape characters is
                rejected as well as a single one.
        """
        escape_char = chr(self.escape_code_point)
        first = input_string.find(escape_char)
        if first == -1:
            return input_string

        length = len(input_string)
        pieces = [input_string[:first]]
        i = first
        while i < length:
            char = input_string[i]
            if ord(char) != self.escape_code_point:
                pieces.append(char)
                i += 1
                continue
            if i + 1 >= length:
                # The escape character has nothing after it to act on.
                raise ValueError(
                    f"Escape character '{escape_char}' can't be the last "
                    "character in a string."
                )
            next_char = input_string[i + 1]
            next_codepoint = ord(next_char)
            if next_codepoint in self.escaped_by_code_points:
                idx = self.escaped_by_code_points.index(next_codepoint)
                pieces.append(chr(self.escaped_code_points[idx]))
            else:
                # Unknown escape sequence: keep the escaped character as is.
                pieces.append(next_char)
            # Consume the escape character and the character it applied to.
            i += 2
        return "".join(pieces)

    @classmethod
    def self_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper in which every character is escaped by itself.

        The first character of ``escape_policy`` is the escape character, and
        each character of the policy is written as the escape character
        followed by that same character. An empty policy raises
        ``IndexError`` because there is no first character.
        """
        code_points = [ord(c) for c in escape_policy]
        escape_code_point = code_points[0]
        return cls(escape_code_point, code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy: str) -> "PerCharacterEscaper":
        """Build an escaper from a string of (escaped, escaped-by) pairs.

        Characters at even offsets of ``escape_policy`` are the ones to
        escape, and the character following each is what gets written after
        the escape character. The first character of the policy is the escape
        character.

        Raises:
            ValueError: If ``escape_policy`` has an odd number of characters.
        """
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        escape_code_point = code_points[0]
        escaped_code_points = code_points[0::2]
        escaped_by_code_points = code_points[1::2]
        return cls(escape_code_point, escaped_code_points, escaped_by_code_points)
1 change: 1 addition & 0 deletions python/selfie-lib/selfie_lib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .LineReader import LineReader as LineReader
from .Slice import Slice as Slice
from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper
63 changes: 63 additions & 0 deletions python/selfie-lib/tests/PerCharacterEscaper_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import pytest

from selfie_lib import PerCharacterEscaper


class TestPerCharacterEscaper:
    """Tests for PerCharacterEscaper escaping, unescaping and error cases."""

    def test_performance_optimization_self(self):
        """Self-escaping policy: untouched strings pass through unchanged."""
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # Compare with == rather than `is`: these checks are about equal
        # content, not about getting the very same object back.
        assert escaper.escape(abc) == abc
        assert escaper.unescape(abc) == abc

        assert escaper.escape("1") == "`1"
        assert escaper.escape("`") == "``"
        assert escaper.escape("abc123`def") == "abc`1`2`3``def"

        assert escaper.unescape("`1") == "1"
        assert escaper.unescape("``") == "`"
        assert escaper.unescape("abc`1`2`3``def") == "abc123`def"

    def test_performance_optimization_specific(self):
        """Specified policy: each escaped character maps to its partner."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        assert escaper.escape(abc) == abc
        assert escaper.unescape(abc) == abc

        assert escaper.escape("1") == "`b"
        assert escaper.escape("`") == "`a"
        assert escaper.escape("abc123`def") == "abc`b`c`d`adef"

        assert escaper.unescape("`b") == "1"
        assert escaper.unescape("`a") == "`"
        assert escaper.unescape("abc`1`2`3``def") == "abc123`def"

    def test_corner_cases_self(self):
        """A lone trailing escape raises; an unknown escape yields the char."""
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        )
        assert escaper.unescape("`a") == "a"

    def test_corner_cases_specific(self):
        """Same corner cases as above, with a specified escape policy."""
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        )
        assert escaper.unescape("`e") == "e"

    def test_roundtrip(self):
        """escape followed by unescape returns the original string."""
        escaper = PerCharacterEscaper.self_escape("`<>")

        def roundtrip(text):
            assert escaper.unescape(escaper.escape(text)) == text

        roundtrip("")
        roundtrip("<local>~`/")