This repository was archived by the owner on Apr 4, 2024. It is now read-only.
forked from diffplug/selfie
-
Notifications
You must be signed in to change notification settings - Fork 1
Push PerCharacterEscaper #27
Merged
Merged
Changes from 3 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
cbcff7e
Push PerCharacterEscaper
2cfe41c
Merge branch 'main' into PerCharacterEscaper-Edwin
Trickybrain b6fd88e
fix the ruff format
a5b7af8
fix is vs == and added the private keywords and remove the static for…
34db5b8
fix is vs == and added the private keywords and remove the static for…
236bd6b
Make changes to PerCharacterEscaper, uses __escape_code_point
45a7e26
Change the unescape function to the old logic
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
class PerCharacterEscaper:
    """Escape and unescape strings one code point at a time.

    An escaper is defined by three pieces of data:

    * ``escape_code_point`` -- the code point that introduces an escape
      sequence in escaped text.
    * ``escaped_code_points`` -- the code points that must be escaped.
    * ``escaped_by_code_points`` -- for each entry of
      ``escaped_code_points`` (same index), the code point written after
      the escape character to represent it.

    Use :meth:`self_escape` or :meth:`specified_escape` to build an
    instance from a compact policy string.
    """

    def __init__(self, escape_code_point, escaped_code_points, escaped_by_code_points):
        self.escape_code_point = escape_code_point
        self.escaped_code_points = escaped_code_points
        self.escaped_by_code_points = escaped_by_code_points

    @staticmethod
    def _first_offset_needing_escape(
        input_string, escaped_code_points, escape_code_point=None
    ):
        """Return the index of the first character that needs processing.

        A character needs processing when its code point is in
        ``escaped_code_points`` or, if ``escape_code_point`` is given,
        equals it. Returns ``-1`` when no such character exists.
        """
        for offset, char in enumerate(input_string):
            codepoint = ord(char)
            if escape_code_point is not None and codepoint == escape_code_point:
                return offset
            if codepoint in escaped_code_points:
                return offset
        return -1

    def escape(self, input_string):
        """Return ``input_string`` with every escaped code point replaced.

        Each character whose code point is in ``escaped_code_points`` is
        written as the escape character followed by its counterpart from
        ``escaped_by_code_points``. If nothing needs escaping, the very
        same string object is returned.
        """
        first = self._first_offset_needing_escape(
            input_string, self.escaped_code_points
        )
        if first == -1:
            # Fast path: hand back the input itself, no copy.
            return input_string

        escape_char = chr(self.escape_code_point)
        result = [input_string[:first]]
        for char in input_string[first:]:
            codepoint = ord(char)
            if codepoint in self.escaped_code_points:
                idx = self.escaped_code_points.index(codepoint)
                result.append(escape_char)
                result.append(chr(self.escaped_by_code_points[idx]))
            else:
                result.append(char)
        return "".join(result)

    def unescape(self, input_string):
        """Reverse :meth:`escape`.

        An escape character followed by a code point from
        ``escaped_by_code_points`` becomes the matching entry of
        ``escaped_code_points``; an escape character followed by any other
        character yields that character unchanged. If the input contains
        no escape character, the very same string object is returned.

        Raises:
            ValueError: if an escape character has nothing after it, i.e.
                the string ends with a dangling escape.
        """
        escape_char = chr(self.escape_code_point)
        first = input_string.find(escape_char)
        if first == -1:
            # Fast path: hand back the input itself, no copy.
            return input_string

        result = [input_string[:first]]
        length = len(input_string)
        i = first
        while i < length:
            char = input_string[i]
            if char != escape_char:
                result.append(char)
                i += 1
                continue
            # Checking here (rather than inspecting only the string's tail
            # up front) also rejects inputs that end in an odd run of three
            # or more escape characters, whose final escape is dangling.
            if i + 1 >= length:
                raise ValueError(
                    "Escape character '{}' can't be the last character in a string.".format(
                        escape_char
                    )
                )
            next_char = input_string[i + 1]
            next_codepoint = ord(next_char)
            if next_codepoint in self.escaped_by_code_points:
                idx = self.escaped_by_code_points.index(next_codepoint)
                result.append(chr(self.escaped_code_points[idx]))
            else:
                # Unknown escape sequence: keep the escaped character as-is.
                result.append(next_char)
            i += 2  # consume the escape character and the one it escapes
        return "".join(result)

    @classmethod
    def self_escape(cls, escape_policy):
        """Build an escaper in which every character escapes to itself.

        The first character of ``escape_policy`` is the escape character;
        every character of the policy (including the first) is written as
        the escape character followed by that same character.

        Raises:
            ValueError: if ``escape_policy`` is empty.
        """
        code_points = [ord(c) for c in escape_policy]
        if not code_points:
            raise ValueError("Escape policy string must not be empty.")
        escape_code_point = code_points[0]
        return cls(escape_code_point, code_points, code_points)

    @classmethod
    def specified_escape(cls, escape_policy):
        """Build an escaper from a string of (escaped, escaped-by) pairs.

        Characters at even indexes of ``escape_policy`` are the ones to
        escape, each followed by the character that represents it after
        the escape character. The first character of the policy is the
        escape character.

        Raises:
            ValueError: if ``escape_policy`` is empty or has an odd number
                of characters.
        """
        code_points = [ord(c) for c in escape_policy]
        if len(code_points) % 2 != 0:
            raise ValueError(
                "Escape policy string must have an even number of characters."
            )
        if not code_points:
            raise ValueError("Escape policy string must not be empty.")
        escape_code_point = code_points[0]
        escaped_code_points = code_points[0::2]
        escaped_by_code_points = code_points[1::2]
        return cls(escape_code_point, escaped_code_points, escaped_by_code_points)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,3 @@ | ||
| from .LineReader import LineReader as LineReader | ||
| from .Slice import Slice as Slice | ||
| from .PerCharacterEscaper import PerCharacterEscaper as PerCharacterEscaper |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| import pytest | ||
|
|
||
| from selfie_lib import PerCharacterEscaper | ||
|
|
||
|
|
||
class TestPerCharacterEscaper:
    """Behavioral tests for PerCharacterEscaper's escape/unescape pair."""

    def test_performance_optimization_self(self):
        escaper = PerCharacterEscaper.self_escape("`123")
        abc = "abc"
        # When nothing changes, the escaper returns its argument untouched,
        # so identity (not mere equality) is what proves the fast path.
        assert escaper.escape(abc) is abc
        assert escaper.unescape(abc) is abc

        assert escaper.escape("1") == "`1"
        assert escaper.escape("`") == "``"
        assert escaper.escape("abc123`def") == "abc`1`2`3``def"

        assert escaper.unescape("`1") == "1"
        assert escaper.unescape("``") == "`"
        assert escaper.unescape("abc`1`2`3``def") == "abc123`def"

    def test_performance_optimization_specific(self):
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        abc = "abc"
        # Same fast-path identity check as for the self-escaping policy.
        assert escaper.escape(abc) is abc
        assert escaper.unescape(abc) is abc

        assert escaper.escape("1") == "`b"
        assert escaper.escape("`") == "`a"
        assert escaper.escape("abc123`def") == "abc`b`c`d`adef"

        assert escaper.unescape("`b") == "1"
        assert escaper.unescape("`a") == "`"
        # Escape sequences that are not in the policy fall through to the
        # escaped character itself.
        assert escaper.unescape("abc`1`2`3``def") == "abc123`def"

    def test_corner_cases_self(self):
        escaper = PerCharacterEscaper.self_escape("`123")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        )
        assert escaper.unescape("`a") == "a"

    def test_corner_cases_specific(self):
        escaper = PerCharacterEscaper.specified_escape("`a1b2c3d")
        with pytest.raises(ValueError) as excinfo:
            escaper.unescape("`")
        assert (
            str(excinfo.value)
            == "Escape character '`' can't be the last character in a string."
        )
        assert escaper.unescape("`e") == "e"

    def test_roundtrip(self):
        escaper = PerCharacterEscaper.self_escape("`<>")

        def roundtrip(text):
            # Escaping then unescaping must reproduce the input exactly.
            assert escaper.unescape(escaper.escape(text)) == text

        roundtrip("")
        roundtrip("<local>~`/")
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.