Skip to content

Commit b667dc0

Browse files
committed
Fix converter for multi-page response
1 parent 0db2f94 commit b667dc0

File tree

1 file changed: 12 additions and 8 deletions

1 file changed: 12 additions and 8 deletions

textractor/utils/legacy_utils.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@
1414

1515
def converter(response):
1616
blocks_to_delete = []
17-
page_block = None
17+
page_blocks = []
1818
try:
1919
for i, block in enumerate(response["Blocks"]):
2020
if block.get("BlockType") == "PAGE":
21-
page_block = block
21+
page_blocks.append(block)
2222
elif block.get("BlockType", "").startswith("LAYOUT_FIGURE_"):
2323
block["BlockType"] = LAYOUT_TEXT
2424
elif (
@@ -40,15 +40,19 @@ def converter(response):
4040
elif block.get("BlockType") == LAYOUT_FIGURE and "CONTAINER" in block.get("EntityTypes", []):
4141
blocks_to_delete.append((i, block))
4242

43-
page_relationships = []
44-
for relationship in page_block.get("Relationships", []):
45-
if relationship["Type"] == "CHILD":
46-
page_relationships = relationship["Ids"]
47-
break
43+
blocks_to_delete_id_set = set([b["Id"] for _, b in blocks_to_delete])
44+
for page_block in page_blocks:
45+
for relationship in page_block.get("Relationships", []):
46+
if relationship["Type"] == "CHILD":
47+
relationship["Ids"] = [
48+
id
49+
for id in relationship["Ids"]
50+
if id not in blocks_to_delete_id_set
51+
]
52+
break
4853

4954
for i, block in blocks_to_delete[::-1]:
5055
del response["Blocks"][i]
51-
page_relationships.remove(block["Id"])
5256
except Exception as ex:
5357
logging.warning(f"Failed to convert the response for backward compatibility. {str(ex)}")
5458

0 commit comments

Comments
 (0)