Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ def build_char_map(
"/GB-EUC-V": "gbk", # TBC
"/GBpc-EUC-H": "gb2312", # TBC
"/GBpc-EUC-V": "gb2312", # TBC
"/GBK-EUC-H": "gbk", # TBC
"/GBK-EUC-V": "gbk", # TBC
# UCS2 in code
}

Expand Down
10 changes: 9 additions & 1 deletion tests/test_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,17 @@ def test_iss1533():


@pytest.mark.enable_socket()
def test_ucs2_gbk(caplog):
    """Check text extraction on the UCS2 sample and the GBK-EUC sample (iss 1809).

    Both PDFs are downloaded via ``get_pdf_from_url`` (hence the
    ``enable_socket`` marker) and parsed from an in-memory buffer.
    """
    # UCS2 sample: extracting page 1 must neither raise nor log anything.
    url = "https://github.com/py-pdf/pypdf/files/11190189/pdf_font_garbled.pdf"
    name = "tstUCS2.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
    reader.pages[1].extract_text()  # no error
    assert caplog.text == ""

    # iss 1809: GBK-EUC sample; both Latin and Chinese text must be extracted.
    url = "https://github.com/py-pdf/pypdf/files/11315397/3.pdf"
    name = "tst-GBK_EUC.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
    text = reader.pages[0].extract_text()
    assert "NJA" in text
    assert "中华男科学杂志" in text
    # caplog is deliberately not asserted empty here: a duplicate field was
    # confirmed in page 0 of this document.