Skip to content

Commit

Permalink
Fixing issue py-pdf#1236
Browse files Browse the repository at this point in the history
  • Loading branch information
smilerightnow committed Sep 4, 2024
1 parent 33eb7ed commit 9f56f08
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 47 deletions.
2 changes: 1 addition & 1 deletion docs/TextStyling.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pdf = FPDF()
pdf.add_page()
pdf.set_font("Times", size=50)
pdf.cell(text="**Lorem** __Ipsum__ --dolor--", markdown=True, new_x='LEFT', new_y='NEXT')
pdf.cell(text="\\**Lorem\\** \\\\__Ipsum\\\\__ --dolor--", markdown=True)
pdf.cell(text="\\**Lorem\\** __\\Ipsum\\ __ --dolor--", markdown=True)
pdf.output("markdown-styled.pdf")
```

Expand Down
40 changes: 18 additions & 22 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3456,9 +3456,16 @@ def frag():
font_glyphs = self.current_font.cmap
else:
font_glyphs = []
num_escape_chars = 0

while text:
tlt = text[:3] ## get triples to check for escape character
if self.MARKDOWN_ESCAPE_CHARACTER == tlt[0] and tlt[1:] in ["**", "__", "--"]:
text = text[1:] ## remove the escape character
for i in range(2):
txt_frag.append(text[0])
text = text[1:]
yield frag()
continue
is_marker = text[:2] in (
self.MARKDOWN_BOLD_MARKER,
self.MARKDOWN_ITALICS_MARKER,
Expand All @@ -3482,27 +3489,16 @@ def frag():
and (not txt_frag or txt_frag[-1] != half_marker)
and (len(text) < 3 or text[2] != half_marker)
):
txt_frag = (
txt_frag[: -((num_escape_chars + 1) // 2)]
if num_escape_chars > 0
else txt_frag
)
if num_escape_chars % 2 == 0:
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue
num_escape_chars = (
num_escape_chars + 1
if text[0] == self.MARKDOWN_ESCAPE_CHARACTER
else 0
)
if txt_frag:
yield frag()
if text[:2] == self.MARKDOWN_BOLD_MARKER:
in_bold = not in_bold
if text[:2] == self.MARKDOWN_ITALICS_MARKER:
in_italics = not in_italics
if text[:2] == self.MARKDOWN_UNDERLINE_MARKER:
in_underline = not in_underline
text = text[2:]
continue
is_link = self.MARKDOWN_LINK_REGEX.match(text)
if is_link:
link_text, link_dest, text = is_link.groups()
Expand Down
Binary file modified test/text/cell_markdown_bold_italic_escaped.pdf
Binary file not shown.
Binary file modified test/text/cell_markdown_escaped.pdf
Binary file not shown.
Binary file modified test/text/cell_markdown_with_ttf_fonts_escaped.pdf
Binary file not shown.
Binary file modified test/text/multi_cell_markdown_escaped.pdf
Binary file not shown.
Binary file modified test/text/multi_cell_markdown_with_ttf_fonts_escaped.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions test/text/test_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ def test_cell_markdown_escaped(tmp_path):
pdf.add_page()
pdf.set_font("Times", size=40)
pdf.cell(text="**Lo\\rem** \\__Ipsum\\__ \\\\--dolor\\\\--", markdown=True)
pdf.write(text="\n")
pdf.cell(text="\\****BOLD**\\**", markdown=True)
assert_pdf_equal(pdf, HERE / "cell_markdown_escaped.pdf", tmp_path)


Expand Down
82 changes: 58 additions & 24 deletions test/text/test_markdown_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,40 @@
GSTATE_BI = GSTATE.copy()
GSTATE_BI["font_style"] = "BI"

def merge_fragments(fragments):
    """
    Helper function for testing the escaping characters.

    Merge consecutive fragments that have different characters but the same
    ``graphics_state``, the same ``k`` and the same ``link``. The characters
    of each run of compatible fragments are concatenated in order.

    The fragments passed in are left untouched: every merged fragment is a
    shallow copy owning its own ``characters`` list.

    Example Input:
        (
            Fragment(characters=['a'], graphics_state={000}, k=1, link=None),
            Fragment(characters=['b'], graphics_state={000}, k=1, link=None)
        )
    Example Output:
        (Fragment(characters=['a', 'b'], graphics_state={000}, k=1, link=None),)

    Args:
        fragments: iterable of fragment-like objects exposing ``characters``
            (a list), ``graphics_state``, ``k`` and ``link`` attributes.

    Returns:
        A tuple of merged fragments; an empty tuple when ``fragments`` is empty.
    """
    # Local import: this helper is the only user of `copy` visible here.
    import copy

    merged_fragments = []
    for fragment in fragments:
        previous = merged_fragments[-1] if merged_fragments else None
        if (
            previous is not None
            and fragment.graphics_state == previous.graphics_state
            and fragment.k == previous.k
            and fragment.link == previous.link
        ):
            # `previous` is our own copy, so extending it cannot alter the input.
            previous.characters.extend(fragment.characters)
        else:
            # Start a new run from a copy, so that later extensions do not
            # mutate the caller's fragment or its characters list.
            merged = copy.copy(fragment)
            merged.characters = list(fragment.characters)
            merged_fragments.append(merged)

    # Always a tuple (even when empty) so results compare equal to the
    # tuple-typed expectations used by the tests.
    return tuple(merged_fragments)

def test_markdown_parse_simple_ok():
frags = tuple(FPDF()._parse_chars("**bold**, __italics__ and --underlined--", True))
Expand All @@ -27,30 +61,30 @@ def test_markdown_parse_simple_ok():


def test_markdown_parse_simple_ok_escaped():
frags = tuple(
frags = merge_fragments(tuple(
FPDF()._parse_chars(
"\\**bold\\**, \\__italics\\__ and \\--underlined\\-- escaped", True
)
)
))
expected = (
Fragment("**bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k),
)
assert frags == expected
frags = tuple(
frags = merge_fragments(tuple(
FPDF()._parse_chars(
r"raw \**bold\**, \__italics\__ and \--underlined\-- escaped", True
)
)
))
expected = (
Fragment(
"raw **bold**, __italics__ and --underlined-- escaped", GSTATE, k=PDF.k
),
)
assert frags == expected
frags = tuple(FPDF()._parse_chars("escape *\\*between marker*\\*", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("escape *\\*between marker*\\*", True)))
expected = (Fragment("escape *\\*between marker*\\*", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(FPDF()._parse_chars("escape **\\after marker**\\", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("escape **\\after marker**\\", True)))
expected = (
Fragment("escape ", GSTATE, k=PDF.k),
Fragment("\\after marker", GSTATE_B, k=PDF.k),
Expand All @@ -59,26 +93,22 @@ def test_markdown_parse_simple_ok_escaped():


def test_markdown_unrelated_escape():
frags = tuple(FPDF()._parse_chars("unrelated \\ escape \\**bold\\**", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("unrelated \\ escape \\**bold\\**", True)))
expected = (Fragment("unrelated \\ escape **bold**", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(
frags = merge_fragments(tuple(
FPDF()._parse_chars("unrelated \\\\ double escape \\**bold\\**", True)
)
))
expected = (Fragment("unrelated \\\\ double escape **bold**", GSTATE, k=PDF.k),)
assert frags == expected


def test_markdown_parse_multiple_escape():
frags = tuple(FPDF()._parse_chars("\\\\**bold\\\\** double escaped", True))
expected = (
Fragment("\\", GSTATE, k=PDF.k),
Fragment("bold\\", GSTATE_B, k=PDF.k),
Fragment(" double escaped", GSTATE, k=PDF.k),
)
frags = merge_fragments(tuple(FPDF()._parse_chars("\\\\**bold\\\\** double escaped", True)))
expected = (Fragment("\\**bold\\** double escaped", GSTATE, k=PDF.k),)
assert frags == expected
frags = tuple(FPDF()._parse_chars("\\\\\\**triple bold\\\\\\** escaped", True))
expected = (Fragment("\\**triple bold\\** escaped", GSTATE, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("\\\\\\**triple bold\\\\\\** escaped", True)))
expected = (Fragment("\\\\**triple bold\\\\** escaped", GSTATE, k=PDF.k),)
assert frags == expected


Expand All @@ -92,7 +122,7 @@ def test_markdown_parse_overlapping():


def test_markdown_parse_overlapping_escaped():
frags = tuple(FPDF()._parse_chars("**bold \\__italics\\__**", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("**bold \\__italics\\__**", True)))
expected = (Fragment("bold __italics__", GSTATE_B, k=PDF.k),)
assert frags == expected

Expand All @@ -108,7 +138,7 @@ def test_markdown_parse_crossing_markers():


def test_markdown_parse_crossing_markers_escaped():
frags = tuple(FPDF()._parse_chars("**bold __and\\** italics__", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("**bold __and\\** italics__", True)))
expected = (
Fragment("bold ", GSTATE_B, k=PDF.k),
Fragment("and** italics", GSTATE_BI, k=PDF.k),
Expand All @@ -126,7 +156,7 @@ def test_markdown_parse_unterminated():


def test_markdown_parse_unterminated_escaped():
frags = tuple(FPDF()._parse_chars("**bold\\** __italics__", True))
frags = merge_fragments(tuple(FPDF()._parse_chars("**bold\\** __italics__", True)))
expected = (
Fragment("bold** ", GSTATE_B, k=PDF.k),
Fragment("italics", GSTATE_BI, k=PDF.k),
Expand All @@ -153,11 +183,15 @@ def test_markdown_parse_line_of_markers():


def test_markdown_parse_line_of_markers_escaped():
frags = tuple(FPDF()._parse_chars("\\****BOLD**", True))
expected = (Fragment("\\****BOLD", GSTATE, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("\\****BOLD**\\**", True)))
expected = (
Fragment("**", GSTATE, k=PDF.k),
Fragment("BOLD", GSTATE_B, k=PDF.k),
Fragment("**", GSTATE, k=PDF.k),
)
assert frags == expected
frags = tuple(FPDF()._parse_chars("*\\***BOLD**", True))
expected = (Fragment("*\\***BOLD", GSTATE, k=PDF.k),)
frags = merge_fragments(tuple(FPDF()._parse_chars("*\\***BOLD**", True)))
expected = (Fragment("****BOLD", GSTATE, k=PDF.k),)
assert frags == expected


Expand Down

0 comments on commit 9f56f08

Please sign in to comment.