From 2dc982bf9e795905486ec9ddb890eeee12678b31 Mon Sep 17 00:00:00 2001 From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com> Date: Tue, 11 Jul 2023 01:57:06 +0200 Subject: [PATCH] Better handling of text overflow in FPDF.write() & FPDF.write_html() - fix #847 --- CHANGELOG.md | 1 + fpdf/fonts.py | 3 + fpdf/fpdf.py | 2 +- fpdf/line_break.py | 21 +---- test/text/test_line_break.py | 10 +++ test/text/test_unbreakable.py | 78 +++++++++--------- test/text/test_varied_fragments.py | 2 +- test/text/test_write.py | 29 +++++-- .../write_overflow_no_initial_newline.pdf | Bin 0 -> 979 bytes test/text/write_soft_hyphen.pdf | Bin 1427 -> 1434 bytes 10 files changed, 80 insertions(+), 66 deletions(-) create mode 100644 test/text/write_overflow_no_initial_newline.pdf diff --git a/CHANGELOG.md b/CHANGELOG.md index fda90ac23..d2626a07a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default', ### Fixed - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): the `colspan` setting has been fixed - [documentation](https://pyfpdf.github.io/fpdf2/Tables.html#column-span) - [`FPDF.image()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): allowing images path starting with `data` to be passed as input +- text overflow is better handled by `FPDF.write()` & `FPDF.write_html()` - _cf._ [issue #847](https://github.com/PyFPDF/fpdf2/issues/847) - the initial text color is preserved when using `FPDF.write_html()` - _cf._ [issue #846](https://github.com/PyFPDF/fpdf2/issues/846) ### Deprecated - the `center` optional parameter of [`FPDF.cell()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.cell) is **no more** deprecated, as it allows for horizontal positioning, which is different from text alignment control with `align="C"` diff --git a/fpdf/fonts.py b/fpdf/fonts.py index 1fb78d83e..6eb18f9ab 100644 --- a/fpdf/fonts.py +++ b/fpdf/fonts.py @@ -61,6 +61,9 @@ def __init__(self, fpdf, fontkey, style): self.fontkey = fontkey self.emphasis = TextEmphasis.coerce(style) + def __repr__(self): + return f"CoreFont(i={self.i}, fontkey={self.fontkey})" + class TTFFont: __slots__ = ( diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 5a15d0820..c84d2c571 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -3593,7 +3593,7 @@ def write( # first line from current x position to right margin first_width = self.w - self.x - self.r_margin txt_line = multi_line_break.get_line_of_given_width( - first_width - 2 * self.c_margin, wordsplit=False + first_width - 2 * self.c_margin, ) # remaining lines fill between margins full_width = self.w - self.l_margin - self.r_margin diff --git a/fpdf/line_break.py b/fpdf/line_break.py index 56c71a96d..1b3ad0aaa 100644 --- a/fpdf/line_break.py +++ b/fpdf/line_break.py @@ -38,12 +38,10 @@ def __init__( self.link = link def __repr__(self): - gstate = self.graphics_state.copy() - if "current_font" in gstate: - del gstate["current_font"] # TMI return ( f"Fragment(characters={self.characters}," - f" graphics_state={gstate}, k={self.k}, link={self.link})" + f" graphics_state={self.graphics_state}," + f" k={self.k}, link={self.link})" ) @property @@ -394,7 +392,7 @@ def __init__( self.idx_last_forced_break = None # pylint: disable=too-many-return-statements - def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True): + def get_line_of_given_width(self, maximum_width: float): first_char = True # "Tw" ignores the first character in a text object. idx_last_forced_break = self.idx_last_forced_break self.idx_last_forced_break = None @@ -402,10 +400,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True): if self.fragment_index == len(self.styled_text_fragments): return None - last_fragment_index = self.fragment_index - last_character_index = self.character_index - line_full = False - current_line = CurrentLine(print_sh=self.print_sh) while self.fragment_index < len(self.styled_text_fragments): current_fragment = self.styled_text_fragments[self.fragment_index] @@ -442,9 +436,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True): ) = current_line.automatic_break(self.justify) self.character_index += 1 return line - if not wordsplit: - line_full = True - break if idx_last_forced_break == self.character_index: raise FPDFException( "Not enough horizontal space to render a single character" @@ -464,12 +455,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True): self.character_index += 1 - if line_full and not wordsplit: - # roll back and return empty line to trigger continuation - # on the next line. - self.fragment_index = last_fragment_index - self.character_index = last_character_index - return CurrentLine().manual_break(self.justify) if current_line.width: return current_line.manual_break() return None diff --git a/test/text/test_line_break.py b/test/text/test_line_break.py index f07705bd3..3e79438e8 100644 --- a/test/text/test_line_break.py +++ b/test/text/test_line_break.py @@ -1129,3 +1129,13 @@ def test_trim_trailing_spaces(): cl.fragments = [frag] res = cl.trim_trailing_spaces() assert res is None + + +def test_line_break_no_initial_newline(): # issue-847 + text = "X" * 50 + alphabet = {"normal": {}} + alphabet["normal"]["X"] = 4.7 + fragments = [FxFragment(alphabet, text, _gs_normal, 1)] + multi_line_break = MultiLineBreak(fragments) + text_line = multi_line_break.get_line_of_given_width(188) + assert text_line.fragments diff --git a/test/text/test_unbreakable.py b/test/text/test_unbreakable.py index 8ecdb8969..1c21dc5d4 100644 --- a/test/text/test_unbreakable.py +++ b/test/text/test_unbreakable.py @@ -167,12 +167,12 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path): # issue 359 pdf.ln() - with pdf.unbreakable() as doc: - for _ in range(4): - for row in data: - max_no_of_lines_in_cell = 1 - for cell in row: - with pytest.warns(DeprecationWarning, match=expected_warn): + with pytest.warns(DeprecationWarning, match=expected_warn): + with pdf.unbreakable() as doc: + for _ in range(4): + for row in data: + max_no_of_lines_in_cell = 1 + for cell in row: result = doc.multi_cell( cell_width, l_height, @@ -184,39 +184,39 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path): # issue 359 max_line_height=l_height, split_only=True, ) - no_of_lines_in_cell = len(result) - if no_of_lines_in_cell > max_no_of_lines_in_cell: - max_no_of_lines_in_cell = no_of_lines_in_cell - no_of_lines_list.append(max_no_of_lines_in_cell) - - for j, row in enumerate(data): - cell_height = no_of_lines_list[j] * l_height - for cell in row: - if j == 0: - doc.multi_cell( - cell_width, - cell_height, - "**" + cell + "**", - border=1, - fill=False, - align="L", - new_x="RIGHT", - new_y="TOP", - max_line_height=l_height, - markdown=False, - ) - else: - doc.multi_cell( - cell_width, - cell_height, - cell, - border=1, - align="L", - new_x="RIGHT", - new_y="TOP", - max_line_height=l_height, - ) - doc.ln(cell_height) + no_of_lines_in_cell = len(result) + if no_of_lines_in_cell > max_no_of_lines_in_cell: + max_no_of_lines_in_cell = no_of_lines_in_cell + no_of_lines_list.append(max_no_of_lines_in_cell) + + for j, row in enumerate(data): + cell_height = no_of_lines_list[j] * l_height + for cell in row: + if j == 0: + doc.multi_cell( + cell_width, + cell_height, + "**" + cell + "**", + border=1, + fill=False, + align="L", + new_x="RIGHT", + new_y="TOP", + max_line_height=l_height, + markdown=False, + ) + else: + doc.multi_cell( + cell_width, + cell_height, + cell, + border=1, + align="L", + new_x="RIGHT", + new_y="TOP", + max_line_height=l_height, + ) + doc.ln(cell_height) assert_pdf_equal( pdf, HERE / "multi_cell_table_unbreakable_with_split_only.pdf", tmp_path diff --git a/test/text/test_varied_fragments.py b/test/text/test_varied_fragments.py index 0447e2d8a..8f1423089 100644 --- a/test/text/test_varied_fragments.py +++ b/test/text/test_varied_fragments.py @@ -29,7 +29,7 @@ def write_fragments(self, frags, align=Align.L): # first line from current x position to right margin first_width = self.w - self.x - self.r_margin text_line = multi_line_break.get_line_of_given_width( - first_width - 2 * self.c_margin, wordsplit=False + first_width - 2 * self.c_margin ) # remaining lines fill between margins full_width = self.w - self.l_margin - self.r_margin diff --git a/test/text/test_write.py b/test/text/test_write.py index 1bd4cf5d0..16ae9a83d 100644 --- a/test/text/test_write.py +++ b/test/text/test_write.py @@ -1,6 +1,6 @@ from pathlib import Path -import fpdf +from fpdf import FPDF from test.conftest import assert_pdf_equal, LOREM_IPSUM HERE = Path(__file__).resolve().parent @@ -8,7 +8,7 @@ def test_write_page_break(tmp_path): - doc = fpdf.FPDF() + doc = FPDF() doc.add_page() doc.set_font("helvetica", size=24) doc.y = 20 @@ -18,8 +18,15 @@ def test_write_page_break(tmp_path): def test_write_soft_hyphen(tmp_path): + """ + The current behaviour is close to CSS word-break: break-all + cf. https://developer.mozilla.org/en-US/docs/Web/CSS/overflow-wrap#comparing_overflow-wrap_word-break_and_hyphens + We used to prefer a line break over a word split without regards to soft hyphens: + https://github.com/PyFPDF/fpdf2/blob/2.7.4/test/text/write_soft_hyphen.pdf + But that caused issue with write_html(), cf. issue #847 + """ s = "Donau\u00addamp\u00adfschiff\u00adfahrts\u00adgesellschafts\u00adkapitäns\u00admützen\u00adstreifen. " - doc = fpdf.FPDF() + doc = FPDF() doc.add_page() doc.set_font("helvetica", size=24) doc.y = 20 @@ -41,7 +48,7 @@ def test_write_soft_hyphen(tmp_path): def test_write_trailing_nl(tmp_path): # issue #455 """Each item in lines triggers a line break at the end.""" - pdf = fpdf.FPDF() + pdf = FPDF() pdf.add_page() pdf.set_font("Times", size=16) lines = ["Hello\n", "Sweet\n", "World\n"] @@ -53,7 +60,7 @@ def test_write_trailing_nl(tmp_path): # issue #455 def test_write_font_stretching(tmp_path): # issue #478 right_boundary = 60 - pdf = fpdf.FPDF() + pdf = FPDF() pdf.add_page() # built-in font pdf.set_font("Helvetica", "", 8) @@ -81,7 +88,7 @@ def test_write_font_stretching(tmp_path): # issue #478 def test_write_superscript(tmp_path): - pdf = fpdf.FPDF() + pdf = FPDF() pdf.add_page() pdf.set_font("Helvetica", "", 20) @@ -131,7 +138,7 @@ def write_this(): def test_write_char_wrap(tmp_path): # issue #649 right_boundary = 50 - pdf = fpdf.FPDF() + pdf = FPDF() pdf.add_page() pdf.set_right_margin(pdf.w - right_boundary) pdf.set_font("Helvetica", "", 10) @@ -150,3 +157,11 @@ def test_write_char_wrap(tmp_path): # issue #649 pdf.line(pdf.l_margin, 10, pdf.l_margin, 130) pdf.line(right_boundary, 10, right_boundary, 130) assert_pdf_equal(pdf, HERE / "write_char_wrap.pdf", tmp_path) + + +def test_write_overflow_no_initial_newline(tmp_path): # issue-847 + pdf = FPDF() + pdf.add_page() + pdf.set_font(family="Helvetica", size=20) + pdf.write(7, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX") + assert_pdf_equal(pdf, HERE / "write_overflow_no_initial_newline.pdf", tmp_path) diff --git a/test/text/write_overflow_no_initial_newline.pdf b/test/text/write_overflow_no_initial_newline.pdf new file mode 100644 index 0000000000000000000000000000000000000000..74c2000766ad6b3dfbe1e8ea26ed4e4b7854fd87 GIT binary patch literal 979 zcmah|J#Q015DhfI3J?v2MKcnz;uLsy?|fg#vSQ!)6pJ7mJ5r=rDK7TfIdtB&?)Jc; zrlki&iTD9D{Ec+)&)GPRT4Di1q7cios)h@0jaFSxd5oSJf-}(N^*2fUNJfd67SiYvx7BmW zxXhrois?nEx^5WyEE9P;Vj0*)N}asGr$aYos#=psvVe`mm?3<~_*9;QQ`142 zOj$HTi%(|%&ky6$w2mu3Q}p8UwRuF;mGo|R2J?G2?vx(=DxFl`4?q6?P}+O@xpE#n zxK(@D`r3O(obTqhedp$vyE{AgAMb1R)}(74twKSqC}W{eE#U)}ykasQMF_XKDlFzx zz{fFf@+{r}2oLfzxg3rjk(%miQktRAu~y?>$C|ue#~}rsx35~ny;O__Ou`VWjRAbb z7Ak*dretfaPk()OP7Ab7f!0?l>FEbbN2ml(sb_oCFe&wjvJ>j<60*Bj2wtRYf(W=s z`&S0GWtkRCV2gRMbOGODp6W(h0i)jacZPEvs~KwcS7I$gt=|>QChiuKX%r_cUH&|S z_zhDf!`x48Z?NTwd!pwRI!)h&ZF{a)+5uWKw!j&iIDSblz@!BM@3_VcGo&)VT7 zF5%_BZQ{m&*d_cTF;i?-KL|_{h-c-0SIn))@;CgT{!%-Ax6K|*iA;f(3I-sckf*=} zW*8Wl8<=5;Sr}Mgh*=sMql+0D7#U3d#bOidW@2Gz?&f6a=4fbaZ02TeWaeh!Vqj|E mWME?9>}+gcXG2g$EF`)Ti%KerQq#B$4J-_~R8?L5-M9c)fv*n$ delta 458 zcmbQmJ(+uhBV)ZGmz^C~aY<2XVlG$3oVAl2^9~sZxPG6LG;PC*jq2Nk^955cmg*%4 z?q0&nm>52_>Ym)kB#>rwFz_tevBrbh7K63RmSsW$B`Vlbs?9UvwttXp6o# zyEL!#SqI0{EyBOpi>62(?onu)&biQaPNjU+rOn3{YPtoqS~>T1Xny02uMNw}NKakZ zwYwzZiOtHxizDi+W$r95i{Rj_3U1%eSRbHiwAr62ktxtf!2kpl@)Wqh3