From 2dc982bf9e795905486ec9ddb890eeee12678b31 Mon Sep 17 00:00:00 2001
From: Lucas Cimon <925560+Lucas-C@users.noreply.github.com>
Date: Tue, 11 Jul 2023 01:57:06 +0200
Subject: [PATCH] Better handling of text overflow in FPDF.write() &
 FPDF.write_html() - fix #847

---
 CHANGELOG.md                                  |   1 +
 fpdf/fonts.py                                 |   3 +
 fpdf/fpdf.py                                  |   2 +-
 fpdf/line_break.py                            |  21 +----
 test/text/test_line_break.py                  |  10 +++
 test/text/test_unbreakable.py                 |  78 +++++++++---------
 test/text/test_varied_fragments.py            |   2 +-
 test/text/test_write.py                       |  29 +++++--
 .../write_overflow_no_initial_newline.pdf     | Bin 0 -> 979 bytes
 test/text/write_soft_hyphen.pdf               | Bin 1427 -> 1434 bytes
 10 files changed, 80 insertions(+), 66 deletions(-)
 create mode 100644 test/text/write_overflow_no_initial_newline.pdf

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fda90ac23..d2626a07a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 ### Fixed
 - [`FPDF.table()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.table): the `colspan` setting has been fixed - [documentation](https://pyfpdf.github.io/fpdf2/Tables.html#column-span)
 - [`FPDF.image()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): allowing images path starting with `data` to be passed as input
+- text overflow is better handled by `FPDF.write()` & `FPDF.write_html()` - _cf._ [issue #847](https://github.com/PyFPDF/fpdf2/issues/847)
 - the initial text color is preserved when using `FPDF.write_html()` - _cf._ [issue #846](https://github.com/PyFPDF/fpdf2/issues/846)
 ### Deprecated
 - the `center` optional parameter of [`FPDF.cell()`](https://pyfpdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.cell) is **no more** deprecated, as it allows for horizontal positioning, which is different from text alignment control with `align="C"`
diff --git a/fpdf/fonts.py b/fpdf/fonts.py
index 1fb78d83e..6eb18f9ab 100644
--- a/fpdf/fonts.py
+++ b/fpdf/fonts.py
@@ -61,6 +61,9 @@ def __init__(self, fpdf, fontkey, style):
         self.fontkey = fontkey
         self.emphasis = TextEmphasis.coerce(style)
 
+    def __repr__(self):
+        return f"CoreFont(i={self.i}, fontkey={self.fontkey})"
+
 
 class TTFFont:
     __slots__ = (
diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py
index 5a15d0820..c84d2c571 100644
--- a/fpdf/fpdf.py
+++ b/fpdf/fpdf.py
@@ -3593,7 +3593,7 @@ def write(
         # first line from current x position to right margin
         first_width = self.w - self.x - self.r_margin
         txt_line = multi_line_break.get_line_of_given_width(
-            first_width - 2 * self.c_margin, wordsplit=False
+            first_width - 2 * self.c_margin,
         )
         # remaining lines fill between margins
         full_width = self.w - self.l_margin - self.r_margin
diff --git a/fpdf/line_break.py b/fpdf/line_break.py
index 56c71a96d..1b3ad0aaa 100644
--- a/fpdf/line_break.py
+++ b/fpdf/line_break.py
@@ -38,12 +38,10 @@ def __init__(
         self.link = link
 
     def __repr__(self):
-        gstate = self.graphics_state.copy()
-        if "current_font" in gstate:
-            del gstate["current_font"]  # TMI
         return (
             f"Fragment(characters={self.characters},"
-            f" graphics_state={gstate}, k={self.k}, link={self.link})"
+            f" graphics_state={self.graphics_state},"
+            f" k={self.k}, link={self.link})"
         )
 
     @property
@@ -394,7 +392,7 @@ def __init__(
         self.idx_last_forced_break = None
 
     # pylint: disable=too-many-return-statements
-    def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
+    def get_line_of_given_width(self, maximum_width: float):
         first_char = True  # "Tw" ignores the first character in a text object.
         idx_last_forced_break = self.idx_last_forced_break
         self.idx_last_forced_break = None
@@ -402,10 +400,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
         if self.fragment_index == len(self.styled_text_fragments):
             return None
 
-        last_fragment_index = self.fragment_index
-        last_character_index = self.character_index
-        line_full = False
-
         current_line = CurrentLine(print_sh=self.print_sh)
         while self.fragment_index < len(self.styled_text_fragments):
             current_fragment = self.styled_text_fragments[self.fragment_index]
@@ -442,9 +436,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
                     ) = current_line.automatic_break(self.justify)
                     self.character_index += 1
                     return line
-                if not wordsplit:
-                    line_full = True
-                    break
                 if idx_last_forced_break == self.character_index:
                     raise FPDFException(
                         "Not enough horizontal space to render a single character"
@@ -464,12 +455,6 @@ def get_line_of_given_width(self, maximum_width: float, wordsplit: bool = True):
 
             self.character_index += 1
 
-        if line_full and not wordsplit:
-            # roll back and return empty line to trigger continuation
-            # on the next line.
-            self.fragment_index = last_fragment_index
-            self.character_index = last_character_index
-            return CurrentLine().manual_break(self.justify)
         if current_line.width:
             return current_line.manual_break()
         return None
diff --git a/test/text/test_line_break.py b/test/text/test_line_break.py
index f07705bd3..3e79438e8 100644
--- a/test/text/test_line_break.py
+++ b/test/text/test_line_break.py
@@ -1129,3 +1129,13 @@ def test_trim_trailing_spaces():
     cl.fragments = [frag]
     res = cl.trim_trailing_spaces()
     assert res is None
+
+
+def test_line_break_no_initial_newline():  # issue-847
+    text = "X" * 50
+    alphabet = {"normal": {}}
+    alphabet["normal"]["X"] = 4.7
+    fragments = [FxFragment(alphabet, text, _gs_normal, 1)]
+    multi_line_break = MultiLineBreak(fragments)
+    text_line = multi_line_break.get_line_of_given_width(188)
+    assert text_line.fragments
diff --git a/test/text/test_unbreakable.py b/test/text/test_unbreakable.py
index 8ecdb8969..1c21dc5d4 100644
--- a/test/text/test_unbreakable.py
+++ b/test/text/test_unbreakable.py
@@ -167,12 +167,12 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path):  # issue 359
 
     pdf.ln()
 
-    with pdf.unbreakable() as doc:
-        for _ in range(4):
-            for row in data:
-                max_no_of_lines_in_cell = 1
-                for cell in row:
-                    with pytest.warns(DeprecationWarning, match=expected_warn):
+    with pytest.warns(DeprecationWarning, match=expected_warn):
+        with pdf.unbreakable() as doc:
+            for _ in range(4):
+                for row in data:
+                    max_no_of_lines_in_cell = 1
+                    for cell in row:
                         result = doc.multi_cell(
                             cell_width,
                             l_height,
@@ -184,39 +184,39 @@ def test_multi_cell_table_unbreakable_with_split_only(tmp_path):  # issue 359
                             max_line_height=l_height,
                             split_only=True,
                         )
-                    no_of_lines_in_cell = len(result)
-                    if no_of_lines_in_cell > max_no_of_lines_in_cell:
-                        max_no_of_lines_in_cell = no_of_lines_in_cell
-                no_of_lines_list.append(max_no_of_lines_in_cell)
-
-            for j, row in enumerate(data):
-                cell_height = no_of_lines_list[j] * l_height
-                for cell in row:
-                    if j == 0:
-                        doc.multi_cell(
-                            cell_width,
-                            cell_height,
-                            "**" + cell + "**",
-                            border=1,
-                            fill=False,
-                            align="L",
-                            new_x="RIGHT",
-                            new_y="TOP",
-                            max_line_height=l_height,
-                            markdown=False,
-                        )
-                    else:
-                        doc.multi_cell(
-                            cell_width,
-                            cell_height,
-                            cell,
-                            border=1,
-                            align="L",
-                            new_x="RIGHT",
-                            new_y="TOP",
-                            max_line_height=l_height,
-                        )
-                doc.ln(cell_height)
+                        no_of_lines_in_cell = len(result)
+                        if no_of_lines_in_cell > max_no_of_lines_in_cell:
+                            max_no_of_lines_in_cell = no_of_lines_in_cell
+                    no_of_lines_list.append(max_no_of_lines_in_cell)
+
+                for j, row in enumerate(data):
+                    cell_height = no_of_lines_list[j] * l_height
+                    for cell in row:
+                        if j == 0:
+                            doc.multi_cell(
+                                cell_width,
+                                cell_height,
+                                "**" + cell + "**",
+                                border=1,
+                                fill=False,
+                                align="L",
+                                new_x="RIGHT",
+                                new_y="TOP",
+                                max_line_height=l_height,
+                                markdown=False,
+                            )
+                        else:
+                            doc.multi_cell(
+                                cell_width,
+                                cell_height,
+                                cell,
+                                border=1,
+                                align="L",
+                                new_x="RIGHT",
+                                new_y="TOP",
+                                max_line_height=l_height,
+                            )
+                    doc.ln(cell_height)
 
     assert_pdf_equal(
         pdf, HERE / "multi_cell_table_unbreakable_with_split_only.pdf", tmp_path
diff --git a/test/text/test_varied_fragments.py b/test/text/test_varied_fragments.py
index 0447e2d8a..8f1423089 100644
--- a/test/text/test_varied_fragments.py
+++ b/test/text/test_varied_fragments.py
@@ -29,7 +29,7 @@ def write_fragments(self, frags, align=Align.L):
         # first line from current x position to right margin
         first_width = self.w - self.x - self.r_margin
         text_line = multi_line_break.get_line_of_given_width(
-            first_width - 2 * self.c_margin, wordsplit=False
+            first_width - 2 * self.c_margin
         )
         # remaining lines fill between margins
         full_width = self.w - self.l_margin - self.r_margin
diff --git a/test/text/test_write.py b/test/text/test_write.py
index 1bd4cf5d0..16ae9a83d 100644
--- a/test/text/test_write.py
+++ b/test/text/test_write.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-import fpdf
+from fpdf import FPDF
 from test.conftest import assert_pdf_equal, LOREM_IPSUM
 
 HERE = Path(__file__).resolve().parent
@@ -8,7 +8,7 @@
 
 
 def test_write_page_break(tmp_path):
-    doc = fpdf.FPDF()
+    doc = FPDF()
     doc.add_page()
     doc.set_font("helvetica", size=24)
     doc.y = 20
@@ -18,8 +18,15 @@ def test_write_page_break(tmp_path):
 
 
 def test_write_soft_hyphen(tmp_path):
+    """
+    The current behaviour is close to CSS word-break: break-all
+    cf. https://developer.mozilla.org/en-US/docs/Web/CSS/overflow-wrap#comparing_overflow-wrap_word-break_and_hyphens
+    We used to prefer a line break over a word split without regard to soft hyphens:
+    https://github.com/PyFPDF/fpdf2/blob/2.7.4/test/text/write_soft_hyphen.pdf
+    But that caused an issue with write_html(), cf. issue #847
+    """
     s = "Donau\u00addamp\u00adfschiff\u00adfahrts\u00adgesellschafts\u00adkapitäns\u00admützen\u00adstreifen. "
-    doc = fpdf.FPDF()
+    doc = FPDF()
     doc.add_page()
     doc.set_font("helvetica", size=24)
     doc.y = 20
@@ -41,7 +48,7 @@ def test_write_soft_hyphen(tmp_path):
 
 def test_write_trailing_nl(tmp_path):  # issue #455
     """Each item in lines triggers a line break at the end."""
-    pdf = fpdf.FPDF()
+    pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Times", size=16)
     lines = ["Hello\n", "Sweet\n", "World\n"]
@@ -53,7 +60,7 @@ def test_write_trailing_nl(tmp_path):  # issue #455
 
 def test_write_font_stretching(tmp_path):  # issue #478
     right_boundary = 60
-    pdf = fpdf.FPDF()
+    pdf = FPDF()
     pdf.add_page()
     # built-in font
     pdf.set_font("Helvetica", "", 8)
@@ -81,7 +88,7 @@ def test_write_font_stretching(tmp_path):  # issue #478
 
 
 def test_write_superscript(tmp_path):
-    pdf = fpdf.FPDF()
+    pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Helvetica", "", 20)
 
@@ -131,7 +138,7 @@ def write_this():
 
 def test_write_char_wrap(tmp_path):  # issue #649
     right_boundary = 50
-    pdf = fpdf.FPDF()
+    pdf = FPDF()
     pdf.add_page()
     pdf.set_right_margin(pdf.w - right_boundary)
     pdf.set_font("Helvetica", "", 10)
@@ -150,3 +157,11 @@ def test_write_char_wrap(tmp_path):  # issue #649
     pdf.line(pdf.l_margin, 10, pdf.l_margin, 130)
     pdf.line(right_boundary, 10, right_boundary, 130)
     assert_pdf_equal(pdf, HERE / "write_char_wrap.pdf", tmp_path)
+
+
+def test_write_overflow_no_initial_newline(tmp_path):  # issue-847
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font(family="Helvetica", size=20)
+    pdf.write(7, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
+    assert_pdf_equal(pdf, HERE / "write_overflow_no_initial_newline.pdf", tmp_path)
diff --git a/test/text/write_overflow_no_initial_newline.pdf b/test/text/write_overflow_no_initial_newline.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..74c2000766ad6b3dfbe1e8ea26ed4e4b7854fd87
GIT binary patch
literal 979
zcmah|J#Q015DhfI3J?v2MKcnz;uLsy?|fg#vSQ!)6pJ7mJ5r=rDK7TfIdtB&?)Jc;
zrlki&iTD9D{Ec+<G)U0!6WH|!J|zXy?CqPKH#2YM%Do_bLaQdCKtP<mMD;qtzR0-*
zitw{|oWamkuKp>)&)GPRT4Di1q7cios)h@0jaFSxd5<pU3~(=+vJ5pEi1D$eGYYib
zIpfWdj0InF0uEz&06b3;MW90uqYIHsz+KLKk>oSJf-}(N^*2fUNJfd67SiYvx7BmW
zxXhrois?nEx^5WyEE9P;Vj0*)N}asGr$aY<VVp>os#=psvVe`mm?3<~_*9;QQ`142
zOj$HTi%(|%&ky6$w2mu3Q}p8UwRuF;mGo|R2J?G2?vx(=DxFl`4?q6?P}+O@xpE#n
zxK(@D`r3O(obTqhedp$vyE{AgAMb1R)}(74twKSqC}W{eE#U)}ykasQMF_XKDlFzx
zz{fFf@+{r}2oLfzxg3rjk(%miQktRAu~y?>$C|ue#~}rsx35~ny;O__Ou`VWjRAbb
z7Ak*dretfaPk()OP7Ab7f!0?l>FEbbN2ml(sb_oCFe&wjvJ>j<60*Bj2wtRYf(W=s
z`&S0GWtkRCV2gRMbOGODp6W(h0i)jacZPEvs~KwcS7I$gt=|>QChiuKX%r_cUH&|S
z_zhDf<Gv6Q^!lr{?eK{JErtFLU|6@CA@!-@`XsR1wYEc=erQvd5YKfjr%f9Ff3Z|K
SlTj*{GTF9=%H?)9M1KJuZ3ia+

literal 0
HcmV?d00001

diff --git a/test/text/write_soft_hyphen.pdf b/test/text/write_soft_hyphen.pdf
index 123c3b752233f1cb7cd4eba0f7d5fe9af70f95a7..6774e4eb3fde5faa98753edf878454eb2ae3a8d8 100644
GIT binary patch
delta 465
zcmbQtJ&SvTBV)Y<mz^C~aY<2XVlG$3oTZa&^Oy_;TC4pGQ-c!TMBeVc&7l$Wo^?$^
z`_pvC8yuaVpGk=PQQ2K|%`d67%;fXE<(2#oOC35j9$HkaOfX4qm=nM?wKcO;TC`__
z;gq0z%Zn>!<w)1gczyQVuI;Y5Yp-_iEq{M(=GR|V7q9J}dhhDL`o)EZ(^pKpUuv<<
zYW8OPna6(aUAZ=sgJt(($!~@ej20Q}@h?`<xw*sE)9K%-z%Q0Np15=cd^sn0+w17<
z-aYBHMJg>`x48Z?NTwd!pwRI!)h&ZF{a)+5uWKw!j&iIDSblz@!BM@3_VcGo&)VT7
zF5%_BZQ{m&*d_cTF;i?-KL|_{h-c-0SIn))@;CgT{!%-Ax6K|*iA;f(3I-sckf*=}
zW*8Wl8<=5;Sr}Mgh*=sMql+0D7#U3d#bOidW@2Gz?&f6a=4fbaZ02TeWaeh!Vqj|E
mWME?9>}+gcXG2g$EF`)Ti%KerQq#B$4J-_~R8?L5-M9c)fv*n$

delta 458
zcmbQmJ(+uhBV)ZGmz^C~aY<2XVlG$3oVAl2^9~sZxPG6LG;PC*jq2Nk^955cmg*%4
z?q0&nm>52_>Ym)<Zm|p{^TLBpIqt^)?a%(_@E7)S;d?*XmGiSG^OKe!4PBdQ#{-XX
zXe&(rWU@A4*PX4u)?T{3xAe7d?%J!fo6FB1o4M@g%#7D}SKqt-uYPkWzp+OAzgGos
zO18!SJG|wbyicyLx5ACN>kB#>rwFz_tevBrbh7K63RmSsW$B`Vlbs?9UvwttXp6o#
zyEL!#SqI0{EyBOpi>62(?onu)&biQaPNjU+rOn3{YPtoqS~>T1Xny02uMNw}NKakZ
zwYwzZiOtHxizDi+W$r95i{Rj_3U1%eSRbHiwAr62ktxtf!2kpl@)Wqh3<Co*OG^wf
z3j;$8F-rq8bTLB%L*vPetTwTZmTs<YrY<g)ZWb=iCe8-Nu5RXr=FVnDF6I_SmTo3?
eHUw3~LgFd0sHCDOHI2*Az}$pORn^tsjSB#IG^lR?