py-pdf · Lucas-C · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023
@@ -54,8 +54,6 @@ jobs:
           find . -name '*.pdf' | xargs -n 1 scripts/verapdf.py
           scripts/verapdf.py  # printing aggregated report
       - name: Running tests ☑
-        env:
-          PYTHONMALLOCSTATS: 1
         run: |
           # Ensuring there is no `generate=True` left remaining in calls to assert_pdf_equal:
           grep -IRF generate=True test/ && exit 1

@@ -29,6 +29,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 - vector images parsing is now more robust: `fpdf2` can now embed SVG files without `viewPort` or no `height` / `width`
 - bitonal images are now encoded using `CCITTFaxDecode`, reducing their size in the PDF document - thanks to @eroux
 - when possible, JPG and group4 encoded TIFFs are now embedded directly without recompression - thanks to @eroux
+- ICC Profiles of included images are now extracted and turned into PDF objects; they should now be taken into account by PDF viewers - thanks to @eroux
 
 ## [2.6.1] - 2023-01-13
 ### Added

@@ -150,8 +150,7 @@ pdf.image("https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png")
 
 ## Image compression ##
 
-By default, `fpdf2` will avoid altering your images :
-no image conversion from / to PNG / JPEG is performed.
+By default, `fpdf2` will avoid altering or recompressing your images: when possible, the original bytes from the JPG or TIFF file will be used directly. Bitonal images are by default compressed as TIFF Group4.
 
 However, you can easily tell `fpdf2` to embed all images as JPEGs in order to reduce your PDF size,
 using [`set_image_filter()`](fpdf/fpdf.html#fpdf.fpdf.FPDF.set_image_filter):
@@ -171,6 +170,9 @@ Beware that "flattening" images into JPEGs this way will fill transparent areas
 The allowed `image_filter` values are listed in the [image_parsing]( https://github.com/PyFPDF/fpdf2/blob/master/fpdf/image_parsing.py) module and are currently:
 `FlateDecode` (lossless zlib/deflate compression), `DCTDecode` (lossy compression with JPEG) and `JPXDecode` (lossy compression with JPEG2000).
 
+## ICC Profiles
+
+The ICC profiles of included images are read through the PIL call `Image.info.get("icc_profile")` and are included in the PDF as objects.
 
 ## Oversized images detection & downscaling ##
 

@@ -291,6 +291,7 @@ def __init__(
         self.pages = {}  # array of PDFPage objects starting at index 1
         self.fonts = {}  # map font string keys to dicts describing the fonts used
         self.images = {}  # map image identifiers to dicts describing the raster images
+        self.icc_profiles = {}  # map icc profiles (bytes) to their index (number)
         self.links = {}  # array of Destination objects starting at index 1
         self.embedded_files = []  # array of PDFEmbeddedFile
 
@@ -3719,6 +3720,20 @@ def preload_image(self, name, dims=None):
             info = ImageInfo(get_img_info(name, img, self.image_filter, dims))
             info["i"] = len(self.images) + 1
             info["usages"] = 1
+            info["iccp_i"] = None
+            iccp = info.get("iccp")
+            if iccp:
+                LOGGER.debug(
+                    "ICC profile found for image %s - It will be inserted in the PDF document",
+                    name,
+                )
+                if iccp in self.icc_profiles:
+                    info["iccp_i"] = self.icc_profiles[iccp]
+                else:
+                    iccp_i = len(self.icc_profiles)
+                    self.icc_profiles[iccp] = iccp_i
+                    info["iccp_i"] = iccp_i
+                info["iccp"] = None
             self.images[name] = info
         return name, img, info
 

@@ -353,6 +353,10 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
     w, h = img.size
     info = {}
 
+    iccp = None
+    if "icc_profile" in img.info:
+        iccp = img.info.get("icc_profile")
+
     if img_raw_data is not None and not img_altered:
         # if we can use the original image bytes directly we do (JPEG and group4 TIFF only):
         if img.format == "JPEG" and image_filter == "DCTDecode":
@@ -365,6 +369,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
                 "w": w,
                 "h": h,
                 "cs": colspace,
+                "iccp": iccp,
+                "dpn": dpn,
                 "bpc": bpc,
                 "f": image_filter,
                 "dp": f"/Predictor 15 /Colors {dpn} /Columns {w}",
@@ -406,6 +412,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
                 "data": ccittrawdata,
                 "w": w,
                 "h": h,
+                "iccp": None,
+                "dpn": dpn,
                 "cs": colspace,
                 "bpc": bpc,
                 "f": image_filter,
@@ -477,7 +485,9 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
             "w": w,
             "h": h,
             "cs": colspace,
+            "iccp": iccp,
             "bpc": bpc,
+            "dpn": dpn,
             "f": image_filter,
             "dp": dp,
         }

@@ -194,7 +194,6 @@ class PDFXObject(PDFContentStream):
         "height",
         "color_space",
         "bits_per_component",
-        "filter",
         "decode",
         "decode_parms",
         "s_mask",
@@ -225,6 +224,27 @@ def __init__(
         self.s_mask = None
 
 
+class PDFICCPObject(PDFContentStream):  # content stream object carrying one embedded ICC profile
+    __slots__ = (  # RAM usage optimization
+        "_id",
+        "_contents",
+        "filter",  # not assigned in this class; presumably managed by PDFContentStream - TODO confirm
+        "length",  # not assigned in this class; presumably managed by PDFContentStream - TODO confirm
+        "n",
+        "alternate",
+    )
+
+    def __init__(
+        self,
+        contents,  # profile payload, forwarded unchanged to PDFContentStream
+        n,  # the visible caller (_ensure_iccp) passes the image's "dpn" value; presumably the /N component count - TODO confirm
+        alternate,  # the visible caller (_ensure_iccp) passes the image's "cs" colour space name
+    ):
+        super().__init__(contents=contents, compress=True)  # compression is always requested for the profile stream
+        self.n = n
+        self.alternate = Name(alternate)  # stored wrapped in Name rather than as the raw value
+
+
 class PDFPage(PDFObject):
     __slots__ = (  # RAM usage optimization
         "_id",
@@ -341,6 +361,7 @@ class OutputProducer:
     def __init__(self, fpdf):
         self.fpdf = fpdf
         self.pdf_objs = []
+        self.iccp_i_to_pdf_i = {}
         self.obj_id = 0  # current PDF object number
         # array of PDF object offsets in self.buffer, used to build the xref table:
         self.offsets = {}
@@ -719,10 +740,35 @@ def _add_images(self):
                 img_objs_per_index[img["i"]] = self._add_image(img)
         return img_objs_per_index
 
+    def _ensure_iccp(self, img_info):  # NOTE(review): the returned value comes from _add_pdf_obj and is used by _add_image as the number in an "N 0 R" reference
+        """
+        Returns the PDF object of the ICC profile indexed iccp_i in the FPDF object.
+        Adds it if not present.
+        """
+        iccp_i = img_info["iccp_i"]  # profile index stored on the image info dict
+        if iccp_i in self.iccp_i_to_pdf_i:  # profile already registered: reuse the cached value
+            return self.iccp_i_to_pdf_i[iccp_i]
+        iccp_content = None
+        for iccp_c, i in self.fpdf.icc_profiles.items():  # icc_profiles maps profile bytes -> index, so index -> bytes needs a linear scan
+            if iccp_i == i:
+                iccp_content = iccp_c
+                break
+        assert iccp_content is not None  # NOTE(review): assert is stripped under python -O; an unknown index would then pass None as contents
+        iccp_obj = PDFICCPObject(
+            contents=iccp_content, n=img_info["dpn"], alternate=img_info["cs"]  # NOTE(review): n/alternate come from the first image that triggers creation; later images sharing the profile reuse the cached object
+        )
+        iccp_pdf_i = self._add_pdf_obj(iccp_obj, "iccp")
+        self.iccp_i_to_pdf_i[iccp_i] = iccp_pdf_i  # memoize so each profile is added only once
+        return iccp_pdf_i
+
     def _add_image(self, info):
         color_space = Name(info["cs"])
         decode = None
-        if color_space == "Indexed":
+        iccp_i = info.get("iccp_i")
+        if iccp_i is not None:
+            iccp_pdf_i = self._ensure_iccp(info)
+            color_space = PDFArray(["/ICCBased", str(iccp_pdf_i), str("0"), "R"])
+        elif color_space == "Indexed":
             color_space = PDFArray(
                 ["/Indexed", "/DeviceRGB", f"{len(info['pal']) // 3 - 1}"]
             )

@@ -116,6 +116,22 @@ def test_insert_bmp(tmp_path):
     assert_pdf_equal(pdf, HERE / "image_types_insert_bmp.pdf", tmp_path)
 
 
+def test_insert_jpg_icc(tmp_path):  # compares a PDF built from JPEGs carrying an ICC profile against a reference PDF
+    pdf = fpdf.FPDF()
+    pdf.add_page(format=(448, 498))
+    pdf.set_margin(0)
+    pdf.image(HERE / "insert_images_insert_jpg_icc.jpg", x=0, y=0, h=498)
+    # we add the same image a second time to make sure the ICC profile is
+    # included only once in that case
+    pdf.add_page(format=(448, 498))
+    pdf.image(HERE / "insert_images_insert_jpg_icc.jpg", x=0, y=0, h=498)
+    # we add a different image with the same ICC profile to make sure the profile
+    # is also included only once in that case
+    pdf.add_page(format=(314, 500))
+    pdf.image(HERE / "insert_images_insert_jpg_icc_2.jpg", x=0, y=0, h=500)
+    assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc.pdf", tmp_path)
+
+
 def test_insert_gif(tmp_path):
     pdf = fpdf.FPDF()
     pdf.compress = False

@@ -45,10 +45,12 @@ def test_load_invalid_base64_data():
 @memunit.assert_lt_mb(147)
 def test_share_images_cache(tmp_path):
     images_cache = {}
+    icc_profiles_cache = {}
 
     def build_pdf_with_big_images():
         pdf = fpdf.FPDF()
         pdf.images = images_cache
+        pdf.icc_profiles = icc_profiles_cache
         pdf.add_page()
         for img_path in glob(f"{HERE}/png_images/*.png"):
             pdf.image(img_path, h=pdf.eph)