py-pdf · Lucas-C · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023
@@ -29,6 +29,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
 - vector images parsing is now more robust: `fpdf2` can now embed SVG files without `viewPort` or no `height` / `width`
 - bitonal images are now encoded using `CCITTFaxDecode`, reducing their size in the PDF document - thanks to @eroux
 - when possible, JPG and group4 encoded TIFFs are now embedded directly without recompression - thanks to @eroux
+- ICC profiles of included images are now extracted and turned into PDF objects, so that PDF viewers can take them into account - thanks to @eroux
 
 ## [2.6.1] - 2023-01-13
 ### Added

@@ -291,6 +291,7 @@ def __init__(
         self.pages = {}  # array of PDFPage objects starting at index 1
         self.fonts = {}  # map font string keys to dicts describing the fonts used
         self.images = {}  # map image identifiers to dicts describing the raster images
+        self.iccps = {}  # map icc profiles (bytes) to their index (number)
         self.links = {}  # array of Destination objects starting at index 1
         self.embedded_files = []  # array of PDFEmbeddedFile
 
@@ -3719,6 +3720,15 @@ def preload_image(self, name, dims=None):
             info = ImageInfo(get_img_info(name, img, self.image_filter, dims))
             info["i"] = len(self.images) + 1
             info["usages"] = 1
+            info["iccp_i"] = None
+            if "iccp" in info and info["iccp"]:
+                if info["iccp"] in self.iccps:
+                    info["iccp_i"] = self.iccps[info["iccp"]]
+                else:
+                    iccp_i = len(self.iccps)
+                    self.iccps[info["iccp"]] = iccp_i
+                    info["iccp_i"] = iccp_i
+                info["iccp"] = None
             self.images[name] = info
         return name, img, info
 

@@ -353,6 +353,10 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
     w, h = img.size
     info = {}
 
+    iccp = None
+    if "icc_profile" in img.info:
+        iccp = img.info.get("icc_profile")
+
     if img_raw_data is not None and not img_altered:
         # if we can use the original image bytes directly we do (JPEG and group4 TIFF only):
         if img.format == "JPEG" and image_filter == "DCTDecode":
@@ -365,6 +369,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
                 "w": w,
                 "h": h,
                 "cs": colspace,
+                "iccp": iccp,
+                "dpn": dpn,
                 "bpc": bpc,
                 "f": image_filter,
                 "dp": f"/Predictor 15 /Colors {dpn} /Columns {w}",
@@ -406,6 +412,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
                 "data": ccittrawdata,
                 "w": w,
                 "h": h,
+                "iccp": None,
+                "dpn": dpn,
                 "cs": colspace,
                 "bpc": bpc,
                 "f": image_filter,
@@ -477,7 +485,9 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
             "w": w,
             "h": h,
             "cs": colspace,
+            "iccp": iccp,
             "bpc": bpc,
+            "dpn": dpn,
             "f": image_filter,
             "dp": dp,
         }

@@ -194,7 +194,6 @@ class PDFXObject(PDFContentStream):
         "height",
         "color_space",
         "bits_per_component",
-        "filter",
         "decode",
         "decode_parms",
         "s_mask",
@@ -225,6 +224,27 @@ def __init__(
         self.s_mask = None
 
 
+class PDFICCPObject(PDFContentStream):
+    # Content stream holding the raw bytes of an ICC colour profile.
+    # The bytes are handed to the parent constructor with compression enabled.
+    # NOTE(review): `n` and `alternate` are presumably serialized as the /N and
+    # /Alternate entries of an ICCBased stream dictionary - confirm against the
+    # parent class serialization logic.
+
+    # RAM usage optimization
+    __slots__ = ("_id", "_contents", "filter", "length", "n", "alternate")
+
+    def __init__(self, contents, n, alternate):
+        # contents: ICC profile data, forwarded untouched to the parent class
+        # n: stored as-is (the visible caller passes the image "dpn" value)
+        # alternate: colour space name, wrapped in a Name
+        super().__init__(contents=contents, compress=True)
+        self.n = n
+        self.alternate = Name(alternate)
+
+
 class PDFPage(PDFObject):
     __slots__ = (  # RAM usage optimization
         "_id",
@@ -341,6 +361,7 @@ class OutputProducer:
     def __init__(self, fpdf):
         self.fpdf = fpdf
         self.pdf_objs = []
+        self.iccp_i_to_pdf_i = {}
         self.obj_id = 0  # current PDF object number
         # array of PDF object offsets in self.buffer, used to build the xref table:
         self.offsets = {}
@@ -719,10 +740,33 @@ def _add_images(self):
                 img_objs_per_index[img["i"]] = self._add_image(img)
         return img_objs_per_index
 
+    def _ensure_iccp(self, info, iccp_i):
+        """
+        Make sure the ICC profile registered under index `iccp_i` in `self.fpdf.iccps`
+        has been added to the output, and return the value `_add_pdf_obj` gave back for it
+        (`_add_image` uses that value as the object number of an indirect reference).
+
+        The profile is only added the first time a given index is requested;
+        later calls return the value memoized in `self.iccp_i_to_pdf_i`.
+        `info["dpn"]` and `info["cs"]` of the requesting image are used as
+        the `n` and `alternate` arguments of the new `PDFICCPObject`.
+        """
+        known = self.iccp_i_to_pdf_i
+        if iccp_i in known:
+            return known[iccp_i]
+        # fpdf.iccps maps profile bytes -> index, hence this reverse lookup:
+        profile = next(
+            (data for data, index in self.fpdf.iccps.items() if index == iccp_i),
+            None,
+        )
+        assert profile is not None
+        pdf_i = self._add_pdf_obj(
+            PDFICCPObject(contents=profile, n=info["dpn"], alternate=info["cs"]),
+            "iccp",
+        )
+        known[iccp_i] = pdf_i
+        return pdf_i
+
     def _add_image(self, info):
         color_space = Name(info["cs"])
         decode = None
-        if color_space == "Indexed":
+        if "iccp_i" in info and info["iccp_i"] is not None:
+            iccp_pdf_i = self._ensure_iccp(info, info["iccp_i"])
+            color_space = PDFArray(["/ICCBased", str(iccp_pdf_i), str("0"), "R"])
+        elif color_space == "Indexed":
             color_space = PDFArray(
                 ["/Indexed", "/DeviceRGB", f"{len(info['pal']) // 3 - 1}"]
             )

@@ -116,6 +116,14 @@ def test_insert_bmp(tmp_path):
     assert_pdf_equal(pdf, HERE / "image_types_insert_bmp.pdf", tmp_path)
 
 
+def test_insert_jpg_icc(tmp_path):
+    # Places a JPEG (presumably carrying an ICC profile, going by its file name)
+    # at the origin of a margin-less custom-sized page, scaled to the full page
+    # height, then compares the document against the stored reference PDF.
+    page_w, page_h = 448, 498
+    pdf = fpdf.FPDF()
+    pdf.add_page(format=(page_w, page_h))
+    pdf.set_margin(0)
+    pdf.image(HERE / "insert_images_insert_jpg_icc.jpg", x=0, y=0, h=page_h)
+    assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc.pdf", tmp_path)
+
+
 def test_insert_gif(tmp_path):
     pdf = fpdf.FPDF()
     pdf.compress = False

@@ -45,10 +45,12 @@ def test_load_invalid_base64_data():
 @memunit.assert_lt_mb(147)
 def test_share_images_cache(tmp_path):
     images_cache = {}
+    iccp_cache = {}
 
     def build_pdf_with_big_images():
         pdf = fpdf.FPDF()
         pdf.images = images_cache
+        pdf.iccps = iccp_cache
         pdf.add_page()
         for img_path in glob(f"{HERE}/png_images/*.png"):
             pdf.image(img_path, h=pdf.eph)