diff --git a/.github/workflows/continuous-integration-workflow.yml b/.github/workflows/continuous-integration-workflow.yml index 4e2d4ebab..3b11a973a 100644 --- a/.github/workflows/continuous-integration-workflow.yml +++ b/.github/workflows/continuous-integration-workflow.yml @@ -54,8 +54,6 @@ jobs: find . -name '*.pdf' | xargs -n 1 scripts/verapdf.py scripts/verapdf.py # printing aggregated report - name: Running tests ☑ - env: - PYTHONMALLOCSTATS: 1 run: | # Ensuring there is no `generate=True` left remaining in calls to assert_pdf_equal: grep -IRF generate=True test/ && exit 1 diff --git a/CHANGELOG.md b/CHANGELOG.md index f78aa1b61..473e19d97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default', - vector images parsing is now more robust: `fpdf2` can now embed SVG files without `viewPort` or no `height` / `width` - bitonal images are now encoded using `CCITTFaxDecode`, reducing their size in the PDF document - thanks to @eroux - when possible, JPG and group4 encoded TIFFs are now embedded directly without recompression - thanks to @eroux +- ICC Profiles of included images are now extracted and turned into PDF objects; they should now be taken into account by PDF viewers - thanks to @eroux ## [2.6.1] - 2023-01-13 ### Added diff --git a/docs/Images.md b/docs/Images.md index ea9050650..925091e84 100644 --- a/docs/Images.md +++ b/docs/Images.md @@ -150,8 +150,7 @@ pdf.image("https://upload.wikimedia.org/wikipedia/commons/7/70/Example.png") ## Image compression ## -By default, `fpdf2` will avoid altering your images : -no image conversion from / to PNG / JPEG is performed. +By default, `fpdf2` will avoid altering or recompressing your images: when possible, the original bytes from the JPG or TIFF file will be used directly. Bitonal images are by default compressed as TIFF Group4. 
However, you can easily tell `fpdf2` to embed all images as JPEGs in order to reduce your PDF size, using [`set_image_filter()`](fpdf/fpdf.html#fpdf.fpdf.FPDF.set_image_filter): @@ -171,6 +170,9 @@ Beware that "flattening" images into JPEGs this way will fill transparent areas The allowed `image_filter` values are listed in the [image_parsing]( https://github.com/PyFPDF/fpdf2/blob/master/fpdf/image_parsing.py) module and are currently: `FlateDecode` (lossless zlib/deflate compression), `DCTDecode` (lossy compression with JPEG) and `JPXDecode` (lossy compression with JPEG2000). +## ICC Profiles + +The ICC profiles of the included images are read through the PIL function `Image.info.get("icc_profile")` and are included in the PDF as objects. ## Oversized images detection & downscaling ## diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 8bca5c6e6..799ac0ec3 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -291,6 +291,7 @@ def __init__( self.pages = {} # array of PDFPage objects starting at index 1 self.fonts = {} # map font string keys to dicts describing the fonts used self.images = {} # map image identifiers to dicts describing the raster images + self.icc_profiles = {} # map icc profiles (bytes) to their index (number) self.links = {} # array of Destination objects starting at index 1 self.embedded_files = [] # array of PDFEmbeddedFile @@ -3719,6 +3720,20 @@ def preload_image(self, name, dims=None): info = ImageInfo(get_img_info(name, img, self.image_filter, dims)) info["i"] = len(self.images) + 1 info["usages"] = 1 + info["iccp_i"] = None + iccp = info.get("iccp") + if iccp: + LOGGER.debug( + "ICC profile found for image %s - It will be inserted in the PDF document", + name, + ) + if iccp in self.icc_profiles: + info["iccp_i"] = self.icc_profiles[iccp] + else: + iccp_i = len(self.icc_profiles) + self.icc_profiles[iccp] = iccp_i + info["iccp_i"] = iccp_i + info["iccp"] = None self.images[name] = info return name, img, info diff --git a/fpdf/image_parsing.py 
b/fpdf/image_parsing.py index 2ab245eb8..c142b2573 100644 --- a/fpdf/image_parsing.py +++ b/fpdf/image_parsing.py @@ -3,9 +3,11 @@ from math import ceil from urllib.request import urlopen from pathlib import Path +import logging try: from PIL import Image, TiffImagePlugin + from PIL import ImageCms try: from PIL.Image import Resampling @@ -19,6 +21,7 @@ from .errors import FPDFException +LOGGER = logging.getLogger(__name__) SUPPORTED_IMAGE_FILTERS = ("AUTO", "FlateDecode", "DCTDecode", "JPXDecode") @@ -311,6 +314,19 @@ def _decode_base64_image(base64Image): ] +def iccp_is_valid(iccp): + """ + checks the validity of an iccp profile + """ + try: + iccp_io = BytesIO(iccp) + profile = ImageCms.getOpenProfile(iccp_io) + ImageCms.getProfileInfo(profile) + return True + except ImageCms.PyCMSError: + return False + + def get_img_info(filename, img=None, image_filter="AUTO", dims=None): """ Args: @@ -353,6 +369,13 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None): w, h = img.size info = {} + iccp = None + if "icc_profile" in img.info: + iccp = img.info.get("icc_profile") + if not iccp_is_valid(iccp): + LOGGER.error("ICCP for %s is invalid", filename) + iccp = None + if img_raw_data is not None and not img_altered: # if we can use the original image bytes directly we do (JPEG and group4 TIFF only): if img.format == "JPEG" and image_filter == "DCTDecode": @@ -365,6 +388,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None): "w": w, "h": h, "cs": colspace, + "iccp": iccp, + "dpn": dpn, "bpc": bpc, "f": image_filter, "dp": f"/Predictor 15 /Colors {dpn} /Columns {w}", @@ -406,6 +431,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None): "data": ccittrawdata, "w": w, "h": h, + "iccp": None, + "dpn": dpn, "cs": colspace, "bpc": bpc, "f": image_filter, @@ -477,7 +504,9 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None): "w": w, "h": h, "cs": colspace, + "iccp": iccp, "bpc": bpc, + "dpn": dpn, "f": 
image_filter, "dp": dp, } diff --git a/fpdf/output.py b/fpdf/output.py index 720aa5a7a..2e2c26b22 100644 --- a/fpdf/output.py +++ b/fpdf/output.py @@ -194,7 +194,6 @@ class PDFXObject(PDFContentStream): "height", "color_space", "bits_per_component", - "filter", "decode", "decode_parms", "s_mask", @@ -225,6 +224,27 @@ def __init__( self.s_mask = None +class PDFICCPObject(PDFContentStream): + __slots__ = ( # RAM usage optimization + "_id", + "_contents", + "filter", + "length", + "n", + "alternate", + ) + + def __init__( + self, + contents, + n, + alternate, + ): + super().__init__(contents=contents, compress=True) + self.n = n + self.alternate = Name(alternate) + + class PDFPage(PDFObject): __slots__ = ( # RAM usage optimization "_id", @@ -341,6 +361,7 @@ class OutputProducer: def __init__(self, fpdf): self.fpdf = fpdf self.pdf_objs = [] + self.iccp_i_to_pdf_i = {} self.obj_id = 0 # current PDF object number # array of PDF object offsets in self.buffer, used to build the xref table: self.offsets = {} @@ -719,13 +740,39 @@ def _add_images(self): img_objs_per_index[img["i"]] = self._add_image(img) return img_objs_per_index + def _ensure_iccp(self, img_info): + """ + Returns the PDF object of the ICC profile indexed iccp_i in the FPDF object. + Adds it if not present. 
+ """ + iccp_i = img_info["iccp_i"] + if iccp_i in self.iccp_i_to_pdf_i: + return self.iccp_i_to_pdf_i[iccp_i] + iccp_content = None + for iccp_c, i in self.fpdf.icc_profiles.items(): + if iccp_i == i: + iccp_content = iccp_c + break + assert iccp_content is not None + iccp_obj = PDFICCPObject( + contents=iccp_content, n=img_info["dpn"], alternate=img_info["cs"] + ) + iccp_pdf_i = self._add_pdf_obj(iccp_obj, "iccp") + self.iccp_i_to_pdf_i[iccp_i] = iccp_pdf_i + return iccp_pdf_i + def _add_image(self, info): color_space = Name(info["cs"]) decode = None + iccp_i = info.get("iccp_i") if color_space == "Indexed": color_space = PDFArray( ["/Indexed", "/DeviceRGB", f"{len(info['pal']) // 3 - 1}"] ) + elif iccp_i is not None: + # indexed images are not supposed to have ICC profiles + iccp_pdf_i = self._ensure_iccp(info) + color_space = PDFArray(["/ICCBased", str(iccp_pdf_i), str("0"), "R"]) elif color_space == "DeviceCMYK": decode = "[1 0 1 0 1 0 1 0]" diff --git a/test/html/test_img_inside_html_table_centered_with_caption.pdf b/test/html/test_img_inside_html_table_centered_with_caption.pdf index 813841803..280d72442 100644 Binary files a/test/html/test_img_inside_html_table_centered_with_caption.pdf and b/test/html/test_img_inside_html_table_centered_with_caption.pdf differ diff --git a/test/image/elliptic_clip.pdf b/test/image/elliptic_clip.pdf index 67767a30d..fdbea532f 100644 Binary files a/test/image/elliptic_clip.pdf and b/test/image/elliptic_clip.pdf differ diff --git a/test/image/full_height_image.pdf b/test/image/full_height_image.pdf index 935820d26..a8334de13 100644 Binary files a/test/image/full_height_image.pdf and b/test/image/full_height_image.pdf differ diff --git a/test/image/full_pdf_height_image.pdf b/test/image/full_pdf_height_image.pdf index 1649954d9..b618033d5 100644 Binary files a/test/image/full_pdf_height_image.pdf and b/test/image/full_pdf_height_image.pdf differ diff --git a/test/image/full_pdf_width_image.pdf 
b/test/image/full_pdf_width_image.pdf index 291eb7488..e05c5442f 100644 Binary files a/test/image/full_pdf_width_image.pdf and b/test/image/full_pdf_width_image.pdf differ diff --git a/test/image/full_width_image.pdf b/test/image/full_width_image.pdf index 935820d26..a8334de13 100644 Binary files a/test/image/full_width_image.pdf and b/test/image/full_width_image.pdf differ diff --git a/test/image/image_fit_in_rect.pdf b/test/image/image_fit_in_rect.pdf index b482a0670..aeafda152 100644 Binary files a/test/image/image_fit_in_rect.pdf and b/test/image/image_fit_in_rect.pdf differ diff --git a/test/image/image_types/image_types_insert_jpg_icc.pdf b/test/image/image_types/image_types_insert_jpg_icc.pdf new file mode 100644 index 000000000..c54d54001 Binary files /dev/null and b/test/image/image_types/image_types_insert_jpg_icc.pdf differ diff --git a/test/image/image_types/image_types_insert_jpg_icc_invalid.pdf b/test/image/image_types/image_types_insert_jpg_icc_invalid.pdf new file mode 100644 index 000000000..f622f6174 Binary files /dev/null and b/test/image/image_types/image_types_insert_jpg_icc_invalid.pdf differ diff --git a/test/image/image_types/image_types_insert_png_alpha.pdf b/test/image/image_types/image_types_insert_png_alpha.pdf index 2d88c1c15..ead8bdeb0 100644 Binary files a/test/image/image_types/image_types_insert_png_alpha.pdf and b/test/image/image_types/image_types_insert_png_alpha.pdf differ diff --git a/test/image/image_types/image_types_insert_png_alpha_dctdecode.pdf b/test/image/image_types/image_types_insert_png_alpha_dctdecode.pdf index fdc656a86..89f468971 100644 Binary files a/test/image/image_types/image_types_insert_png_alpha_dctdecode.pdf and b/test/image/image_types/image_types_insert_png_alpha_dctdecode.pdf differ diff --git a/test/image/image_types/image_types_insert_png_alpha_dctdecode_windows.pdf b/test/image/image_types/image_types_insert_png_alpha_dctdecode_windows.pdf index fdc656a86..89f468971 100644 Binary files 
a/test/image/image_types/image_types_insert_png_alpha_dctdecode_windows.pdf and b/test/image/image_types/image_types_insert_png_alpha_dctdecode_windows.pdf differ diff --git a/test/image/image_types/image_types_insert_png_disallow_transparency.pdf b/test/image/image_types/image_types_insert_png_disallow_transparency.pdf index 0d865665c..b43d8fccb 100644 Binary files a/test/image/image_types/image_types_insert_png_disallow_transparency.pdf and b/test/image/image_types/image_types_insert_png_disallow_transparency.pdf differ diff --git a/test/image/image_types/insert_images_insert_jpg_icc.jpg b/test/image/image_types/insert_images_insert_jpg_icc.jpg new file mode 100644 index 000000000..248ddedcf Binary files /dev/null and b/test/image/image_types/insert_images_insert_jpg_icc.jpg differ diff --git a/test/image/image_types/insert_images_insert_jpg_icc_2.jpg b/test/image/image_types/insert_images_insert_jpg_icc_2.jpg new file mode 100644 index 000000000..759921ab2 Binary files /dev/null and b/test/image/image_types/insert_images_insert_jpg_icc_2.jpg differ diff --git a/test/image/image_types/insert_images_insert_jpg_icc_invalid.jpg b/test/image/image_types/insert_images_insert_jpg_icc_invalid.jpg new file mode 100644 index 000000000..ba1b5ab9b Binary files /dev/null and b/test/image/image_types/insert_images_insert_jpg_icc_invalid.jpg differ diff --git a/test/image/image_types/test_insert_images.py b/test/image/image_types/test_insert_images.py index 637297247..1b539efa6 100644 --- a/test/image/image_types/test_insert_images.py +++ b/test/image/image_types/test_insert_images.py @@ -116,6 +116,30 @@ def test_insert_bmp(tmp_path): assert_pdf_equal(pdf, HERE / "image_types_insert_bmp.pdf", tmp_path) +def test_insert_jpg_icc(tmp_path): + pdf = fpdf.FPDF() + pdf.add_page(format=(448, 498)) + pdf.set_margin(0) + pdf.image(HERE / "insert_images_insert_jpg_icc.jpg", x=0, y=0, h=498) + # we add the same image a second time to make sure the ICC profile is included + # only 
once in that case + pdf.add_page(format=(448, 498)) + pdf.image(HERE / "insert_images_insert_jpg_icc.jpg", x=0, y=0, h=498) + # we add another image with the same ICC profile to make sure it's also included + # only once in that case + pdf.add_page(format=(314, 500)) + pdf.image(HERE / "insert_images_insert_jpg_icc_2.jpg", x=0, y=0, h=500) + assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc.pdf", tmp_path) + + +def test_insert_jpg_invalid_icc(tmp_path): + pdf = fpdf.FPDF() + pdf.add_page(format=(448, 498)) + pdf.set_margin(0) + pdf.image(HERE / "insert_images_insert_jpg_icc_invalid.jpg", x=0, y=0, h=498) + assert_pdf_equal(pdf, HERE / "image_types_insert_jpg_icc_invalid.pdf", tmp_path) + + def test_insert_gif(tmp_path): pdf = fpdf.FPDF() pdf.compress = False diff --git a/test/image/image_with_explicit_dimensions.pdf b/test/image/image_with_explicit_dimensions.pdf index 88be8d516..4a756786d 100644 Binary files a/test/image/image_with_explicit_dimensions.pdf and b/test/image/image_with_explicit_dimensions.pdf differ diff --git a/test/image/image_x_align_center.pdf b/test/image/image_x_align_center.pdf index 99a89b620..d365298cc 100644 Binary files a/test/image/image_x_align_center.pdf and b/test/image/image_x_align_center.pdf differ diff --git a/test/image/image_x_align_right.pdf b/test/image/image_x_align_right.pdf index b07669699..3407018bd 100644 Binary files a/test/image/image_x_align_right.pdf and b/test/image/image_x_align_right.pdf differ diff --git a/test/image/png_images/image_png_insert_png_files.pdf b/test/image/png_images/image_png_insert_png_files.pdf index 703016b6e..39f6c6a72 100644 Binary files a/test/image/png_images/image_png_insert_png_files.pdf and b/test/image/png_images/image_png_insert_png_files.pdf differ diff --git a/test/image/rect_clip.pdf b/test/image/rect_clip.pdf index 090c12324..8664fd199 100644 Binary files a/test/image/rect_clip.pdf and b/test/image/rect_clip.pdf differ diff --git a/test/image/round_clip.pdf 
b/test/image/round_clip.pdf index 628909bb3..924d950b8 100644 Binary files a/test/image/round_clip.pdf and b/test/image/round_clip.pdf differ diff --git a/test/image/test_load_image.py b/test/image/test_load_image.py index 57e2453a9..0cac455e4 100644 --- a/test/image/test_load_image.py +++ b/test/image/test_load_image.py @@ -45,10 +45,12 @@ def test_load_invalid_base64_data(): @memunit.assert_lt_mb(147) def test_share_images_cache(tmp_path): images_cache = {} + icc_profiles_cache = {} def build_pdf_with_big_images(): pdf = fpdf.FPDF() pdf.images = images_cache + pdf.icc_profiles = icc_profiles_cache pdf.add_page() for img_path in glob(f"{HERE}/png_images/*.png"): pdf.image(img_path, h=pdf.eph) diff --git a/test/template/template_nominal_hardcoded.pdf b/test/template/template_nominal_hardcoded.pdf index bd8cb05c1..2735c5228 100644 Binary files a/test/template/template_nominal_hardcoded.pdf and b/test/template/template_nominal_hardcoded.pdf differ diff --git a/test/transparency.pdf b/test/transparency.pdf index 6bdd580a0..717d8a61a 100644 Binary files a/test/transparency.pdf and b/test/transparency.pdf differ