Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ICC profiles to PDFs #709

Merged
merged 11 commits into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
- vector images parsing is now more robust: `fpdf2` can now embed SVG files that have no `viewPort` or no `height` / `width`
- bitonal images are now encoded using `CCITTFaxDecode`, reducing their size in the PDF document - thanks to @eroux
- when possible, JPG and group4 encoded TIFFs are now embedded directly without recompression - thanks to @eroux
- ICC Profiles of included images are now extracted and turned into PDF objects; they should now be taken into account by PDF viewers - thanks to @eroux
eroux marked this conversation as resolved.
Show resolved Hide resolved

## [2.6.1] - 2023-01-13
### Added
Expand Down
10 changes: 10 additions & 0 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ def __init__(
self.pages = {} # array of PDFPage objects starting at index 1
self.fonts = {} # map font string keys to dicts describing the fonts used
self.images = {} # map image identifiers to dicts describing the raster images
self.iccps = {} # map icc profiles (bytes) to their index (number)
eroux marked this conversation as resolved.
Show resolved Hide resolved
self.links = {} # array of Destination objects starting at index 1
self.embedded_files = [] # array of PDFEmbeddedFile

Expand Down Expand Up @@ -3719,6 +3720,15 @@ def preload_image(self, name, dims=None):
info = ImageInfo(get_img_info(name, img, self.image_filter, dims))
info["i"] = len(self.images) + 1
info["usages"] = 1
info["iccp_i"] = None
if "iccp" in info and info["iccp"]:
if info["iccp"] in self.iccps:
eroux marked this conversation as resolved.
Show resolved Hide resolved
info["iccp_i"] = self.iccps[info["iccp"]]
else:
iccp_i = len(self.iccps)
self.iccps[info["iccp"]] = iccp_i
info["iccp_i"] = iccp_i
info["iccp"] = None
eroux marked this conversation as resolved.
Show resolved Hide resolved
self.images[name] = info
return name, img, info

Expand Down
10 changes: 10 additions & 0 deletions fpdf/image_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,10 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
w, h = img.size
info = {}

iccp = None
if "icc_profile" in img.info:
iccp = img.info.get("icc_profile")
Lucas-C marked this conversation as resolved.
Show resolved Hide resolved

if img_raw_data is not None and not img_altered:
# if we can use the original image bytes directly we do (JPEG and group4 TIFF only):
if img.format == "JPEG" and image_filter == "DCTDecode":
Expand All @@ -365,6 +369,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
"w": w,
"h": h,
"cs": colspace,
"iccp": iccp,
"dpn": dpn,
"bpc": bpc,
"f": image_filter,
"dp": f"/Predictor 15 /Colors {dpn} /Columns {w}",
Expand Down Expand Up @@ -406,6 +412,8 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
"data": ccittrawdata,
"w": w,
"h": h,
"iccp": None,
"dpn": dpn,
"cs": colspace,
"bpc": bpc,
"f": image_filter,
Expand Down Expand Up @@ -477,7 +485,9 @@ def get_img_info(filename, img=None, image_filter="AUTO", dims=None):
"w": w,
"h": h,
"cs": colspace,
"iccp": iccp,
"bpc": bpc,
"dpn": dpn,
"f": image_filter,
"dp": dp,
}
Expand Down
48 changes: 46 additions & 2 deletions fpdf/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ class PDFXObject(PDFContentStream):
"height",
"color_space",
"bits_per_component",
"filter",
eroux marked this conversation as resolved.
Show resolved Hide resolved
"decode",
"decode_parms",
"s_mask",
Expand Down Expand Up @@ -225,6 +224,27 @@ def __init__(
self.s_mask = None


class PDFICCPObject(PDFContentStream):
    # Content stream wrapping an ICC profile; it is always created with
    # compress=True.
    __slots__ = (  # RAM usage optimization
        "_id",
        "_contents",
        "filter",
        "length",
        "n",
        "alternate",
    )

    def __init__(self, contents, n, alternate):
        # contents:  profile data forwarded to PDFContentStream
        # n:         stored as-is (the caller in this file passes the image's "dpn" entry)
        # alternate: wrapped in Name (the caller in this file passes the image's "cs" entry)
        super().__init__(contents=contents, compress=True)
        self.n = n
        self.alternate = Name(alternate)


class PDFPage(PDFObject):
__slots__ = ( # RAM usage optimization
"_id",
Expand Down Expand Up @@ -341,6 +361,7 @@ class OutputProducer:
def __init__(self, fpdf):
self.fpdf = fpdf
self.pdf_objs = []
self.iccp_i_to_pdf_i = {}
self.obj_id = 0 # current PDF object number
# array of PDF object offsets in self.buffer, used to build the xref table:
self.offsets = {}
Expand Down Expand Up @@ -719,10 +740,33 @@ def _add_images(self):
img_objs_per_index[img["i"]] = self._add_image(img)
return img_objs_per_index

def _ensure_iccp(self, info, iccp_i):
    """
    Returns the value `_add_pdf_obj` produced for the ICC profile indexed iccp_i
    in the FPDF object. Adds the profile to the document if not present yet.

    Args:
        info: image info dict; its "dpn" and "cs" entries are passed to
            PDFICCPObject as `n` and `alternate` when the profile is first added.
        iccp_i: index of the profile, i.e. one of the values of `self.fpdf.iccps`.

    Raises:
        AssertionError: if no entry of `self.fpdf.iccps` has the index iccp_i.
    """
    try:
        # Already emitted for a previous image: reuse it.
        return self.iccp_i_to_pdf_i[iccp_i]
    except KeyError:
        pass
    # self.fpdf.iccps maps profile content -> index, so a reverse lookup is needed:
    iccp_content = next(
        (content for content, i in self.fpdf.iccps.items() if iccp_i == i), None
    )
    if iccp_content is None:
        # Explicit raise instead of `assert`, so the check survives `python -O`
        # while callers still see the same exception type.
        raise AssertionError(f"no ICC profile registered with index {iccp_i}")
    iccp_obj = PDFICCPObject(
        contents=iccp_content, n=info["dpn"], alternate=info["cs"]
    )
    iccp_pdf_i = self._add_pdf_obj(iccp_obj, "iccp")
    self.iccp_i_to_pdf_i[iccp_i] = iccp_pdf_i
    return iccp_pdf_i

def _add_image(self, info):
color_space = Name(info["cs"])
decode = None
if color_space == "Indexed":
if "iccp_i" in info and info["iccp_i"] is not None:
eroux marked this conversation as resolved.
Show resolved Hide resolved
iccp_pdf_i = self._ensure_iccp(info, info["iccp_i"])
eroux marked this conversation as resolved.
Show resolved Hide resolved
color_space = PDFArray(["/ICCBased", str(iccp_pdf_i), str("0"), "R"])
elif color_space == "Indexed":
eroux marked this conversation as resolved.
Show resolved Hide resolved
color_space = PDFArray(
["/Indexed", "/DeviceRGB", f"{len(info['pal']) // 3 - 1}"]
)
Expand Down
Binary file modified test/html/test_img_inside_html_table_centered_with_caption.pdf
Binary file not shown.
Binary file modified test/image/elliptic_clip.pdf
Binary file not shown.
Binary file modified test/image/full_height_image.pdf
Binary file not shown.
Binary file modified test/image/full_pdf_height_image.pdf
Binary file not shown.
Binary file modified test/image/full_pdf_width_image.pdf
Binary file not shown.
Binary file modified test/image/full_width_image.pdf
Binary file not shown.
Binary file modified test/image/image_fit_in_rect.pdf
Binary file not shown.
Binary file not shown.
Binary file modified test/image/image_types/image_types_insert_png_alpha.pdf
Binary file not shown.
Binary file modified test/image/image_types/image_types_insert_png_alpha_dctdecode.pdf
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions test/image/image_types/test_insert_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ def test_insert_bmp(tmp_path):
assert_pdf_equal(pdf, HERE / "image_types_insert_bmp.pdf", tmp_path)


def test_insert_jpg_icc(tmp_path):
    # Places the JPEG (presumably one carrying an ICC profile, per its file name)
    # at the origin of a 448x498 page with zero margins, using the full page
    # height, then compares the result with the reference PDF.
    page_width, page_height = 448, 498
    pdf = fpdf.FPDF()
    pdf.add_page(format=(page_width, page_height))
    pdf.set_margin(0)
    jpg_path = HERE / "insert_images_insert_jpg_icc.jpg"
    pdf.image(jpg_path, x=0, y=0, h=page_height)
    expected_pdf = HERE / "image_types_insert_jpg_icc.pdf"
    assert_pdf_equal(pdf, expected_pdf, tmp_path)


def test_insert_gif(tmp_path):
pdf = fpdf.FPDF()
pdf.compress = False
Expand Down
Binary file modified test/image/image_with_explicit_dimensions.pdf
Binary file not shown.
Binary file modified test/image/image_x_align_center.pdf
Binary file not shown.
Binary file modified test/image/image_x_align_right.pdf
Binary file not shown.
Binary file modified test/image/png_images/image_png_insert_png_files.pdf
Binary file not shown.
Binary file modified test/image/rect_clip.pdf
Binary file not shown.
Binary file modified test/image/round_clip.pdf
Binary file not shown.
2 changes: 2 additions & 0 deletions test/image/test_load_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ def test_load_invalid_base64_data():
@memunit.assert_lt_mb(147)
def test_share_images_cache(tmp_path):
images_cache = {}
iccp_cache = {}

def build_pdf_with_big_images():
pdf = fpdf.FPDF()
pdf.images = images_cache
pdf.iccps = iccp_cache
eroux marked this conversation as resolved.
Show resolved Hide resolved
pdf.add_page()
for img_path in glob(f"{HERE}/png_images/*.png"):
pdf.image(img_path, h=pdf.eph)
Expand Down
Binary file modified test/template/template_nominal_hardcoded.pdf
Binary file not shown.
Binary file modified test/transparency.pdf
Binary file not shown.