diff --git a/misc/watcher.py b/misc/watcher.py index efcdbab4b..8c0e9a5a5 100644 --- a/misc/watcher.py +++ b/misc/watcher.py @@ -131,7 +131,7 @@ def execute_ocrmypdf( class HandleObserverEvent(PatternMatchingEventHandler): - def __init__( + def __init__( # noqa: D107 self, patterns=None, ignore_patterns=None, @@ -191,7 +191,7 @@ def main( bool, typer.Option( envvar='OCR_OUTPUT_DIRECTORY_YEAR_MONTH', - help='Create a subdirectory in the output directory for each year and month', + help='Create a subdirectory in the output directory for each year/month', ), ] = False, on_success_delete: Annotated[ diff --git a/src/ocrmypdf/_exec/unpaper.py b/src/ocrmypdf/_exec/unpaper.py index 1c433df57..d1165c46d 100644 --- a/src/ocrmypdf/_exec/unpaper.py +++ b/src/ocrmypdf/_exec/unpaper.py @@ -14,7 +14,6 @@ from pathlib import Path from subprocess import PIPE, STDOUT from tempfile import TemporaryDirectory -from typing import Union from packaging.version import Version from PIL import Image @@ -28,7 +27,7 @@ UNPAPER_IMAGE_PIXEL_LIMIT = 256 * 1024 * 1024 -DecFloat = Union[Decimal, float] +DecFloat = Decimal | float log = logging.getLogger(__name__) diff --git a/src/ocrmypdf/api.py b/src/ocrmypdf/api.py index 9d78fe574..700a3a98c 100644 --- a/src/ocrmypdf/api.py +++ b/src/ocrmypdf/api.py @@ -14,7 +14,7 @@ from enum import IntEnum from io import IOBase from pathlib import Path -from typing import AnyStr, BinaryIO, Union +from typing import AnyStr, BinaryIO from warnings import warn import pluggy @@ -28,8 +28,8 @@ from ocrmypdf.cli import ArgumentParser, get_parser from ocrmypdf.helpers import is_iterable_notstr -StrPath = Union[Path, AnyStr] -PathOrIO = Union[BinaryIO, StrPath] +StrPath = Path | AnyStr +PathOrIO = BinaryIO | StrPath # Installing plugins affects the global state of the Python 
interpreter, # so we need to use a lock to prevent multiple threads from installing @@ -169,7 +169,7 @@ def _kwargs_to_cmdline( # We have a parameter cmdline.append(f"--{cmd_style_arg}") - if isinstance(val, (int, float)): + if isinstance(val, int | float): cmdline.append(str(val)) elif isinstance(val, str): cmdline.append(val) @@ -201,11 +201,11 @@ def create_options( defer_kwargs={'progress_bar', 'plugins', 'parser', 'input_file', 'output_file'}, **kwargs, ) - if isinstance(input_file, (BinaryIO, IOBase)): + if isinstance(input_file, BinaryIO | IOBase): cmdline.append('stream://input_file') else: cmdline.append(os.fspath(input_file)) - if isinstance(output_file, (BinaryIO, IOBase)): + if isinstance(output_file, BinaryIO | IOBase): cmdline.append('stream://output_file') else: cmdline.append(os.fspath(output_file)) @@ -343,7 +343,7 @@ def ocr( # noqa: D417 if not plugins: plugins = [] - elif isinstance(plugins, (str, Path)): + elif isinstance(plugins, str | Path): plugins = [plugins] else: plugins = list(plugins) diff --git a/src/ocrmypdf/pdfinfo/info.py b/src/ocrmypdf/pdfinfo/info.py index 571980f3c..1f197b786 100644 --- a/src/ocrmypdf/pdfinfo/info.py +++ b/src/ocrmypdf/pdfinfo/info.py @@ -10,7 +10,6 @@ import logging import re import statistics -import sys from collections import defaultdict from collections.abc import Callable, Container, Iterable, Iterator, Mapping, Sequence from contextlib import contextmanager @@ -1060,12 +1059,7 @@ def page_dpi_profile(self) -> PageResolutionProfile | None: weights = [area / total_drawn_area for area in image_areas] # Calculate harmonic mean of DPIs weighted by area - if sys.version_info >= (3, 10): - weighted_dpi = statistics.harmonic_mean(image_dpis, weights) - else: - weighted_dpi = sum(weights) / sum( - weight / dpi for weight, dpi in zip(weights, image_dpis) - ) + weighted_dpi = statistics.harmonic_mean(image_dpis, weights) max_dpi = max(image_dpis) dpi_average_max_ratio = weighted_dpi / max_dpi @@ -1176,7 +1170,7 @@ 
def is_tagged(self) -> bool: @property def filename(self) -> str | Path: """Return filename of PDF.""" - if not isinstance(self._infile, (str, Path)): + if not isinstance(self._infile, str | Path): raise NotImplementedError("can't get filename from stream") return self._infile