From bc8a047c8ce7b4f9867050281b6a6daa1eaddac7 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 8 Feb 2025 08:04:28 +0100 Subject: [PATCH] Merging upstream version 0.10.0. Signed-off-by: Daniel Baumann --- .github/workflows/tests.yml | 4 +- README.rst | 2 +- docs/changelog.rst | 34 +++++++++++ docs/contribute.rst | 9 +-- pydyf/__init__.py | 114 ++++++++++++++++++------------------ pyproject.toml | 10 ++-- tests/test_pydyf.py | 28 ++++++++- 7 files changed, 128 insertions(+), 73 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8de9881..fd08565 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,6 +38,4 @@ jobs: - name: Launch tests run: python -m pytest - name: Check coding style - run: python -m flake8 - - name: Check imports order - run: python -m isort . --check --diff + run: python -m ruff check diff --git a/README.rst b/README.rst index f76eccb..dd924eb 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ pydyf is a low-level PDF generator written in Python and based on PDF specification 1.7. * Free software: BSD license -* For Python 3.7+, tested on CPython and PyPy +* For Python 3.8+, tested on CPython and PyPy * Documentation: https://doc.courtbouillon.org/pydyf * Changelog: https://github.com/CourtBouillon/pydyf/releases * Code, issues, tests: https://github.com/CourtBouillon/pydyf diff --git a/docs/changelog.rst b/docs/changelog.rst index 661a696..7afce9d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -2,6 +2,40 @@ Changelog ========= +Version 0.10.0 +-------------- + +Released on 2024-04-29. 
+ +New features: + +* Add standard compliant default identifier + +Contributors: + +* Guillaume Ayoub +* Wolfgang Walther + +Backers and sponsors: + +* Spacinov +* Kobalt +* Grip Angebotssoftware +* Manuel Barkhau +* SimonSoft +* Menutech +* KontextWork +* Simon Sapin +* René Fritz +* TrainingSparkle +* Healthchecks.io +* Docraptor +* Yanal-Yvez Fargialla +* Douwe van Loenen +* Morntag +* Xavid + + Version 0.9.0 ------------- diff --git a/docs/contribute.rst b/docs/contribute.rst index f94a3d6..d14e6b9 100644 --- a/docs/contribute.rst +++ b/docs/contribute.rst @@ -50,16 +50,13 @@ You can launch tests using the following command:: venv/bin/pytest -WeasyPrint also uses isort_ to check imports and flake8_ to check the coding -style:: +pydyf also uses ruff_ to check the coding style:: - venv/bin/python -m isort . --check --diff - venv/bin/python -m flake8 + venv/bin/python -m ruff check .. _pytest: https://docs.pytest.org/ .. _Ghostscript: https://www.ghostscript.com/ -.. _isort: https://pycqa.github.io/isort/ -.. _flake8: https://flake8.pycqa.org/ +.. _ruff: https://docs.astral.sh/ruff/ Documentation diff --git a/pydyf/__init__.py b/pydyf/__init__.py index d8e1d7b..86d321d 100755 --- a/pydyf/__init__.py +++ b/pydyf/__init__.py @@ -9,8 +9,9 @@ import zlib from codecs import BOM_UTF16_BE from hashlib import md5 from math import ceil, log +from warnings import warn -VERSION = __version__ = '0.9.0' +VERSION = __version__ = '0.10.0' def _to_bytes(item): @@ -203,6 +204,37 @@ class Stream(Object): """ self.stream.append(b'b*' if even_odd else b'b') + def inline_image(self, width, height, color_space, bpc, raw_data): + """Add an inline image. + + :param width: The width of the image. + :type width: :obj:`int` + :param height: The height of the image. + :type height: :obj:`int` + :param colorspace: The color space of the image, f.e. RGB, Gray. + :type colorspace: :obj:`str` + :param bpc: The bits per component. 1 for BW, 8 for grayscale. 
+ :type bpc: :obj:`int` + :param raw_data: The raw pixel data. + + """ + data = zlib.compress(raw_data) if self.compress else raw_data + a85_data = base64.a85encode(data) + b'~>' + self.stream.append(b' '.join(( + b'BI', + b'/W', _to_bytes(width), + b'/H', _to_bytes(height), + b'/BPC', _to_bytes(bpc), + b'/CS', + b'/Device' + _to_bytes(color_space), + b'/F', + b'[/A85 /Fl]' if self.compress else b'/A85', + b'/L', _to_bytes(len(a85_data)), + b'ID', + a85_data, + b'EI', + ))) + def line_to(self, x, y): """Add line from current point to point ``(x, y)``.""" self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l'))) @@ -365,44 +397,6 @@ class Stream(Object): _to_bytes(a), _to_bytes(b), _to_bytes(c), _to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm'))) - def inline_image(self, width, height, color_space, bpc, raw_data): - """Add an inline image. - - :param width: The width of the image. - :type width: :obj:`int` - :param height: The height of the image. - :type height: :obj:`int` - :param colorspace: The color space of the image, f.e. RGB, Gray. - :type colorspace: :obj:`str` - :param bpc: The bits per component. 1 for BW, 8 for grayscale. - :type bpc: :obj:`int` - :param raw_data: The raw pixel data. - - """ - if self.compress: - data = zlib.compress(raw_data) - else: - data = raw_data - enc_data = base64.a85encode(data) - self.stream.append( - b' '.join( - ( - b'BI', - b'/W', _to_bytes(width), - b'/H', _to_bytes(height), - b'/BPC', _to_bytes(bpc), - b'/CS', - b'/Device' + color_space.encode(), - b'/F', - b'[/A85 /Fl]' if self.compress else b'/A85', - b'/L', _to_bytes(len(enc_data) + 2), - b'ID', - enc_data + b'~>', - b'EI', - ) - ) - ) - @property def data(self): stream = b'\n'.join(_to_bytes(item) for item in self.stream) @@ -450,17 +444,16 @@ class Array(Object, list): class PDF: """PDF document.""" - def __init__(self, version=b'1.7', identifier=None): - """Create a PDF document. - - :param bytes version: PDF version. 
- :param bytes identifier: PDF file identifier. - - """ - #: PDF version, as :obj:`bytes`. - self.version = _to_bytes(version) - #: PDF file identifier. - self.identifier = identifier + def __init__(self, version=None, identifier=None): + """Create a PDF document.""" + if version or identifier: # to be removed in next version + warn( + "PDF objects don’t take version or identifier during initialization " + "anymore. These properties are now stored but ignored, and will be " + "removed and rejected in next version of pydyf. Please pass these " + "properties to the PDF.write() method instead.", DeprecationWarning) + self.version = _to_bytes(version) if version else b'1.7' # to be removed + self.identifier = identifier # to be removed #: Python :obj:`list` containing the PDF’s objects. self.objects = [] @@ -528,18 +521,23 @@ class PDF: self.current_position += len(content) + 1 output.write(content + b'\n') - def write(self, output, version=None, identifier=None, compress=False): + def write(self, output, version=b'1.7', identifier=False, compress=False): """Write PDF to output. :param output: Output stream. :type output: binary :term:`file object` :param bytes version: PDF version. - :param bytes identifier: PDF file identifier. + :param identifier: PDF file identifier. Default is :obj:`False` + to include no identifier, can be set to :obj:`True` to generate an + automatic identifier. + :type identifier: :obj:`bytes` or :obj:`bool` :param bool compress: whether the PDF uses a compressed object stream. 
""" - version = self.version if version is None else _to_bytes(version) - identifier = self.identifier if identifier is None else identifier + # Convert version and identifier to bytes + version = _to_bytes(version or b'1.7') # Force 1.7 when None + if identifier not in (False, True, None): + identifier = _to_bytes(identifier) # Write header self.write_line(b'%PDF-' + version, output) @@ -607,10 +605,12 @@ class PDF: 'Root': self.catalog.reference, 'Info': self.info.reference, } - if identifier is not None: + if identifier: data = b''.join( obj.data for obj in self.objects if obj.free != 'f') data_hash = md5(data).hexdigest().encode() + if identifier is True: + identifier = data_hash extra['ID'] = Array(( String(identifier).data, String(data_hash).data)) dict_stream = Stream([xref_stream], extra, compress) @@ -640,10 +640,12 @@ class PDF: self.write_line(f'/Size {len(self.objects)}'.encode(), output) self.write_line(b'/Root ' + self.catalog.reference, output) self.write_line(b'/Info ' + self.info.reference, output) - if identifier is not None: + if identifier: data = b''.join( obj.data for obj in self.objects if obj.free != 'f') data_hash = md5(data).hexdigest().encode() + if identifier is True: + identifier = data_hash self.write_line( b'/ID [' + String(identifier).data + b' ' + String(data_hash).data + b']', output) diff --git a/pyproject.toml b/pyproject.toml index 2c17097..0096863 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = 'A low-level PDF generator.' 
keywords = ['pdf', 'generator'] authors = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] -requires-python = '>=3.7' +requires-python = '>=3.8' readme = {file = 'README.rst', content-type = 'text/x-rst'} license = {file = 'LICENSE'} classifiers = [ @@ -39,7 +39,7 @@ Donation = 'https://opencollective.com/courtbouillon' [project.optional-dependencies] doc = ['sphinx', 'sphinx_rtd_theme'] -test = ['pytest', 'isort', 'flake8', 'pillow'] +test = ['pytest', 'ruff', 'pillow'] [tool.flit.sdist] exclude = ['.*'] @@ -52,6 +52,6 @@ include = ['tests/*', 'pydyf/*'] exclude_lines = ['pragma: no cover', 'def __repr__', 'raise NotImplementedError'] omit = ['.*'] -[tool.isort] -default_section = 'FIRSTPARTY' -multi_line_output = 4 +[tool.ruff.lint] +select = ['E', 'W', 'F', 'I', 'N', 'RUF'] +ignore = ['RUF001', 'RUF002', 'RUF003'] diff --git a/tests/test_pydyf.py b/tests/test_pydyf.py index 83c260c..ff63be2 100644 --- a/tests/test_pydyf.py +++ b/tests/test_pydyf.py @@ -1,4 +1,5 @@ import io +import re import pydyf @@ -704,11 +705,34 @@ def test_text(): ''') -def test_identifier(): +def test_no_identifier(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, identifier=False) + assert re.search( + b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]', + pdf.getvalue() + ) is None + + +def test_default_identifier(): + document = pydyf.PDF() + pdf = io.BytesIO() + document.write(pdf, identifier=True) + assert re.search( + b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]', + pdf.getvalue() + ) is not None + + +def test_custom_identifier(): document = pydyf.PDF() pdf = io.BytesIO() document.write(pdf, identifier=b'abc') - assert b'abc' in pdf.getvalue() + assert re.search( + b'/ID \\[\\(abc\\) \\(([0-9a-f]{32})\\)\\]', + pdf.getvalue() + ) is not None def test_version():