From bc8a047c8ce7b4f9867050281b6a6daa1eaddac7 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel@debian.org>
Date: Sat, 8 Feb 2025 08:04:28 +0100
Subject: [PATCH] Merging upstream version 0.10.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
---
 .github/workflows/tests.yml |   4 +-
 README.rst                  |   2 +-
 docs/changelog.rst          |  34 +++++++++++
 docs/contribute.rst         |   9 +--
 pydyf/__init__.py           | 114 ++++++++++++++++++------------------
 pyproject.toml              |  10 ++--
 tests/test_pydyf.py         |  28 ++++++++-
 7 files changed, 128 insertions(+), 73 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8de9881..fd08565 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -38,6 +38,4 @@ jobs:
       - name: Launch tests
         run: python -m pytest
       - name: Check coding style
-        run: python -m flake8
-      - name: Check imports order
-        run: python -m isort . --check --diff
+        run: python -m ruff check
diff --git a/README.rst b/README.rst
index f76eccb..dd924eb 100644
--- a/README.rst
+++ b/README.rst
@@ -2,7 +2,7 @@ pydyf is a low-level PDF generator written in Python and based on PDF
 specification 1.7.
 
 * Free software: BSD license
-* For Python 3.7+, tested on CPython and PyPy
+* For Python 3.8+, tested on CPython and PyPy
 * Documentation: https://doc.courtbouillon.org/pydyf
 * Changelog: https://github.com/CourtBouillon/pydyf/releases
 * Code, issues, tests: https://github.com/CourtBouillon/pydyf
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 661a696..7afce9d 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -2,6 +2,40 @@ Changelog
 =========
 
 
+Version 0.10.0
+--------------
+
+Released on 2024-04-29.
+
+New features:
+
+* Add standard compliant default identifier
+
+Contributors:
+
+* Guillaume Ayoub
+* Wolfgang Walther
+
+Backers and sponsors:
+
+* Spacinov
+* Kobalt
+* Grip Angebotssoftware
+* Manuel Barkhau
+* SimonSoft
+* Menutech
+* KontextWork
+* Simon Sapin
+* René Fritz
+* TrainingSparkle
+* Healthchecks.io
+* Docraptor
+* Yanal-Yvez Fargialla
+* Douwe van Loenen
+* Morntag
+* Xavid
+
+
 Version 0.9.0
 -------------
 
diff --git a/docs/contribute.rst b/docs/contribute.rst
index f94a3d6..d14e6b9 100644
--- a/docs/contribute.rst
+++ b/docs/contribute.rst
@@ -50,16 +50,13 @@ You can launch tests using the following command::
 
   venv/bin/pytest
 
-WeasyPrint also uses isort_ to check imports and flake8_ to check the coding
-style::
+pydyf also uses ruff_ to check the coding style::
 
-  venv/bin/python -m isort . --check --diff
-  venv/bin/python -m flake8
+  venv/bin/python -m ruff check
 
 .. _pytest: https://docs.pytest.org/
 .. _Ghostscript: https://www.ghostscript.com/
-.. _isort: https://pycqa.github.io/isort/
-.. _flake8: https://flake8.pycqa.org/
+.. _ruff: https://docs.astral.sh/ruff/
 
 
 Documentation
diff --git a/pydyf/__init__.py b/pydyf/__init__.py
index d8e1d7b..86d321d 100755
--- a/pydyf/__init__.py
+++ b/pydyf/__init__.py
@@ -9,8 +9,9 @@ import zlib
 from codecs import BOM_UTF16_BE
 from hashlib import md5
 from math import ceil, log
+from warnings import warn
 
-VERSION = __version__ = '0.9.0'
+VERSION = __version__ = '0.10.0'
 
 
 def _to_bytes(item):
@@ -203,6 +204,37 @@ class Stream(Object):
         """
         self.stream.append(b'b*' if even_odd else b'b')
 
+    def inline_image(self, width, height, color_space, bpc, raw_data):
+        """Add an inline image.
+
+        :param width: The width of the image.
+        :type width: :obj:`int`
+        :param height: The height of the image.
+        :type height: :obj:`int`
+        :param color_space: The color space of the image, e.g. RGB, Gray.
+        :type color_space: :obj:`str`
+        :param bpc: The bits per component. 1 for BW, 8 for grayscale.
+        :type bpc: :obj:`int`
+        :param raw_data: The raw pixel data.
+
+        """
+        data = zlib.compress(raw_data) if self.compress else raw_data
+        a85_data = base64.a85encode(data) + b'~>'
+        self.stream.append(b' '.join((
+            b'BI',
+            b'/W', _to_bytes(width),
+            b'/H', _to_bytes(height),
+            b'/BPC', _to_bytes(bpc),
+            b'/CS',
+            b'/Device' + _to_bytes(color_space),
+            b'/F',
+            b'[/A85 /Fl]' if self.compress else b'/A85',
+            b'/L', _to_bytes(len(a85_data)),
+            b'ID',
+            a85_data,
+            b'EI',
+        )))
+
     def line_to(self, x, y):
         """Add line from current point to point ``(x, y)``."""
         self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l')))
@@ -365,44 +397,6 @@ class Stream(Object):
             _to_bytes(a), _to_bytes(b), _to_bytes(c),
             _to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm')))
 
-    def inline_image(self, width, height, color_space, bpc, raw_data):
-        """Add an inline image.
-
-        :param width: The width of the image.
-        :type width: :obj:`int`
-        :param height: The height of the image.
-        :type height: :obj:`int`
-        :param colorspace: The color space of the image, f.e. RGB, Gray.
-        :type colorspace: :obj:`str`
-        :param bpc: The bits per component. 1 for BW, 8 for grayscale.
-        :type bpc: :obj:`int`
-        :param raw_data: The raw pixel data.
-
-        """
-        if self.compress:
-            data = zlib.compress(raw_data)
-        else:
-            data = raw_data
-        enc_data = base64.a85encode(data)
-        self.stream.append(
-            b' '.join(
-                (
-                    b'BI',
-                    b'/W', _to_bytes(width),
-                    b'/H', _to_bytes(height),
-                    b'/BPC', _to_bytes(bpc),
-                    b'/CS',
-                    b'/Device' + color_space.encode(),
-                    b'/F',
-                    b'[/A85 /Fl]' if self.compress else b'/A85',
-                    b'/L', _to_bytes(len(enc_data) + 2),
-                    b'ID',
-                    enc_data + b'~>',
-                    b'EI',
-                )
-            )
-        )
-
     @property
     def data(self):
         stream = b'\n'.join(_to_bytes(item) for item in self.stream)
@@ -450,17 +444,16 @@ class Array(Object, list):
 
 class PDF:
     """PDF document."""
-    def __init__(self, version=b'1.7', identifier=None):
-        """Create a PDF document.
-
-        :param bytes version: PDF version.
-        :param bytes identifier: PDF file identifier.
-
-        """
-        #: PDF version, as :obj:`bytes`.
-        self.version = _to_bytes(version)
-        #: PDF file identifier.
-        self.identifier = identifier
+    def __init__(self, version=None, identifier=None):
+        """Create a PDF document."""
+        if version or identifier:  # to be removed in next version
+            warn(
+                "PDF objects don’t take version or identifier during initialization "
+                "anymore. These properties are now stored but ignored, and will be "
+                "removed and rejected in next version of pydyf. Please pass these "
+                "properties to the PDF.write() method instead.", DeprecationWarning)
+        self.version = _to_bytes(version) if version else b'1.7'  # to be removed
+        self.identifier = identifier  # to be removed
 
         #: Python :obj:`list` containing the PDF’s objects.
         self.objects = []
@@ -528,18 +521,23 @@ class PDF:
         self.current_position += len(content) + 1
         output.write(content + b'\n')
 
-    def write(self, output, version=None, identifier=None, compress=False):
+    def write(self, output, version=b'1.7', identifier=False, compress=False):
         """Write PDF to output.
 
         :param output: Output stream.
         :type output: binary :term:`file object`
         :param bytes version: PDF version.
-        :param bytes identifier: PDF file identifier.
+        :param identifier: PDF file identifier. Default is :obj:`False`
+          to include no identifier, can be set to :obj:`True` to generate an
+          automatic identifier.
+        :type identifier: :obj:`bytes` or :obj:`bool`
         :param bool compress: whether the PDF uses a compressed object stream.
 
         """
-        version = self.version if version is None else _to_bytes(version)
-        identifier = self.identifier if identifier is None else identifier
+        # Convert version and identifier to bytes
+        version = _to_bytes(version or b'1.7')  # Force 1.7 when None
+        if identifier not in (False, True, None):
+            identifier = _to_bytes(identifier)
 
         # Write header
         self.write_line(b'%PDF-' + version, output)
@@ -607,10 +605,12 @@ class PDF:
                 'Root': self.catalog.reference,
                 'Info': self.info.reference,
             }
-            if identifier is not None:
+            if identifier:
                 data = b''.join(
                     obj.data for obj in self.objects if obj.free != 'f')
                 data_hash = md5(data).hexdigest().encode()
+                if identifier is True:
+                    identifier = data_hash
                 extra['ID'] = Array((
                     String(identifier).data, String(data_hash).data))
             dict_stream = Stream([xref_stream], extra, compress)
@@ -640,10 +640,12 @@ class PDF:
             self.write_line(f'/Size {len(self.objects)}'.encode(), output)
             self.write_line(b'/Root ' + self.catalog.reference, output)
             self.write_line(b'/Info ' + self.info.reference, output)
-            if identifier is not None:
+            if identifier:
                 data = b''.join(
                     obj.data for obj in self.objects if obj.free != 'f')
                 data_hash = md5(data).hexdigest().encode()
+                if identifier is True:
+                    identifier = data_hash
                 self.write_line(
                     b'/ID [' + String(identifier).data + b' ' +
                     String(data_hash).data + b']', output)
diff --git a/pyproject.toml b/pyproject.toml
index 2c17097..0096863 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ description = 'A low-level PDF generator.'
 keywords = ['pdf', 'generator']
 authors = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}]
 maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}]
-requires-python = '>=3.7'
+requires-python = '>=3.8'
 readme = {file = 'README.rst', content-type = 'text/x-rst'}
 license = {file = 'LICENSE'}
 classifiers = [
@@ -39,7 +39,7 @@ Donation = 'https://opencollective.com/courtbouillon'
 
 [project.optional-dependencies]
 doc = ['sphinx', 'sphinx_rtd_theme']
-test = ['pytest', 'isort', 'flake8', 'pillow']
+test = ['pytest', 'ruff', 'pillow']
 
 [tool.flit.sdist]
 exclude = ['.*']
@@ -52,6 +52,6 @@ include = ['tests/*', 'pydyf/*']
 exclude_lines = ['pragma: no cover', 'def __repr__', 'raise NotImplementedError']
 omit = ['.*']
 
-[tool.isort]
-default_section = 'FIRSTPARTY'
-multi_line_output = 4
+[tool.ruff.lint]
+select = ['E', 'W', 'F', 'I', 'N', 'RUF']
+ignore = ['RUF001', 'RUF002', 'RUF003']
diff --git a/tests/test_pydyf.py b/tests/test_pydyf.py
index 83c260c..ff63be2 100644
--- a/tests/test_pydyf.py
+++ b/tests/test_pydyf.py
@@ -1,4 +1,5 @@
 import io
+import re
 
 import pydyf
 
@@ -704,11 +705,34 @@ def test_text():
     ''')
 
 
-def test_identifier():
+def test_no_identifier():
+    document = pydyf.PDF()
+    pdf = io.BytesIO()
+    document.write(pdf, identifier=False)
+    assert re.search(
+        b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]',
+        pdf.getvalue()
+    ) is None
+
+
+def test_default_identifier():
+    document = pydyf.PDF()
+    pdf = io.BytesIO()
+    document.write(pdf, identifier=True)
+    assert re.search(
+        b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]',
+        pdf.getvalue()
+    ) is not None
+
+
+def test_custom_identifier():
     document = pydyf.PDF()
     pdf = io.BytesIO()
     document.write(pdf, identifier=b'abc')
-    assert b'abc' in pdf.getvalue()
+    assert re.search(
+        b'/ID \\[\\(abc\\) \\(([0-9a-f]{32})\\)\\]',
+        pdf.getvalue()
+    ) is not None
 
 
 def test_version():