Merging upstream version 0.6.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
2025-02-08 07:47:42 +01:00 · 2025-02-08 07:47:42 +01:00 · 60b3c4718d
commit 60b3c4718d
parent 72fc7a26f9
4 changed files with 176 additions and 77 deletions
--- a/docs/api_reference.rst
+++ b/docs/api_reference.rst
@ -7,13 +7,17 @@ API Reference
   :members:
 .. autoclass:: Dictionary
   :show-inheritance:
 .. autoclass:: Stream
   :members:
   :show-inheritance:
 .. autoclass:: String
   :show-inheritance:
 .. autoclass:: Array
   :show-inheritance:
 .. autoclass:: PDF
   :members:
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@ -2,6 +2,40 @@ Changelog
 =========
 Version 0.6.0
 -------------
 Released on 2023-03-29.
 New features:
 * Add an option to use compressed object streams for PDF 1.5+, with financial support from Code & Co.
 * Add new text operators
 * Clean and fix documentation
 Backers and sponsors:
 * Kobalt
 * Grip Angebotssoftware
 * Spacinov
 * Crisp BV
 * Castedo Ellerman
 * Manuel Barkhau
 * SimonSoft
 * Menutech
 * KontextWork
 * NCC Group
 * René Fritz
 * Moritz Mahringer
 * Yanal-Yvez Fargialla
 * Piotr Horzycki
 * Healthchecks.io
 * Hammerbacher
 * TrainingSparkle
 * Synapsium
 Version 0.5.0
 -------------
@ -10,7 +44,7 @@ Released on 2022-10-11.
 New features:
 * Add the PDF.page_references property
-* Revert the PDF.pages['Kids'] behavior to be retro-compatible with version 0.3.0
+* Revert the PDF.pages['Kids'] behavior to be backwards compatible with version 0.3.0
 Backers and sponsors:
--- a/docs/common_use_cases.rst
+++ b/docs/common_use_cases.rst
@ -97,7 +97,7 @@ Display image
   document = pydyf.PDF()
-   extra = Dictionary({
+   extra = pydyf.Dictionary({
       'Type': '/XObject',
       'Subtype': '/Image',
       'Width': 197,
@ -158,9 +158,9 @@ Display text
  # And display it
  text = pydyf.Stream()
  text.begin_text()
-  text.set_font_size('F1', 24)
+  text.set_font_size('F1', 20)
  text.text_matrix(1, 0, 0, 1, 10, 90)
-  text.show_text(pydyf.String('Hello World'))
+  text.show_text(pydyf.String('Bœuf grillé & café'.encode('macroman')))
  text.end_text()
  document.add_object(text)
@ -179,4 +179,3 @@ Display text
  with open('document.pdf', 'wb') as f:
      document.write(f)
--- a/pydyf/__init__.py
+++ b/pydyf/__init__.py
@ -7,8 +7,9 @@ import re
 import zlib
 from codecs import BOM_UTF16_BE
 from hashlib import md5
 from math import ceil, log
-VERSION = __version__ = '0.5.0'
+VERSION = __version__ = '0.6.0'
 def _to_bytes(item):
@ -21,7 +22,7 @@ def _to_bytes(item):
        if item.is_integer():
            return f'{int(item):d}'.encode('ascii')
        else:
-            return f'{item:f}'.encode('ascii')
+            return f'{item:f}'.rstrip('0').encode('ascii')
    elif isinstance(item, int):
        return f'{item:d}'.encode('ascii')
    return str(item).encode('ascii')
@ -43,51 +44,41 @@ class Object:
    @property
    def indirect(self):
        """Indirect representation of an object."""
-        return b'\n'.join((
-            str(self.number).encode() + b' ' +
-            str(self.generation).encode() + b' obj',
-            self.data,
-            b'endobj',
-        ))
+        header = f'{self.number} {self.generation} obj\n'.encode()
+        return header + self.data + b'\nendobj'
    @property
    def reference(self):
        """Object identifier."""
-        return (
-            str(self.number).encode() + b' ' +
-            str(self.generation).encode() + b' R')
+        return f'{self.number} {self.generation} R'.encode()
    @property
    def data(self):
        """Data contained in the object. Shall be defined in each subclass."""
        raise NotImplementedError()
    @property
    def compressible(self):
        """Whether the object can be included in an object stream."""
        return not self.generation and not isinstance(self, Stream)
 class Dictionary(Object, dict):
-    """PDF Dictionary object.
-    Inherits from :class:`Object` and Python :obj:`dict`.
-    """
+    """PDF Dictionary object."""
    def __init__(self, values=None):
        Object.__init__(self)
        dict.__init__(self, values or {})
    @property
    def data(self):
-        result = [b'<<']
-        for key, value in self.items():
-            result.append(b'/' + _to_bytes(key) + b' ' + _to_bytes(value))
-        result.append(b'>>')
-        return b'\n'.join(result)
+        result = [
+            b'/' + _to_bytes(key) + b' ' + _to_bytes(value)
+            for key, value in self.items()]
+        return b'<<' + b''.join(result) + b'>>'
 class Stream(Object):
-    """PDF Stream object.
-    Inherits from :class:`Object`.
-    """
+    """PDF Stream object."""
    def __init__(self, stream=None, extra=None, compress=False):
        super().__init__()
        #: Python array of data composing stream.
@ -221,6 +212,10 @@ class Stream(Object):
        """Begin new subpath by moving current point to ``(x, y)``."""
        self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'm')))
    def move_text_to(self, x, y):
        """Move text to next line at ``(x, y)`` distance from previous line."""
        self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'Td')))
    def shading(self, name):
        """Paint shape and color shading using shading dictionary ``name``."""
        self.stream.append(b'/' + _to_bytes(name) + b' sh')
@ -310,9 +305,13 @@ class Stream(Object):
        self.stream.append(b'/' + _to_bytes(state_name) + b' gs')
    def show_text(self, text):
-        """Show text."""
+        """Show text strings with individual glyph positioning."""
        self.stream.append(b'[' + _to_bytes(text) + b'] TJ')
    def show_text_string(self, text):
        """Show single text string."""
        self.stream.append(String(text).data + b' Tj')
    def stroke(self):
        """Stroke path."""
        self.stream.append(b'S')
@ -369,7 +368,7 @@ class Stream(Object):
        extra = Dictionary(self.extra.copy())
        if self.compress:
            extra['Filter'] = '/FlateDecode'
-            compressobj = zlib.compressobj()
+            compressobj = zlib.compressobj(level=9)
            stream = compressobj.compress(stream)
            stream += compressobj.flush()
        extra['Length'] = len(stream)
@ -377,11 +376,7 @@ class Stream(Object):
 class String(Object):
-    """PDF String object.
-    Inherits from :class:`Object`.
-    """
+    """PDF String object."""
    def __init__(self, string=''):
        super().__init__()
        #: Unicode string.
@ -402,22 +397,14 @@ class String(Object):
 class Array(Object, list):
-    """PDF Array object.
-    Inherits from :class:`Object` and Python :obj:`list`.
-    """
+    """PDF Array object."""
    def __init__(self, array=None):
        Object.__init__(self)
        list.__init__(self, array or [])
    @property
    def data(self):
-        result = [b'[']
-        for child in self:
-            result.append(_to_bytes(child))
-        result.append(b']')
-        return b' '.join(result)
+        return b'[' + b' '.join(_to_bytes(child) for child in self) + b']'
 class PDF:
@ -500,13 +487,14 @@ class PDF:
        self.current_position += len(content) + 1
        output.write(content + b'\n')
-    def write(self, output, version=None, identifier=None):
+    def write(self, output, version=None, identifier=None, compress=False):
        """Write PDF to output.
        :param output: Output stream.
        :type output: binary :term:`file object`
        :param bytes version: PDF version.
        :param bytes identifier: PDF file identifier.
        :param bool compress: whether the PDF uses a compressed object stream.
        """
        version = self.version if version is None else _to_bytes(version)
@ -516,6 +504,79 @@ class PDF:
        self.write_line(b'%PDF-' + version, output)
        self.write_line(b'%\xf0\x9f\x96\xa4', output)
        if version >= b'1.5' and compress:
            # Store compressed objects for later and write other ones in PDF
            compressed_objects = []
            for object_ in self.objects:
                if object_.free == 'f':
                    continue
                if object_.compressible:
                    compressed_objects.append(object_)
                else:
                    object_.offset = self.current_position
                    self.write_line(object_.indirect, output)
            # Write compressed objects in object stream
            stream = [[]]
            position = 0
            for i, object_ in enumerate(compressed_objects):
                data = object_.data
                stream.append(data)
                stream[0].append(object_.number)
                stream[0].append(position)
                position += len(data) + 1
            stream[0] = ' '.join(str(i) for i in stream[0])
            extra = {
                'Type': '/ObjStm',
                'N': len(compressed_objects),
                'First': len(stream[0]) + 1,
            }
            object_stream = Stream(stream, extra, compress)
            object_stream.offset = self.current_position
            self.add_object(object_stream)
            self.write_line(object_stream.indirect, output)
            # Write cross-reference stream
            xref = []
            dict_index = 0
            for object_ in self.objects:
                if object_.compressible:
                    xref.append((2, object_stream.number, dict_index))
                    dict_index += 1
                else:
                    xref.append((
                        bool(object_.number), object_.offset,
                        object_.generation))
            xref.append((1, self.current_position, 0))
            field2_size = ceil(log(self.current_position, 8))
            max_generation = max(
                object_.generation for object_ in self.objects)
            field3_size = ceil(log(
                max(max_generation, len(compressed_objects)), 8))
            xref_lengths = (1, field2_size, field3_size)
            xref_stream = b''.join(
                value.to_bytes(length, 'big')
                for line in xref for length, value in zip(xref_lengths, line))
            extra = {
                'Type': '/XRef',
                'Index': Array((0, len(self.objects) + 1)),
                'W': Array(xref_lengths),
                'Size': len(self.objects) + 1,
                'Root': self.catalog.reference,
                'Info': self.info.reference,
            }
            if identifier is not None:
                data = b''.join(
                    obj.data for obj in self.objects if obj.free != 'f')
                data_hash = md5(data).hexdigest().encode()
                extra['ID'] = Array((
                    String(identifier).data, String(data_hash).data))
            dict_stream = Stream([xref_stream], extra, compress)
            self.xref_position = dict_stream.offset = self.current_position
            self.add_object(dict_stream)
            self.write_line(dict_stream.indirect, output)
        else:
            # Write all non-free PDF objects
            for object_ in self.objects:
                if object_.free == 'f':
@ -523,7 +584,7 @@ class PDF:
                object_.offset = self.current_position
                self.write_line(object_.indirect, output)
-        # Write cross reference table
+            # Write cross-reference table
            self.xref_position = self.current_position
            self.write_line(b'xref', output)
            self.write_line(f'0 {len(self.objects)}'.encode(), output)
@ -546,6 +607,7 @@ class PDF:
                    b'/ID [' + String(identifier).data + b' ' +
                    String(data_hash).data + b']', output)
            self.write_line(b'>>', output)
        self.write_line(b'startxref', output)
        self.write_line(f'{self.xref_position}'.encode(), output)
        self.write_line(b'%%EOF', output)