1
0
Fork 0

Merging upstream version 0.10.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-08 08:04:28 +01:00
parent 8e8c5588ab
commit bc8a047c8c
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
7 changed files with 128 additions and 73 deletions

View file

@ -38,6 +38,4 @@ jobs:
- name: Launch tests - name: Launch tests
run: python -m pytest run: python -m pytest
- name: Check coding style - name: Check coding style
run: python -m flake8 run: python -m ruff check
- name: Check imports order
run: python -m isort . --check --diff

View file

@ -2,7 +2,7 @@ pydyf is a low-level PDF generator written in Python and based on PDF
specification 1.7. specification 1.7.
* Free software: BSD license * Free software: BSD license
* For Python 3.7+, tested on CPython and PyPy * For Python 3.8+, tested on CPython and PyPy
* Documentation: https://doc.courtbouillon.org/pydyf * Documentation: https://doc.courtbouillon.org/pydyf
* Changelog: https://github.com/CourtBouillon/pydyf/releases * Changelog: https://github.com/CourtBouillon/pydyf/releases
* Code, issues, tests: https://github.com/CourtBouillon/pydyf * Code, issues, tests: https://github.com/CourtBouillon/pydyf

View file

@ -2,6 +2,40 @@ Changelog
========= =========
Version 0.10.0
-------------
Released on 2024-04-29.
New features:
* Add standard compliant default identifier
Contributors:
* Guillaume Ayoub
* Wolfgang Walther
Backers and sponsors:
* Spacinov
* Kobalt
* Grip Angebotssoftware
* Manuel Barkhau
* SimonSoft
* Menutech
* KontextWork
* Simon Sapin
* René Fritz
* TrainingSparkle
* Healthchecks.io
* Docraptor
* Yanal-Yvez Fargialla
* Douwe van Loenen
* Morntag
* Xavid
Version 0.9.0 Version 0.9.0
------------- -------------

View file

@ -50,16 +50,13 @@ You can launch tests using the following command::
venv/bin/pytest venv/bin/pytest
WeasyPrint also uses isort_ to check imports and flake8_ to check the coding pydyf also uses ruff_ to check the coding style::
style::
venv/bin/python -m isort . --check --diff venv/bin/python -m ruff check
venv/bin/python -m flake8
.. _pytest: https://docs.pytest.org/ .. _pytest: https://docs.pytest.org/
.. _Ghostscript: https://www.ghostscript.com/ .. _Ghostscript: https://www.ghostscript.com/
.. _isort: https://pycqa.github.io/isort/ .. _ruff: https://docs.astral.sh/ruff/
.. _flake8: https://flake8.pycqa.org/
Documentation Documentation

View file

@ -9,8 +9,9 @@ import zlib
from codecs import BOM_UTF16_BE from codecs import BOM_UTF16_BE
from hashlib import md5 from hashlib import md5
from math import ceil, log from math import ceil, log
from warnings import warn
VERSION = __version__ = '0.9.0' VERSION = __version__ = '0.10.0'
def _to_bytes(item): def _to_bytes(item):
@ -203,6 +204,37 @@ class Stream(Object):
""" """
self.stream.append(b'b*' if even_odd else b'b') self.stream.append(b'b*' if even_odd else b'b')
def inline_image(self, width, height, color_space, bpc, raw_data):
"""Add an inline image.
:param width: The width of the image.
:type width: :obj:`int`
:param height: The height of the image.
:type height: :obj:`int`
:param color_space: The color space of the image, e.g. RGB, Gray.
:type color_space: :obj:`str`
:param bpc: The bits per component. 1 for BW, 8 for grayscale.
:type bpc: :obj:`int`
:param raw_data: The raw pixel data.
"""
data = zlib.compress(raw_data) if self.compress else raw_data
a85_data = base64.a85encode(data) + b'~>'
self.stream.append(b' '.join((
b'BI',
b'/W', _to_bytes(width),
b'/H', _to_bytes(height),
b'/BPC', _to_bytes(bpc),
b'/CS',
b'/Device' + _to_bytes(color_space),
b'/F',
b'[/A85 /Fl]' if self.compress else b'/A85',
b'/L', _to_bytes(len(a85_data)),
b'ID',
a85_data,
b'EI',
)))
def line_to(self, x, y): def line_to(self, x, y):
"""Add line from current point to point ``(x, y)``.""" """Add line from current point to point ``(x, y)``."""
self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l'))) self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l')))
@ -365,44 +397,6 @@ class Stream(Object):
_to_bytes(a), _to_bytes(b), _to_bytes(c), _to_bytes(a), _to_bytes(b), _to_bytes(c),
_to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm'))) _to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm')))
def inline_image(self, width, height, color_space, bpc, raw_data):
"""Add an inline image.
:param width: The width of the image.
:type width: :obj:`int`
:param height: The height of the image.
:type height: :obj:`int`
:param colorspace: The color space of the image, f.e. RGB, Gray.
:type colorspace: :obj:`str`
:param bpc: The bits per component. 1 for BW, 8 for grayscale.
:type bpc: :obj:`int`
:param raw_data: The raw pixel data.
"""
if self.compress:
data = zlib.compress(raw_data)
else:
data = raw_data
enc_data = base64.a85encode(data)
self.stream.append(
b' '.join(
(
b'BI',
b'/W', _to_bytes(width),
b'/H', _to_bytes(height),
b'/BPC', _to_bytes(bpc),
b'/CS',
b'/Device' + color_space.encode(),
b'/F',
b'[/A85 /Fl]' if self.compress else b'/A85',
b'/L', _to_bytes(len(enc_data) + 2),
b'ID',
enc_data + b'~>',
b'EI',
)
)
)
@property @property
def data(self): def data(self):
stream = b'\n'.join(_to_bytes(item) for item in self.stream) stream = b'\n'.join(_to_bytes(item) for item in self.stream)
@ -450,17 +444,16 @@ class Array(Object, list):
class PDF: class PDF:
"""PDF document.""" """PDF document."""
def __init__(self, version=b'1.7', identifier=None): def __init__(self, version=None, identifier=None):
"""Create a PDF document. """Create a PDF document."""
if version or identifier: # to be removed in next version
:param bytes version: PDF version. warn(
:param bytes identifier: PDF file identifier. "PDF objects dont take version or identifier during initialization "
"anymore. These properties are now stored but ignored, and will be "
""" "removed and rejected in next version of pydyf. Please pass these "
#: PDF version, as :obj:`bytes`. "properties to the PDF.write() method instead.", DeprecationWarning)
self.version = _to_bytes(version) self.version = _to_bytes(version) if version else b'1.7' # to be removed
#: PDF file identifier. self.identifier = identifier # to be removed
self.identifier = identifier
#: Python :obj:`list` containing the PDFs objects. #: Python :obj:`list` containing the PDFs objects.
self.objects = [] self.objects = []
@ -528,18 +521,23 @@ class PDF:
self.current_position += len(content) + 1 self.current_position += len(content) + 1
output.write(content + b'\n') output.write(content + b'\n')
def write(self, output, version=None, identifier=None, compress=False): def write(self, output, version=b'1.7', identifier=False, compress=False):
"""Write PDF to output. """Write PDF to output.
:param output: Output stream. :param output: Output stream.
:type output: binary :term:`file object` :type output: binary :term:`file object`
:param bytes version: PDF version. :param bytes version: PDF version.
:param bytes identifier: PDF file identifier. :param identifier: PDF file identifier. Default is :obj:`False`
to include no identifier, can be set to :obj:`True` to generate an
automatic identifier.
:type identifier: :obj:`bytes` or :obj:`bool`
:param bool compress: whether the PDF uses a compressed object stream. :param bool compress: whether the PDF uses a compressed object stream.
""" """
version = self.version if version is None else _to_bytes(version) # Convert version and identifier to bytes
identifier = self.identifier if identifier is None else identifier version = _to_bytes(version or b'1.7') # Force 1.7 when None
if identifier not in (False, True, None):
identifier = _to_bytes(identifier)
# Write header # Write header
self.write_line(b'%PDF-' + version, output) self.write_line(b'%PDF-' + version, output)
@ -607,10 +605,12 @@ class PDF:
'Root': self.catalog.reference, 'Root': self.catalog.reference,
'Info': self.info.reference, 'Info': self.info.reference,
} }
if identifier is not None: if identifier:
data = b''.join( data = b''.join(
obj.data for obj in self.objects if obj.free != 'f') obj.data for obj in self.objects if obj.free != 'f')
data_hash = md5(data).hexdigest().encode() data_hash = md5(data).hexdigest().encode()
if identifier is True:
identifier = data_hash
extra['ID'] = Array(( extra['ID'] = Array((
String(identifier).data, String(data_hash).data)) String(identifier).data, String(data_hash).data))
dict_stream = Stream([xref_stream], extra, compress) dict_stream = Stream([xref_stream], extra, compress)
@ -640,10 +640,12 @@ class PDF:
self.write_line(f'/Size {len(self.objects)}'.encode(), output) self.write_line(f'/Size {len(self.objects)}'.encode(), output)
self.write_line(b'/Root ' + self.catalog.reference, output) self.write_line(b'/Root ' + self.catalog.reference, output)
self.write_line(b'/Info ' + self.info.reference, output) self.write_line(b'/Info ' + self.info.reference, output)
if identifier is not None: if identifier:
data = b''.join( data = b''.join(
obj.data for obj in self.objects if obj.free != 'f') obj.data for obj in self.objects if obj.free != 'f')
data_hash = md5(data).hexdigest().encode() data_hash = md5(data).hexdigest().encode()
if identifier is True:
identifier = data_hash
self.write_line( self.write_line(
b'/ID [' + String(identifier).data + b' ' + b'/ID [' + String(identifier).data + b' ' +
String(data_hash).data + b']', output) String(data_hash).data + b']', output)

View file

@ -8,7 +8,7 @@ description = 'A low-level PDF generator.'
keywords = ['pdf', 'generator'] keywords = ['pdf', 'generator']
authors = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] authors = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}]
maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}]
requires-python = '>=3.7' requires-python = '>=3.8'
readme = {file = 'README.rst', content-type = 'text/x-rst'} readme = {file = 'README.rst', content-type = 'text/x-rst'}
license = {file = 'LICENSE'} license = {file = 'LICENSE'}
classifiers = [ classifiers = [
@ -39,7 +39,7 @@ Donation = 'https://opencollective.com/courtbouillon'
[project.optional-dependencies] [project.optional-dependencies]
doc = ['sphinx', 'sphinx_rtd_theme'] doc = ['sphinx', 'sphinx_rtd_theme']
test = ['pytest', 'isort', 'flake8', 'pillow'] test = ['pytest', 'ruff', 'pillow']
[tool.flit.sdist] [tool.flit.sdist]
exclude = ['.*'] exclude = ['.*']
@ -52,6 +52,6 @@ include = ['tests/*', 'pydyf/*']
exclude_lines = ['pragma: no cover', 'def __repr__', 'raise NotImplementedError'] exclude_lines = ['pragma: no cover', 'def __repr__', 'raise NotImplementedError']
omit = ['.*'] omit = ['.*']
[tool.isort] [tool.ruff.lint]
default_section = 'FIRSTPARTY' select = ['E', 'W', 'F', 'I', 'N', 'RUF']
multi_line_output = 4 ignore = ['RUF001', 'RUF002', 'RUF003']

View file

@ -1,4 +1,5 @@
import io import io
import re
import pydyf import pydyf
@ -704,11 +705,34 @@ def test_text():
''') ''')
def test_identifier(): def test_no_identifier():
document = pydyf.PDF()
pdf = io.BytesIO()
document.write(pdf, identifier=False)
assert re.search(
b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]',
pdf.getvalue()
) is None
def test_default_identifier():
document = pydyf.PDF()
pdf = io.BytesIO()
document.write(pdf, identifier=True)
assert re.search(
b'/ID \\[\\((?P<hash>[0-9a-f]{32})\\) \\((?P=hash)\\)\\]',
pdf.getvalue()
) is not None
def test_custom_identifier():
document = pydyf.PDF() document = pydyf.PDF()
pdf = io.BytesIO() pdf = io.BytesIO()
document.write(pdf, identifier=b'abc') document.write(pdf, identifier=b'abc')
assert b'abc' in pdf.getvalue() assert re.search(
b'/ID \\[\\(abc\\) \\(([0-9a-f]{32})\\)\\]',
pdf.getvalue()
) is not None
def test_version(): def test_version():