551 lines
17 KiB
Python
Executable file
551 lines
17 KiB
Python
Executable file
"""
|
||
A low-level PDF generator.
|
||
|
||
"""
|
||
|
||
import re
|
||
import zlib
|
||
from codecs import BOM_UTF16_BE
|
||
from hashlib import md5
|
||
|
||
VERSION = __version__ = '0.5.0'
|
||
|
||
|
||
def _to_bytes(item):
|
||
"""Convert item to bytes."""
|
||
if isinstance(item, bytes):
|
||
return item
|
||
elif isinstance(item, Object):
|
||
return item.data
|
||
elif isinstance(item, float):
|
||
if item.is_integer():
|
||
return f'{int(item):d}'.encode('ascii')
|
||
else:
|
||
return f'{item:f}'.encode('ascii')
|
||
elif isinstance(item, int):
|
||
return f'{item:d}'.encode('ascii')
|
||
return str(item).encode('ascii')
|
||
|
||
|
||
class Object:
|
||
"""Base class for PDF objects."""
|
||
def __init__(self):
|
||
#: Number of the object.
|
||
self.number = None
|
||
#: Position in the PDF of the object.
|
||
self.offset = 0
|
||
#: Version number of the object, non-negative.
|
||
self.generation = 0
|
||
#: Indicate if an object is used (``'n'``), or has been deleted
|
||
#: and therefore is free (``'f'``).
|
||
self.free = 'n'
|
||
|
||
@property
|
||
def indirect(self):
|
||
"""Indirect representation of an object."""
|
||
return b'\n'.join((
|
||
str(self.number).encode() + b' ' +
|
||
str(self.generation).encode() + b' obj',
|
||
self.data,
|
||
b'endobj',
|
||
))
|
||
|
||
@property
|
||
def reference(self):
|
||
"""Object identifier."""
|
||
return (
|
||
str(self.number).encode() + b' ' +
|
||
str(self.generation).encode() + b' R')
|
||
|
||
@property
|
||
def data(self):
|
||
"""Data contained in the object. Shall be defined in each subclass."""
|
||
raise NotImplementedError()
|
||
|
||
|
||
class Dictionary(Object, dict):
|
||
"""PDF Dictionary object.
|
||
|
||
Inherits from :class:`Object` and Python :obj:`dict`.
|
||
|
||
"""
|
||
def __init__(self, values=None):
|
||
Object.__init__(self)
|
||
dict.__init__(self, values or {})
|
||
|
||
@property
|
||
def data(self):
|
||
result = [b'<<']
|
||
for key, value in self.items():
|
||
result.append(b'/' + _to_bytes(key) + b' ' + _to_bytes(value))
|
||
result.append(b'>>')
|
||
return b'\n'.join(result)
|
||
|
||
|
||
class Stream(Object):
|
||
"""PDF Stream object.
|
||
|
||
Inherits from :class:`Object`.
|
||
|
||
"""
|
||
def __init__(self, stream=None, extra=None, compress=False):
|
||
super().__init__()
|
||
#: Python array of data composing stream.
|
||
self.stream = stream or []
|
||
#: Metadata containing at least the length of the Stream.
|
||
self.extra = extra or {}
|
||
#: Compress the stream data if set to ``True``. Default is ``False``.
|
||
self.compress = compress
|
||
|
||
def begin_marked_content(self, tag, property_list=None):
|
||
"""Begin marked-content sequence."""
|
||
self.stream.append(f'/{tag}')
|
||
if property_list is None:
|
||
self.stream.append(b'BMC')
|
||
else:
|
||
self.stream.append(property_list)
|
||
self.stream.append(b'BDC')
|
||
|
||
def begin_text(self):
|
||
"""Begin a text object."""
|
||
self.stream.append(b'BT')
|
||
|
||
def clip(self, even_odd=False):
|
||
"""Modify current clipping path by intersecting it with current path.
|
||
|
||
Use the nonzero winding number rule to determine which regions lie
|
||
inside the clipping path by default.
|
||
|
||
Use the even-odd rule if ``even_odd`` set to ``True``.
|
||
|
||
"""
|
||
self.stream.append(b'W*' if even_odd else b'W')
|
||
|
||
def close(self):
|
||
"""Close current subpath.
|
||
|
||
Append a straight line segment from the current point to the starting
|
||
point of the subpath.
|
||
|
||
"""
|
||
self.stream.append(b'h')
|
||
|
||
def color_space(self, space, stroke=False):
|
||
"""Set the nonstroking color space.
|
||
|
||
If stroke is set to ``True``, set the stroking color space instead.
|
||
|
||
"""
|
||
self.stream.append(
|
||
b'/' + _to_bytes(space) + b' ' + (b'CS' if stroke else b'cs'))
|
||
|
||
def curve_to(self, x1, y1, x2, y2, x3, y3):
|
||
"""Add cubic Bézier curve to current path.
|
||
|
||
The curve shall extend from ``(x3, y3)`` using ``(x1, y1)`` and ``(x2,
|
||
y2)`` as the Bézier control points.
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(x1), _to_bytes(y1),
|
||
_to_bytes(x2), _to_bytes(y2),
|
||
_to_bytes(x3), _to_bytes(y3), b'c')))
|
||
|
||
def curve_start_to(self, x2, y2, x3, y3):
|
||
"""Add cubic Bézier curve to current path
|
||
|
||
The curve shall extend to ``(x3, y3)`` using the current point and
|
||
``(x2, y2)`` as the Bézier control points.
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(x2), _to_bytes(y2),
|
||
_to_bytes(x3), _to_bytes(y3), b'v')))
|
||
|
||
def curve_end_to(self, x1, y1, x3, y3):
|
||
"""Add cubic Bézier curve to current path
|
||
|
||
The curve shall extend to ``(x3, y3)`` using `(x1, y1)`` and ``(x3,
|
||
y3)`` as the Bézier control points.
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(x1), _to_bytes(y1),
|
||
_to_bytes(x3), _to_bytes(y3), b'y')))
|
||
|
||
def draw_x_object(self, reference):
|
||
"""Draw object given by reference."""
|
||
self.stream.append(b'/' + _to_bytes(reference) + b' Do')
|
||
|
||
def end(self):
|
||
"""End path without filling or stroking."""
|
||
self.stream.append(b'n')
|
||
|
||
def end_marked_content(self):
|
||
"""End marked-content sequence."""
|
||
self.stream.append(b'EMC')
|
||
|
||
def end_text(self):
|
||
"""End text object."""
|
||
self.stream.append(b'ET')
|
||
|
||
def fill(self, even_odd=False):
|
||
"""Fill path using nonzero winding rule.
|
||
|
||
Use even-odd rule if ``even_odd`` is set to ``True``.
|
||
|
||
"""
|
||
self.stream.append(b'f*' if even_odd else b'f')
|
||
|
||
def fill_and_stroke(self, even_odd=False):
|
||
"""Fill and stroke path usign nonzero winding rule.
|
||
|
||
Use even-odd rule if ``even_odd`` is set to ``True``.
|
||
|
||
"""
|
||
self.stream.append(b'B*' if even_odd else b'B')
|
||
|
||
def fill_stroke_and_close(self, even_odd=False):
|
||
"""Fill, stroke and close path using nonzero winding rule.
|
||
|
||
Use even-odd rule if ``even_odd`` is set to ``True``.
|
||
|
||
"""
|
||
self.stream.append(b'b*' if even_odd else b'b')
|
||
|
||
def line_to(self, x, y):
|
||
"""Add line from current point to point ``(x, y)``."""
|
||
self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'l')))
|
||
|
||
def move_to(self, x, y):
|
||
"""Begin new subpath by moving current point to ``(x, y)``."""
|
||
self.stream.append(b' '.join((_to_bytes(x), _to_bytes(y), b'm')))
|
||
|
||
def shading(self, name):
|
||
"""Paint shape and color shading using shading dictionary ``name``."""
|
||
self.stream.append(b'/' + _to_bytes(name) + b' sh')
|
||
|
||
def pop_state(self):
|
||
"""Restore graphic state."""
|
||
self.stream.append(b'Q')
|
||
|
||
def push_state(self):
|
||
"""Save graphic state."""
|
||
self.stream.append(b'q')
|
||
|
||
def rectangle(self, x, y, width, height):
|
||
"""Add rectangle to current path as complete subpath.
|
||
|
||
``(x, y)`` is the lower-left corner and width and height the
|
||
dimensions.
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(x), _to_bytes(y),
|
||
_to_bytes(width), _to_bytes(height), b're')))
|
||
|
||
def set_color_rgb(self, r, g, b, stroke=False):
|
||
"""Set RGB color for nonstroking operations.
|
||
|
||
Set RGB color for stroking operations instead if ``stroke`` is set to
|
||
``True``.
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(r), _to_bytes(g), _to_bytes(b),
|
||
(b'RG' if stroke else b'rg'))))
|
||
|
||
def set_color_special(self, name, stroke=False):
|
||
"""Set color for nonstroking operations.
|
||
|
||
Set color for stroking operation if ``stroke`` is set to ``True``.
|
||
|
||
"""
|
||
self.stream.append(
|
||
b'/' + _to_bytes(name) + b' ' + (b'SCN' if stroke else b'scn'))
|
||
|
||
def set_dash(self, dash_array, dash_phase):
|
||
"""Set dash line pattern.
|
||
|
||
:param dash_array: Dash pattern.
|
||
:type dash_array: :term:`iterable`
|
||
:param dash_phase: Start of dash phase.
|
||
:type dash_phase: :obj:`int`
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
Array(dash_array).data, _to_bytes(dash_phase), b'd')))
|
||
|
||
def set_font_size(self, font, size):
|
||
"""Set font name and size."""
|
||
self.stream.append(
|
||
b'/' + _to_bytes(font) + b' ' + _to_bytes(size) + b' Tf')
|
||
|
||
def set_text_rendering(self, mode):
|
||
"""Set text rendering mode."""
|
||
self.stream.append(_to_bytes(mode) + b' Tr')
|
||
|
||
def set_line_cap(self, line_cap):
|
||
"""Set line cap style."""
|
||
self.stream.append(_to_bytes(line_cap) + b' J')
|
||
|
||
def set_line_join(self, line_join):
|
||
"""Set line join style."""
|
||
self.stream.append(_to_bytes(line_join) + b' j')
|
||
|
||
def set_line_width(self, width):
|
||
"""Set line width."""
|
||
self.stream.append(_to_bytes(width) + b' w')
|
||
|
||
def set_miter_limit(self, miter_limit):
|
||
"""Set miter limit."""
|
||
self.stream.append(_to_bytes(miter_limit) + b' M')
|
||
|
||
def set_state(self, state_name):
|
||
"""Set specified parameters in graphic state.
|
||
|
||
:param state_name: Name of the graphic state.
|
||
|
||
"""
|
||
self.stream.append(b'/' + _to_bytes(state_name) + b' gs')
|
||
|
||
def show_text(self, text):
|
||
"""Show text."""
|
||
self.stream.append(b'[' + _to_bytes(text) + b'] TJ')
|
||
|
||
def stroke(self):
|
||
"""Stroke path."""
|
||
self.stream.append(b'S')
|
||
|
||
def stroke_and_close(self):
|
||
"""Stroke and close path."""
|
||
self.stream.append(b's')
|
||
|
||
def text_matrix(self, a, b, c, d, e, f):
|
||
"""Set text matrix and text line matrix.
|
||
|
||
:param a: Top left number in the matrix.
|
||
:type a: :obj:`int` or :obj:`float`
|
||
:param b: Top middle number in the matrix.
|
||
:type b: :obj:`int` or :obj:`float`
|
||
:param c: Middle left number in the matrix.
|
||
:type c: :obj:`int` or :obj:`float`
|
||
:param d: Middle middle number in the matrix.
|
||
:type d: :obj:`int` or :obj:`float`
|
||
:param e: Bottom left number in the matrix.
|
||
:type e: :obj:`int` or :obj:`float`
|
||
:param f: Bottom middle number in the matrix.
|
||
:type f: :obj:`int` or :obj:`float`
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(a), _to_bytes(b), _to_bytes(c),
|
||
_to_bytes(d), _to_bytes(e), _to_bytes(f), b'Tm')))
|
||
|
||
def transform(self, a, b, c, d, e, f):
|
||
"""Modify current transformation matrix.
|
||
|
||
:param a: Top left number in the matrix.
|
||
:type a: :obj:`int` or :obj:`float`
|
||
:param b: Top middle number in the matrix.
|
||
:type b: :obj:`int` or :obj:`float`
|
||
:param c: Middle left number in the matrix.
|
||
:type c: :obj:`int` or :obj:`float`
|
||
:param d: Middle middle number in the matrix.
|
||
:type d: :obj:`int` or :obj:`float`
|
||
:param e: Bottom left number in the matrix.
|
||
:type e: :obj:`int` or :obj:`float`
|
||
:param f: Bottom middle number in the matrix.
|
||
:type f: :obj:`int` or :obj:`float`
|
||
|
||
"""
|
||
self.stream.append(b' '.join((
|
||
_to_bytes(a), _to_bytes(b), _to_bytes(c),
|
||
_to_bytes(d), _to_bytes(e), _to_bytes(f), b'cm')))
|
||
|
||
@property
|
||
def data(self):
|
||
stream = b'\n'.join(_to_bytes(item) for item in self.stream)
|
||
extra = Dictionary(self.extra.copy())
|
||
if self.compress:
|
||
extra['Filter'] = '/FlateDecode'
|
||
compressobj = zlib.compressobj()
|
||
stream = compressobj.compress(stream)
|
||
stream += compressobj.flush()
|
||
extra['Length'] = len(stream)
|
||
return b'\n'.join((extra.data, b'stream', stream, b'endstream'))
|
||
|
||
|
||
class String(Object):
|
||
"""PDF String object.
|
||
|
||
Inherits from :class:`Object`.
|
||
|
||
"""
|
||
def __init__(self, string=''):
|
||
super().__init__()
|
||
#: Unicode string.
|
||
self.string = string
|
||
|
||
@property
|
||
def data(self):
|
||
try:
|
||
# "A literal string is written as an arbitrary number of characters
|
||
# enclosed in parentheses. Any characters may appear in a string
|
||
# except unbalanced parentheses and the backslash, which must be
|
||
# treated specially."
|
||
escaped = re.sub(rb'([\\\(\)])', rb'\\\1', _to_bytes(self.string))
|
||
return b'(' + escaped + b')'
|
||
except UnicodeEncodeError:
|
||
encoded = BOM_UTF16_BE + str(self.string).encode('utf-16-be')
|
||
return b'<' + encoded.hex().encode() + b'>'
|
||
|
||
|
||
class Array(Object, list):
|
||
"""PDF Array object.
|
||
|
||
Inherits from :class:`Object` and Python :obj:`list`.
|
||
|
||
"""
|
||
def __init__(self, array=None):
|
||
Object.__init__(self)
|
||
list.__init__(self, array or [])
|
||
|
||
@property
|
||
def data(self):
|
||
result = [b'[']
|
||
for child in self:
|
||
result.append(_to_bytes(child))
|
||
result.append(b']')
|
||
return b' '.join(result)
|
||
|
||
|
||
class PDF:
|
||
"""PDF document."""
|
||
def __init__(self, version=b'1.7', identifier=None):
|
||
"""Create a PDF document.
|
||
|
||
:param bytes version: PDF version.
|
||
:param bytes identifier: PDF file identifier.
|
||
|
||
"""
|
||
#: PDF version, as :obj:`bytes`.
|
||
self.version = _to_bytes(version)
|
||
#: PDF file identifier.
|
||
self.identifier = identifier
|
||
|
||
#: Python :obj:`list` containing the PDF’s objects.
|
||
self.objects = []
|
||
|
||
zero_object = Object()
|
||
zero_object.generation = 65535
|
||
zero_object.free = 'f'
|
||
self.add_object(zero_object)
|
||
|
||
#: PDF :class:`Dictionary` containing the PDF’s pages.
|
||
self.pages = Dictionary({
|
||
'Type': '/Pages',
|
||
'Kids': Array([]),
|
||
'Count': 0,
|
||
})
|
||
self.add_object(self.pages)
|
||
|
||
#: PDF :class:`Dictionary` containing the PDF’s metadata.
|
||
self.info = Dictionary({})
|
||
self.add_object(self.info)
|
||
|
||
#: PDF :class:`Dictionary` containing references to the other objects.
|
||
self.catalog = Dictionary({
|
||
'Type': '/Catalog',
|
||
'Pages': self.pages.reference,
|
||
})
|
||
self.add_object(self.catalog)
|
||
|
||
#: Current position in the PDF.
|
||
self.current_position = 0
|
||
#: Position of the cross reference table.
|
||
self.xref_position = None
|
||
|
||
def add_page(self, page):
|
||
"""Add page to the PDF.
|
||
|
||
:param page: New page.
|
||
:type page: :class:`Dictionary`
|
||
|
||
"""
|
||
self.pages['Count'] += 1
|
||
self.add_object(page)
|
||
self.pages['Kids'].extend([page.number, 0, 'R'])
|
||
|
||
def add_object(self, object_):
|
||
"""Add object to the PDF."""
|
||
object_.number = len(self.objects)
|
||
self.objects.append(object_)
|
||
|
||
@property
|
||
def page_references(self):
|
||
return tuple(
|
||
f'{object_number} 0 R'.encode('ascii')
|
||
for object_number in self.pages['Kids'][::3])
|
||
|
||
def write_line(self, content, output):
|
||
"""Write line to output.
|
||
|
||
:param content: Content to write.
|
||
:type content: :obj:`bytes`
|
||
:param output: Output stream.
|
||
:type output: binary :term:`file object`
|
||
|
||
"""
|
||
self.current_position += len(content) + 1
|
||
output.write(content + b'\n')
|
||
|
||
def write(self, output, version=None, identifier=None):
|
||
"""Write PDF to output.
|
||
|
||
:param output: Output stream.
|
||
:type output: binary :term:`file object`
|
||
:param bytes version: PDF version.
|
||
:param bytes identifier: PDF file identifier.
|
||
|
||
"""
|
||
version = self.version if version is None else _to_bytes(version)
|
||
identifier = self.identifier if identifier is None else identifier
|
||
|
||
# Write header
|
||
self.write_line(b'%PDF-' + version, output)
|
||
self.write_line(b'%\xf0\x9f\x96\xa4', output)
|
||
|
||
# Write all non-free PDF objects
|
||
for object_ in self.objects:
|
||
if object_.free == 'f':
|
||
continue
|
||
object_.offset = self.current_position
|
||
self.write_line(object_.indirect, output)
|
||
|
||
# Write cross reference table
|
||
self.xref_position = self.current_position
|
||
self.write_line(b'xref', output)
|
||
self.write_line(f'0 {len(self.objects)}'.encode(), output)
|
||
for object_ in self.objects:
|
||
self.write_line(
|
||
(f'{object_.offset:010} {object_.generation:05} '
|
||
f'{object_.free} ').encode(), output)
|
||
|
||
# Write trailer
|
||
self.write_line(b'trailer', output)
|
||
self.write_line(b'<<', output)
|
||
self.write_line(f'/Size {len(self.objects)}'.encode(), output)
|
||
self.write_line(b'/Root ' + self.catalog.reference, output)
|
||
self.write_line(b'/Info ' + self.info.reference, output)
|
||
if identifier is not None:
|
||
data = b''.join(
|
||
obj.data for obj in self.objects if obj.free != 'f')
|
||
data_hash = md5(data).hexdigest().encode()
|
||
self.write_line(
|
||
b'/ID [' + String(identifier).data + b' ' +
|
||
String(data_hash).data + b']', output)
|
||
self.write_line(b'>>', output)
|
||
self.write_line(b'startxref', output)
|
||
self.write_line(f'{self.xref_position}'.encode(), output)
|
||
self.write_line(b'%%EOF', output)
|