1
0
Fork 0

Adding upstream version 2.1.0.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-12 21:44:15 +01:00
parent c3f707bfbc
commit 085459798b
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
14 changed files with 132 additions and 104 deletions

View file

@ -2,22 +2,24 @@ repos:
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0 rev: v3.4.0
hooks: hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first - id: check-docstring-first
- id: check-merge-conflict
- id: check-yaml - id: check-yaml
- id: debug-statements - id: debug-statements
- id: double-quote-string-fixer - id: double-quote-string-fixer
- id: end-of-file-fixer
- id: name-tests-test - id: name-tests-test
- id: check-added-large-files - id: requirements-txt-fixer
- id: check-byte-order-marker - id: trailing-whitespace
- id: fix-encoding-pragma - repo: https://github.com/asottile/setup-cfg-fmt
rev: v1.16.0
hooks:
- id: setup-cfg-fmt
- repo: https://gitlab.com/pycqa/flake8 - repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4 rev: 3.8.4
hooks: hooks:
- id: flake8 - id: flake8
exclude: ^identify/vendor/licenses\.py$ exclude: ^identify/vendor/licenses\.py$
additional_dependencies: [flake8-typing-imports==1.10.1]
- repo: https://github.com/pre-commit/mirrors-autopep8 - repo: https://github.com/pre-commit/mirrors-autopep8
rev: v1.5.4 rev: v1.5.4
hooks: hooks:
@ -26,11 +28,18 @@ repos:
rev: v2.4.0 rev: v2.4.0
hooks: hooks:
- id: reorder-python-imports - id: reorder-python-imports
args: [ args: [--py3-plus]
'--add-import', 'from __future__ import absolute_import',
'--add-import', 'from __future__ import unicode_literals',
]
- repo: https://github.com/asottile/add-trailing-comma - repo: https://github.com/asottile/add-trailing-comma
rev: v2.1.0 rev: v2.1.0
hooks: hooks:
- id: add-trailing-comma - id: add-trailing-comma
args: [--py36-plus]
- repo: https://github.com/asottile/pyupgrade
rev: v2.10.0
hooks:
- id: pyupgrade
args: [--py36-plus]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.812
hooks:
- id: mypy

View file

@ -37,7 +37,7 @@ If you have an actual file on disk, you can get the most information possible
When using a file on disk, the checks performed are: When using a file on disk, the checks performed are:
* File type (file, symlink, directory) * File type (file, symlink, directory, socket)
* Mode (is it executable?) * Mode (is it executable?)
* File name (mostly based on extension) * File name (mostly based on extension)
* If executable, the shebang is read and the interpreter interpreted * If executable, the shebang is read and the interpreter interpreted
@ -76,11 +76,11 @@ optional arguments:
--filename-only --filename-only
``` ```
```bash ```console
$ identify-cli setup.py; echo $? $ identify-cli setup.py; echo $?
["file", "non-executable", "python", "text"] ["file", "non-executable", "python", "text"]
0 0
identify setup.py --filename-only; echo $? $ identify setup.py --filename-only; echo $?
["python", "text"] ["python", "text"]
0 0
$ identify-cli wat.wat; echo $? $ identify-cli wat.wat; echo $?

View file

@ -1,19 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Usage: """Usage:
./bin/vendor-licenses > identify/vendor/licenses.py ./bin/vendor-licenses > identify/vendor/licenses.py
""" """
from __future__ import absolute_import
from __future__ import unicode_literals
import argparse import argparse
import os.path import os.path
import subprocess import subprocess
import tempfile import tempfile
def main(): def main() -> int:
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--revision', default='HEAD') parser.add_argument('--revision', default='HEAD')
args = parser.parse_args() args = parser.parse_args()
@ -45,18 +41,16 @@ def main():
licenses.append((spdx, license_text)) licenses.append((spdx, license_text))
print('# -*- coding: utf-8 -*-')
print('from __future__ import absolute_import')
print('from __future__ import unicode_literals')
print('LICENSES = (') print('LICENSES = (')
for spdx, text in sorted(licenses): for spdx, text in sorted(licenses):
print(' (') print(' (')
print(' {!r},'.format(spdx)) print(f' {spdx!r},')
print(" '''\\") print(" '''\\")
print(text.replace('\t', ' ').replace(' \n', '').strip()) print(text.replace('\t', ' ').replace(' \n', '').strip())
print("''',") print("''',")
print(' ),') print(' ),')
print(')') print(')')
return 0
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,14 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
import argparse import argparse
import json import json
from typing import Optional
from typing import Sequence
from identify import identify from identify import identify
def main(argv=None): def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--filename-only', action='store_true') parser.add_argument('--filename-only', action='store_true')
parser.add_argument('path') parser.add_argument('path')

View file

@ -1,13 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
EXTENSIONS = { EXTENSIONS = {
'adoc': {'text', 'asciidoc'}, 'adoc': {'text', 'asciidoc'},
'asciidoc': {'text', 'asciidoc'}, 'asciidoc': {'text', 'asciidoc'},
'apinotes': {'text', 'apinotes'}, 'apinotes': {'text', 'apinotes'},
'asar': {'binary', 'asar'}, 'asar': {'binary', 'asar'},
'avif': {'binary', 'image', 'avif'},
'bash': {'text', 'shell', 'bash'}, 'bash': {'text', 'shell', 'bash'},
'bat': {'text', 'batch'}, 'bat': {'text', 'batch'},
'bib': {'text', 'bib'}, 'bib': {'text', 'bib'},

View file

@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import io
import os.path import os.path
import re import re
import shlex import shlex
import stat
import string import string
import sys import sys
from typing import IO
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from identify import extensions from identify import extensions
from identify import interpreters from identify import interpreters
@ -19,27 +19,37 @@ printable = frozenset(string.printable)
DIRECTORY = 'directory' DIRECTORY = 'directory'
SYMLINK = 'symlink' SYMLINK = 'symlink'
SOCKET = 'socket'
FILE = 'file' FILE = 'file'
EXECUTABLE = 'executable' EXECUTABLE = 'executable'
NON_EXECUTABLE = 'non-executable' NON_EXECUTABLE = 'non-executable'
TEXT = 'text' TEXT = 'text'
BINARY = 'binary' BINARY = 'binary'
ALL_TAGS = {DIRECTORY, SYMLINK, FILE, EXECUTABLE, NON_EXECUTABLE, TEXT, BINARY} TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK, SOCKET))
ALL_TAGS.update(*extensions.EXTENSIONS.values()) MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE))
ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values()) ENCODING_TAGS = frozenset((BINARY, TEXT))
ALL_TAGS.update(*extensions.NAMES.values()) _ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS}
ALL_TAGS.update(*interpreters.INTERPRETERS.values()) _ALL_TAGS.update(*extensions.EXTENSIONS.values())
ALL_TAGS = frozenset(ALL_TAGS) _ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
_ALL_TAGS.update(*extensions.NAMES.values())
_ALL_TAGS.update(*interpreters.INTERPRETERS.values())
ALL_TAGS = frozenset(_ALL_TAGS)
def tags_from_path(path): def tags_from_path(path: str) -> Set[str]:
if not os.path.lexists(path): try:
raise ValueError('{} does not exist.'.format(path)) sr = os.lstat(path)
if os.path.isdir(path): except (OSError, ValueError): # same error-handling as `os.lexists()`
raise ValueError(f'{path} does not exist.')
mode = sr.st_mode
if stat.S_ISDIR(mode):
return {DIRECTORY} return {DIRECTORY}
if os.path.islink(path): if stat.S_ISLNK(mode):
return {SYMLINK} return {SYMLINK}
if stat.S_ISSOCK(mode):
return {SOCKET}
tags = {FILE} tags = {FILE}
@ -62,19 +72,19 @@ def tags_from_path(path):
# some extensions can be both binary and text # some extensions can be both binary and text
# see EXTENSIONS_NEED_BINARY_CHECK # see EXTENSIONS_NEED_BINARY_CHECK
if not {TEXT, BINARY} & tags: if not ENCODING_TAGS & tags:
if file_is_text(path): if file_is_text(path):
tags.add(TEXT) tags.add(TEXT)
else: else:
tags.add(BINARY) tags.add(BINARY)
assert {TEXT, BINARY} & tags, tags assert ENCODING_TAGS & tags, tags
assert {EXECUTABLE, NON_EXECUTABLE} & tags, tags assert MODE_TAGS & tags, tags
return tags return tags
def tags_from_filename(filename): def tags_from_filename(path: str) -> Set[str]:
_, filename = os.path.split(filename) _, filename = os.path.split(path)
_, ext = os.path.splitext(filename) _, ext = os.path.splitext(filename)
ret = set() ret = set()
@ -95,7 +105,7 @@ def tags_from_filename(filename):
return ret return ret
def tags_from_interpreter(interpreter): def tags_from_interpreter(interpreter: str) -> Set[str]:
_, _, interpreter = interpreter.rpartition('/') _, _, interpreter = interpreter.rpartition('/')
# Try "python3.5.2" => "python3.5" => "python3" until one matches. # Try "python3.5.2" => "python3.5" => "python3" until one matches.
@ -108,7 +118,7 @@ def tags_from_interpreter(interpreter):
return set() return set()
def is_text(bytesio): def is_text(bytesio: IO[bytes]) -> bool:
"""Return whether the first KB of contents seems to be binary. """Return whether the first KB of contents seems to be binary.
This is roughly based on libmagic's binary/text detection: This is roughly based on libmagic's binary/text detection:
@ -122,14 +132,14 @@ def is_text(bytesio):
return not bool(bytesio.read(1024).translate(None, text_chars)) return not bool(bytesio.read(1024).translate(None, text_chars))
def file_is_text(path): def file_is_text(path: str) -> bool:
if not os.path.lexists(path): if not os.path.lexists(path):
raise ValueError('{} does not exist.'.format(path)) raise ValueError(f'{path} does not exist.')
with open(path, 'rb') as f: with open(path, 'rb') as f:
return is_text(f) return is_text(f)
def _shebang_split(line): def _shebang_split(line: str) -> List[str]:
try: try:
# shebangs aren't supposed to be quoted, though some tools such as # shebangs aren't supposed to be quoted, though some tools such as
# setuptools will write them with quotes so we'll best-guess parse # setuptools will write them with quotes so we'll best-guess parse
@ -141,11 +151,14 @@ def _shebang_split(line):
return line.split() return line.split()
def _parse_nix_shebang(bytesio, cmd): def _parse_nix_shebang(
bytesio: IO[bytes],
cmd: Tuple[str, ...],
) -> Tuple[str, ...]:
while bytesio.read(2) == b'#!': while bytesio.read(2) == b'#!':
next_line = bytesio.readline() next_line_b = bytesio.readline()
try: try:
next_line = next_line.decode('UTF-8') next_line = next_line_b.decode('UTF-8')
except UnicodeDecodeError: except UnicodeDecodeError:
return cmd return cmd
@ -162,13 +175,13 @@ def _parse_nix_shebang(bytesio, cmd):
return cmd return cmd
def parse_shebang(bytesio): def parse_shebang(bytesio: IO[bytes]) -> Tuple[str, ...]:
"""Parse the shebang from a file opened for reading binary.""" """Parse the shebang from a file opened for reading binary."""
if bytesio.read(2) != b'#!': if bytesio.read(2) != b'#!':
return () return ()
first_line = bytesio.readline() first_line_b = bytesio.readline()
try: try:
first_line = first_line.decode('UTF-8') first_line = first_line_b.decode('UTF-8')
except UnicodeDecodeError: except UnicodeDecodeError:
return () return ()
@ -185,10 +198,10 @@ def parse_shebang(bytesio):
return cmd return cmd
def parse_shebang_from_file(path): def parse_shebang_from_file(path: str) -> Tuple[str, ...]:
"""Parse the shebang given a file path.""" """Parse the shebang given a file path."""
if not os.path.lexists(path): if not os.path.lexists(path):
raise ValueError('{} does not exist.'.format(path)) raise ValueError(f'{path} does not exist.')
if not os.access(path, os.X_OK): if not os.access(path, os.X_OK):
return () return ()
@ -200,13 +213,13 @@ COPYRIGHT_RE = re.compile(r'^\s*(Copyright|\(C\)) .*$', re.I | re.MULTILINE)
WS_RE = re.compile(r'\s+') WS_RE = re.compile(r'\s+')
def _norm_license(s): def _norm_license(s: str) -> str:
s = COPYRIGHT_RE.sub('', s) s = COPYRIGHT_RE.sub('', s)
s = WS_RE.sub(' ', s) s = WS_RE.sub(' ', s)
return s.strip() return s.strip()
def license_id(filename): def license_id(filename: str) -> Optional[str]:
"""Return the spdx id for the license contained in `filename`. If no """Return the spdx id for the license contained in `filename`. If no
license is detected, returns `None`. license is detected, returns `None`.
@ -222,7 +235,7 @@ def license_id(filename):
""" """
import editdistance # `pip install identify[license]` import editdistance # `pip install identify[license]`
with io.open(filename, encoding='UTF-8') as f: with open(filename, encoding='UTF-8') as f:
contents = f.read() contents = f.read()
norm = _norm_license(contents) norm = _norm_license(contents)

View file

@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
INTERPRETERS = { INTERPRETERS = {
'ash': {'shell', 'ash'}, 'ash': {'shell', 'ash'},
'awk': {'awk'}, 'awk': {'awk'},

View file

@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
LICENSES = ( LICENSES = (
( (
'0BSD', '0BSD',

View file

@ -1,6 +1,6 @@
[metadata] [metadata]
name = identify name = identify
version = 1.5.14 version = 2.1.0
description = File identification library for Python description = File identification library for Python
long_description = file: README.md long_description = file: README.md
long_description_content_type = text/markdown long_description_content_type = text/markdown
@ -11,26 +11,26 @@ license = MIT
license_file = LICENSE license_file = LICENSE
classifiers = classifiers =
License :: OSI Approved :: MIT License License :: OSI Approved :: MIT License
Programming Language :: Python :: 2
Programming Language :: Python :: 2.7
Programming Language :: Python :: 3 Programming Language :: Python :: 3
Programming Language :: Python :: 3.4 Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: 3.5
Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy Programming Language :: Python :: Implementation :: PyPy
[options] [options]
packages = find: packages = find:
python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.* python_requires = >=3.6.1
[options.entry_points] [options.entry_points]
console_scripts = console_scripts =
identify-cli=identify.cli:main identify-cli=identify.cli:main
[options.extras_require] [options.extras_require]
license = editdistance license =
editdistance
[options.packages.find] [options.packages.find]
exclude = exclude =
@ -42,3 +42,16 @@ universal = True
[coverage:run] [coverage:run]
plugins = covdefaults plugins = covdefaults
[mypy]
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
disallow_untyped_defs = true
no_implicit_optional = true
[mypy-testing.*]
disallow_untyped_defs = false
[mypy-tests.*]
disallow_untyped_defs = false

View file

@ -1,6 +1,2 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from setuptools import setup from setuptools import setup
setup() setup()

View file

@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from identify import cli from identify import cli

View file

@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
import pytest import pytest
from identify import extensions from identify import extensions

View file

@ -1,10 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
import io import io
import os import os
import socket
import stat import stat
from tempfile import TemporaryDirectory
import pytest import pytest
@ -14,6 +12,21 @@ from identify import identify
def test_all_tags_includes_basic_ones(): def test_all_tags_includes_basic_ones():
assert 'file' in identify.ALL_TAGS assert 'file' in identify.ALL_TAGS
assert 'directory' in identify.ALL_TAGS assert 'directory' in identify.ALL_TAGS
assert 'executable' in identify.ALL_TAGS
assert 'text' in identify.ALL_TAGS
assert 'socket' in identify.ALL_TAGS
@pytest.mark.parametrize(
'tag_group',
(
identify.TYPE_TAGS,
identify.MODE_TAGS,
identify.ENCODING_TAGS,
),
)
def test_all_tags_contains_all_groups(tag_group):
assert tag_group < identify.ALL_TAGS
def test_all_tags_contains_each_type(): def test_all_tags_contains_each_type():
@ -41,6 +54,17 @@ def test_tags_from_path_symlink(tmpdir):
assert identify.tags_from_path(x.strpath) == {'symlink'} assert identify.tags_from_path(x.strpath) == {'symlink'}
def test_tags_from_path_socket():
tmproot = '/tmp' # short path avoids `OSError: AF_UNIX path too long`
with TemporaryDirectory(dir=tmproot) as tmpdir:
socket_path = os.path.join(tmpdir, 'socket')
with socket.socket(socket.AF_UNIX) as sock:
sock.bind(socket_path)
tags = identify.tags_from_path(socket_path)
assert tags == {'socket'}
def test_tags_from_path_broken_symlink(tmpdir): def test_tags_from_path_broken_symlink(tmpdir):
x = tmpdir.join('foo') x = tmpdir.join('foo')
x.mksymlinkto(tmpdir.join('lol')) x.mksymlinkto(tmpdir.join('lol'))
@ -177,9 +201,9 @@ def test_tags_from_interpreter(interpreter, expected):
( (
(b'hello world', True), (b'hello world', True),
(b'', True), (b'', True),
('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)'.encode('utf8'), True), ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)'.encode(), True),
(r'¯\_(ツ)_/¯'.encode('utf8'), True), (r'¯\_(ツ)_/¯'.encode(), True),
('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode('utf8'), True), ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode(), True),
('éóñå'.encode('latin1'), True), ('éóñå'.encode('latin1'), True),
(b'hello world\x00', False), (b'hello world\x00', False),

View file

@ -1,5 +1,5 @@
[tox] [tox]
envlist = py27,py35,py36,pypy,pre-commit envlist = py36,pypy3,pre-commit
[testenv] [testenv]
deps = -rrequirements-dev.txt deps = -rrequirements-dev.txt