Adding upstream version 2.1.0.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in: parent c3f707bfbc, commit 085459798b
14 changed files with 132 additions and 104 deletions
@@ -2,22 +2,24 @@ repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v3.4.0
  hooks:
  - id: trailing-whitespace
  - id: end-of-file-fixer
  - id: check-docstring-first
  - id: check-merge-conflict
  - id: check-yaml
  - id: debug-statements
  - id: double-quote-string-fixer
  - id: end-of-file-fixer
  - id: name-tests-test
  - id: check-added-large-files
  - id: check-byte-order-marker
  - id: fix-encoding-pragma
  - id: requirements-txt-fixer
  - id: trailing-whitespace
- repo: https://github.com/asottile/setup-cfg-fmt
  rev: v1.16.0
  hooks:
  - id: setup-cfg-fmt
- repo: https://gitlab.com/pycqa/flake8
  rev: 3.8.4
  hooks:
  - id: flake8
    exclude: ^identify/vendor/licenses\.py$
    additional_dependencies: [flake8-typing-imports==1.10.1]
- repo: https://github.com/pre-commit/mirrors-autopep8
  rev: v1.5.4
  hooks:
@@ -26,11 +28,18 @@ repos:
  rev: v2.4.0
  hooks:
  - id: reorder-python-imports
    args: [
        '--add-import', 'from __future__ import absolute_import',
        '--add-import', 'from __future__ import unicode_literals',
    ]
    args: [--py3-plus]
- repo: https://github.com/asottile/add-trailing-comma
  rev: v2.1.0
  hooks:
  - id: add-trailing-comma
    args: [--py36-plus]
- repo: https://github.com/asottile/pyupgrade
  rev: v2.10.0
  hooks:
  - id: pyupgrade
    args: [--py36-plus]
- repo: https://github.com/pre-commit/mirrors-mypy
  rev: v0.812
  hooks:
  - id: mypy
@@ -37,7 +37,7 @@ If you have an actual file on disk, you can get the most information possible

When using a file on disk, the checks performed are:

* File type (file, symlink, directory)
* File type (file, symlink, directory, socket)
* Mode (is it executable?)
* File name (mostly based on extension)
* If executable, the shebang is read and the interpreter interpreted
@@ -76,11 +76,11 @@ optional arguments:
--filename-only
```

```bash
```console
$ identify-cli setup.py; echo $?
["file", "non-executable", "python", "text"]
0
identify setup.py --filename-only; echo $?
$ identify setup.py --filename-only; echo $?
["python", "text"]
0
$ identify-cli wat.wat; echo $?
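(Not part of the diff.) A minimal sketch of the library-level equivalents of the console examples above, assuming a `setup.py` exists in the working directory; the exact tag set depends on the file:

```python
from identify import identify

print(identify.tags_from_path('setup.py'))
# e.g. {'file', 'non-executable', 'python', 'text'}

print(identify.tags_from_filename('setup.py'))
# e.g. {'python', 'text'}
```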
@@ -1,19 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Usage:

./bin/vendor-licenses > identify/vendor/licenses.py
"""
from __future__ import absolute_import
from __future__ import unicode_literals

import argparse
import os.path
import subprocess
import tempfile


def main():
def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument('--revision', default='HEAD')
    args = parser.parse_args()
@@ -45,18 +41,16 @@ def main():

        licenses.append((spdx, license_text))

    print('# -*- coding: utf-8 -*-')
    print('from __future__ import absolute_import')
    print('from __future__ import unicode_literals')
    print('LICENSES = (')
    for spdx, text in sorted(licenses):
        print('    (')
        print('        {!r},'.format(spdx))
        print(f'        {spdx!r},')
        print("        '''\\")
        print(text.replace('\t', '    ').replace(' \n', '').strip())
        print("''',")
        print('    ),')
    print(')')
    return 0


if __name__ == '__main__':
@@ -1,14 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import argparse
import json
from typing import Optional
from typing import Sequence

from identify import identify


def main(argv=None):
def main(argv: Optional[Sequence[str]] = None) -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename-only', action='store_true')
    parser.add_argument('path')
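(Not part of the diff.) A minimal sketch of calling the CLI entry point programmatically, which the new `Optional[Sequence[str]]` signature above makes explicit; the path is illustrative:

```python
# cli.main() accepts an argv sequence, prints the tags as JSON (like the
# `identify-cli` console script) and returns an exit code, 0 on success.
from identify import cli

exit_code = cli.main(['setup.py', '--filename-only'])
print(exit_code)  # expected: 0 if setup.py exists
```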
@@ -1,13 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals


EXTENSIONS = {
    'adoc': {'text', 'asciidoc'},
    'asciidoc': {'text', 'asciidoc'},
    'apinotes': {'text', 'apinotes'},
    'asar': {'binary', 'asar'},
    'avif': {'binary', 'image', 'avif'},
    'bash': {'text', 'shell', 'bash'},
    'bat': {'text', 'batch'},
    'bib': {'text', 'bib'},
@@ -1,14 +1,14 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import io
import os.path
import re
import shlex
import stat
import string
import sys
from typing import IO
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple

from identify import extensions
from identify import interpreters
@@ -19,27 +19,37 @@ printable = frozenset(string.printable)

DIRECTORY = 'directory'
SYMLINK = 'symlink'
SOCKET = 'socket'
FILE = 'file'
EXECUTABLE = 'executable'
NON_EXECUTABLE = 'non-executable'
TEXT = 'text'
BINARY = 'binary'

ALL_TAGS = {DIRECTORY, SYMLINK, FILE, EXECUTABLE, NON_EXECUTABLE, TEXT, BINARY}
ALL_TAGS.update(*extensions.EXTENSIONS.values())
ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
ALL_TAGS.update(*extensions.NAMES.values())
ALL_TAGS.update(*interpreters.INTERPRETERS.values())
ALL_TAGS = frozenset(ALL_TAGS)
TYPE_TAGS = frozenset((DIRECTORY, FILE, SYMLINK, SOCKET))
MODE_TAGS = frozenset((EXECUTABLE, NON_EXECUTABLE))
ENCODING_TAGS = frozenset((BINARY, TEXT))
_ALL_TAGS = {*TYPE_TAGS, *MODE_TAGS, *ENCODING_TAGS}
_ALL_TAGS.update(*extensions.EXTENSIONS.values())
_ALL_TAGS.update(*extensions.EXTENSIONS_NEED_BINARY_CHECK.values())
_ALL_TAGS.update(*extensions.NAMES.values())
_ALL_TAGS.update(*interpreters.INTERPRETERS.values())
ALL_TAGS = frozenset(_ALL_TAGS)


def tags_from_path(path):
    if not os.path.lexists(path):
        raise ValueError('{} does not exist.'.format(path))
    if os.path.isdir(path):
def tags_from_path(path: str) -> Set[str]:
    try:
        sr = os.lstat(path)
    except (OSError, ValueError):  # same error-handling as `os.lexists()`
        raise ValueError(f'{path} does not exist.')

    mode = sr.st_mode
    if stat.S_ISDIR(mode):
        return {DIRECTORY}
    if os.path.islink(path):
    if stat.S_ISLNK(mode):
        return {SYMLINK}
    if stat.S_ISSOCK(mode):
        return {SOCKET}

    tags = {FILE}
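(Not part of the diff.) A minimal sketch of how the tag groups introduced above can be used by callers; the path is illustrative and the exact tags vary by checkout:

```python
from identify import identify

tags = identify.tags_from_path('setup.py')
# e.g. {'file', 'non-executable', 'text', 'python'}

# The frozenset groups make it easy to select or strip a whole category:
print(tags & identify.ENCODING_TAGS)  # {'text'} or {'binary'}
print(tags & identify.MODE_TAGS)      # {'executable'} or {'non-executable'}
print(tags - identify.TYPE_TAGS)      # everything except file/directory/symlink/socket
```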
@@ -62,19 +72,19 @@ def tags_from_path(path):

    # some extensions can be both binary and text
    # see EXTENSIONS_NEED_BINARY_CHECK
    if not {TEXT, BINARY} & tags:
    if not ENCODING_TAGS & tags:
        if file_is_text(path):
            tags.add(TEXT)
        else:
            tags.add(BINARY)

    assert {TEXT, BINARY} & tags, tags
    assert {EXECUTABLE, NON_EXECUTABLE} & tags, tags
    assert ENCODING_TAGS & tags, tags
    assert MODE_TAGS & tags, tags
    return tags


def tags_from_filename(filename):
    _, filename = os.path.split(filename)
def tags_from_filename(path: str) -> Set[str]:
    _, filename = os.path.split(path)
    _, ext = os.path.splitext(filename)

    ret = set()
@@ -95,7 +105,7 @@ def tags_from_filename(filename):
    return ret


def tags_from_interpreter(interpreter):
def tags_from_interpreter(interpreter: str) -> Set[str]:
    _, _, interpreter = interpreter.rpartition('/')

    # Try "python3.5.2" => "python3.5" => "python3" until one matches.
@@ -108,7 +118,7 @@ def tags_from_interpreter(interpreter):
    return set()


def is_text(bytesio):
def is_text(bytesio: IO[bytes]) -> bool:
    """Return whether the first KB of contents seems to be binary.

    This is roughly based on libmagic's binary/text detection:
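(Not part of the diff.) A minimal sketch of the text/binary heuristic exercised directly; the byte strings mirror the test cases further down in this commit:

```python
import io

from identify import identify

# is_text() inspects up to the first 1024 bytes of a binary file object.
print(identify.is_text(io.BytesIO(b'hello world')))      # True
print(identify.is_text(io.BytesIO(b'hello world\x00')))  # False: NUL byte means binary
```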
@@ -122,14 +132,14 @@ def is_text(bytesio):
    return not bool(bytesio.read(1024).translate(None, text_chars))


def file_is_text(path):
def file_is_text(path: str) -> bool:
    if not os.path.lexists(path):
        raise ValueError('{} does not exist.'.format(path))
        raise ValueError(f'{path} does not exist.')
    with open(path, 'rb') as f:
        return is_text(f)


def _shebang_split(line):
def _shebang_split(line: str) -> List[str]:
    try:
        # shebangs aren't supposed to be quoted, though some tools such as
        # setuptools will write them with quotes so we'll best-guess parse
@@ -141,11 +151,14 @@ def _shebang_split(line):
    return line.split()


def _parse_nix_shebang(bytesio, cmd):
def _parse_nix_shebang(
        bytesio: IO[bytes],
        cmd: Tuple[str, ...],
) -> Tuple[str, ...]:
    while bytesio.read(2) == b'#!':
        next_line = bytesio.readline()
        next_line_b = bytesio.readline()
        try:
            next_line = next_line.decode('UTF-8')
            next_line = next_line_b.decode('UTF-8')
        except UnicodeDecodeError:
            return cmd
@@ -162,13 +175,13 @@ def _parse_nix_shebang(bytesio, cmd):
    return cmd


def parse_shebang(bytesio):
def parse_shebang(bytesio: IO[bytes]) -> Tuple[str, ...]:
    """Parse the shebang from a file opened for reading binary."""
    if bytesio.read(2) != b'#!':
        return ()
    first_line = bytesio.readline()
    first_line_b = bytesio.readline()
    try:
        first_line = first_line.decode('UTF-8')
        first_line = first_line_b.decode('UTF-8')
    except UnicodeDecodeError:
        return ()
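(Not part of the diff.) A minimal sketch of parse_shebang() on in-memory data, no file on disk needed; the expected results follow from the code shown above:

```python
import io

from identify import identify

# The shebang line is read after the leading b'#!' and split into a command tuple.
print(identify.parse_shebang(io.BytesIO(b'#!/bin/sh\n')))
# expected: ('/bin/sh',)

# Anything that does not start with b'#!' yields an empty tuple.
print(identify.parse_shebang(io.BytesIO(b'print("hi")\n')))
# expected: ()
```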
@@ -185,10 +198,10 @@ def parse_shebang(bytesio):
    return cmd


def parse_shebang_from_file(path):
def parse_shebang_from_file(path: str) -> Tuple[str, ...]:
    """Parse the shebang given a file path."""
    if not os.path.lexists(path):
        raise ValueError('{} does not exist.'.format(path))
        raise ValueError(f'{path} does not exist.')
    if not os.access(path, os.X_OK):
        return ()
@@ -200,13 +213,13 @@ COPYRIGHT_RE = re.compile(r'^\s*(Copyright|\(C\)) .*$', re.I | re.MULTILINE)
WS_RE = re.compile(r'\s+')


def _norm_license(s):
def _norm_license(s: str) -> str:
    s = COPYRIGHT_RE.sub('', s)
    s = WS_RE.sub(' ', s)
    return s.strip()


def license_id(filename):
def license_id(filename: str) -> Optional[str]:
    """Return the spdx id for the license contained in `filename`. If no
    license is detected, returns `None`.
@@ -222,7 +235,7 @@ def license_id(filename):
    """
    import editdistance  # `pip install identify[license]`

    with io.open(filename, encoding='UTF-8') as f:
    with open(filename, encoding='UTF-8') as f:
        contents = f.read()

    norm = _norm_license(contents)
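(Not part of the diff.) A minimal sketch of the license detection path touched above; it needs the optional dependency (`pip install identify[license]`) and a LICENSE file to inspect:

```python
from identify import identify

# Returns the SPDX id of the closest-matching vendored license text, or None.
spdx = identify.license_id('LICENSE')
print(spdx)  # e.g. 'MIT' for this project's own LICENSE file
```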
@@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

INTERPRETERS = {
    'ash': {'shell', 'ash'},
    'awk': {'awk'},
identify/vendor/licenses.py (vendored): 3 changed lines
@@ -1,6 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
LICENSES = (
    (
        '0BSD',
setup.cfg: 27 changed lines
@@ -1,6 +1,6 @@
[metadata]
name = identify
version = 1.5.14
version = 2.1.0
description = File identification library for Python
long_description = file: README.md
long_description_content_type = text/markdown
@@ -11,26 +11,26 @@ license = MIT
license_file = LICENSE
classifiers =
    License :: OSI Approved :: MIT License
    Programming Language :: Python :: 2
    Programming Language :: Python :: 2.7
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.4
    Programming Language :: Python :: 3.5
    Programming Language :: Python :: 3 :: Only
    Programming Language :: Python :: 3.6
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: Implementation :: CPython
    Programming Language :: Python :: Implementation :: PyPy

[options]
packages = find:
python_requires = >=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
python_requires = >=3.6.1

[options.entry_points]
console_scripts =
    identify-cli=identify.cli:main

[options.extras_require]
license = editdistance
license =
    editdistance

[options.packages.find]
exclude =
@@ -42,3 +42,16 @@ universal = True

[coverage:run]
plugins = covdefaults

[mypy]
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
disallow_untyped_defs = true
no_implicit_optional = true

[mypy-testing.*]
disallow_untyped_defs = false

[mypy-tests.*]
disallow_untyped_defs = false
setup.py: 4 changed lines
@@ -1,6 +1,2 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from setuptools import setup
setup()
@@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

from identify import cli
@@ -1,7 +1,3 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import pytest

from identify import extensions
@@ -1,10 +1,8 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals

import io
import os
import socket
import stat
from tempfile import TemporaryDirectory

import pytest
@@ -14,6 +12,21 @@ from identify import identify
def test_all_tags_includes_basic_ones():
    assert 'file' in identify.ALL_TAGS
    assert 'directory' in identify.ALL_TAGS
    assert 'executable' in identify.ALL_TAGS
    assert 'text' in identify.ALL_TAGS
    assert 'socket' in identify.ALL_TAGS


@pytest.mark.parametrize(
    'tag_group',
    (
        identify.TYPE_TAGS,
        identify.MODE_TAGS,
        identify.ENCODING_TAGS,
    ),
)
def test_all_tags_contains_all_groups(tag_group):
    assert tag_group < identify.ALL_TAGS


def test_all_tags_contains_each_type():
@@ -41,6 +54,17 @@ def test_tags_from_path_symlink(tmpdir):
    assert identify.tags_from_path(x.strpath) == {'symlink'}


def test_tags_from_path_socket():
    tmproot = '/tmp'  # short path avoids `OSError: AF_UNIX path too long`
    with TemporaryDirectory(dir=tmproot) as tmpdir:
        socket_path = os.path.join(tmpdir, 'socket')
        with socket.socket(socket.AF_UNIX) as sock:
            sock.bind(socket_path)
            tags = identify.tags_from_path(socket_path)

    assert tags == {'socket'}


def test_tags_from_path_broken_symlink(tmpdir):
    x = tmpdir.join('foo')
    x.mksymlinkto(tmpdir.join('lol'))
@@ -177,9 +201,9 @@ def test_tags_from_interpreter(interpreter, expected):
    (
        (b'hello world', True),
        (b'', True),
        ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode('utf8'), True),
        (r'¯\_(ツ)_/¯'.encode('utf8'), True),
        ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode('utf8'), True),
        ('éóñəå ⊂(◉‿◉)つ(ノ≥∇≤)ノ'.encode(), True),
        (r'¯\_(ツ)_/¯'.encode(), True),
        ('♪┏(・o・)┛♪┗ ( ・o・) ┓♪┏ ( ) ┛♪┗ (・o・ ) ┓♪'.encode(), True),
        ('éóñå'.encode('latin1'), True),

        (b'hello world\x00', False),
tox.ini: 2 changed lines
@@ -1,5 +1,5 @@
[tox]
envlist = py27,py35,py36,pypy,pre-commit
envlist = py36,pypy3,pre-commit

[testenv]
deps = -rrequirements-dev.txt