1
0
Fork 0

Adding upstream version 1.5.5.

Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
Daniel Baumann 2025-02-12 21:23:56 +01:00
parent 4fe81eb5c0
commit 6680c388d6
Signed by: daniel
GPG key ID: FBB4F0E80A80222F
6 changed files with 110 additions and 13 deletions

View file

@ -21,6 +21,7 @@ If you have an actual file on disk, you can get the most information possible
(a superset of all other methods):
```python
>>> from identify import identify
>>> identify.tags_from_path('/path/to/file.py')
{'file', 'text', 'python', 'non-executable'}
>>> identify.tags_from_path('/path/to/file-with-shebang')

View file

@ -10,11 +10,11 @@ EXTENSIONS = {
'asar': {'binary', 'asar'},
'bash': {'text', 'shell', 'bash'},
'bat': {'text', 'batch'},
'bib': {'text', 'bib'},
'bmp': {'binary', 'image', 'bitmap'},
'bz2': {'binary', 'bzip2'},
'c': {'text', 'c'},
'cc': {'text', 'c++'},
'cu': {'text', 'cuda'},
'cfg': {'text'},
'chs': {'text', 'c2hs'},
'clj': {'text', 'clojure'},
@ -31,6 +31,7 @@ EXTENSIONS = {
'cson': {'text', 'cson'},
'css': {'text', 'css'},
'csv': {'text', 'csv'},
'cu': {'text', 'cuda'},
'cxx': {'text', 'c++'},
'dart': {'text', 'dart'},
'def': {'text', 'def'},
@ -89,6 +90,7 @@ EXTENSIONS = {
'key': {'text', 'pem'},
'kml': {'text', 'kml', 'xml'},
'kt': {'text', 'kotlin'},
'lean': {'text', 'lean'},
'less': {'text', 'less'},
'lhs': {'text', 'literate-haskell'},
'libsonnet': {'text', 'jsonnet'},
@ -130,31 +132,32 @@ EXTENSIONS = {
'proto': {'text', 'proto'},
'puml': {'text', 'plantuml'},
'purs': {'text', 'purescript'},
'pxd': {'text', 'cython'},
'pxi': {'text', 'cython'},
'py': {'text', 'python'},
'pyi': {'text', 'pyi'},
'pyx': {'text', 'cython'},
'pyz': {'binary', 'pyz'},
'pyzw': {'binary', 'pyz'},
'pxd': {'text', 'cython'},
'pxi': {'text', 'cython'},
'r': {'text', 'r'},
'rb': {'text', 'ruby'},
'rs': {'text', 'rust'},
'rst': {'text', 'rst'},
's': {'text', 'asm'},
'sass': {'text', 'sass'},
'sbt': {'text', 'sbt', 'scala'},
'sc': {'text', 'scala'},
'scala': {'text', 'scala'},
'scss': {'text', 'scss'},
'scm': {'text', 'scheme'},
'scss': {'text', 'scss'},
'sh': {'text', 'shell'},
'sls': {'text', 'salt'},
'so': {'binary'},
'sol': {'text', 'solidity'},
'spec': {'text', 'spec'},
'sql': {'text', 'sql'},
'ss': {'text', 'scheme'},
'styl': {'text', 'stylus'},
'sql': {'text', 'sql'},
'sv': {'text', 'system-verilog'},
'svg': {'text', 'image', 'svg', 'xml'},
'svh': {'text', 'system-verilog'},
@ -163,15 +166,17 @@ EXTENSIONS = {
'swiftdeps': {'text', 'swiftdeps'},
'tac': {'text', 'twisted', 'python'},
'tar': {'binary', 'tar'},
'tex': {'text', 'tex'},
'tf': {'text', 'terraform'},
'tfvars': {'text', 'terraform'},
'tgz': {'binary', 'gzip'},
'thrift': {'text', 'thrift'},
'tiff': {'binary', 'image', 'tiff'},
'toml': {'text', 'toml'},
'tf': {'text', 'terraform'},
'tfvars': {'text', 'terraform'},
'ts': {'text', 'ts'},
'tsx': {'text', 'tsx'},
'ttf': {'binary', 'ttf'},
'txsprofile': {'text', 'ini', 'txsprofile'},
'txt': {'text', 'plain-text'},
'v': {'text', 'verilog'},
'vdx': {'text', 'vdx'},
@ -181,11 +186,12 @@ EXTENSIONS = {
'vue': {'text', 'vue'},
'war': {'binary', 'zip', 'jar'},
'wav': {'binary', 'audio', 'wav'},
'wkt': {'text', 'wkt'},
'whl': {'binary', 'wheel', 'zip'},
'wkt': {'text', 'wkt'},
'woff': {'binary', 'woff'},
'woff2': {'binary', 'woff2'},
'wsgi': {'text', 'wsgi', 'python'},
'xhtml': {'text', 'xml', 'html', 'xhtml'},
'xml': {'text', 'xml'},
'xq': {'text', 'xquery'},
'xql': {'text', 'xquery'},
@ -209,30 +215,32 @@ EXTENSIONS_NEED_BINARY_CHECK = {
NAMES = {
'.babelrc': EXTENSIONS['json'] | {'babelrc'},
'.bashrc': EXTENSIONS['bash'],
'.bash_aliases': EXTENSIONS['bash'],
'.bash_profile': EXTENSIONS['bash'],
'.bashrc': EXTENSIONS['bash'],
'.bowerrc': EXTENSIONS['json'] | {'bowerrc'},
'.coveragerc': EXTENSIONS['ini'] | {'coveragerc'},
'.cshrc': EXTENSIONS['csh'],
'.dockerignore': {'text', 'dockerignore'},
'.editorconfig': {'text', 'editorconfig'},
'.gitconfig': EXTENSIONS['ini'] | {'gitconfig'},
'.hgrc': EXTENSIONS['ini'] | {'hgrc'},
'.flake8': EXTENSIONS['ini'] | {'flake8'},
'.gitattributes': {'text', 'gitattributes'},
'.gitconfig': EXTENSIONS['ini'] | {'gitconfig'},
'.gitignore': {'text', 'gitignore'},
'.gitmodules': {'text', 'gitmodules'},
'.hgrc': EXTENSIONS['ini'] | {'hgrc'},
'.jshintrc': EXTENSIONS['json'] | {'jshintrc'},
'.mailmap': {'text', 'mailmap'},
'.mention-bot': EXTENSIONS['json'] | {'mention-bot'},
'.npmignore': {'text', 'npmignore'},
'.pdbrc': EXTENSIONS['py'] | {'pdbrc'},
'.pypirc': EXTENSIONS['ini'] | {'pypirc'},
'.rstcheck.cfg': EXTENSIONS['ini'],
'.yamllint': EXTENSIONS['yaml'] | {'yamllint'},
'.zshrc': EXTENSIONS['zsh'],
'AUTHORS': EXTENSIONS['txt'],
'BUILD.bazel': {'text', 'bazel'},
'BUILD': {'text', 'bazel'},
'BUILD.bazel': {'text', 'bazel'},
'CMakeLists.txt': EXTENSIONS['cmake'],
'COPYING': EXTENSIONS['txt'],
'Dockerfile': {'text', 'dockerfile'},

View file

@ -141,6 +141,27 @@ def _shebang_split(line):
return line.split()
def _parse_nix_shebang(bytesio, cmd):
    """Resolve the interpreter named by the follow-up `#!` lines.

    Keeps consuming lines from `bytesio` for as long as the next two bytes
    are `#!`.  Each such line is decoded as UTF-8 and tokenized with
    `_shebang_split`; whenever a `-i` token is followed by another token,
    that following token replaces `cmd` as a one-element tuple.  A later
    `-i` therefore overrides an earlier one, and a trailing `-i` with no
    argument is ignored.

    Returns the `cmd` accumulated so far as soon as a line cannot be
    decoded or contains a character outside `printable`, and otherwise
    once the next line no longer starts with `#!`.
    """
    while True:
        if bytesio.read(2) != b'#!':
            break

        raw_line = bytesio.readline()
        try:
            text = raw_line.decode('UTF-8')
        except UnicodeDecodeError:
            return cmd

        if any(char not in printable for char in text):
            return cmd

        tokens = tuple(_shebang_split(text.strip()))
        # Pair every token with its successor so that a `-i` in the last
        # position never has a missing argument looked up.
        for flag, argument in zip(tokens, tokens[1:]):
            if flag == '-i':
                # the argument to -i flag
                cmd = (argument,)

    return cmd
def parse_shebang(bytesio):
"""Parse the shebang from a file opened for reading binary."""
if bytesio.read(2) != b'#!':
@ -159,6 +180,8 @@ def parse_shebang(bytesio):
cmd = tuple(_shebang_split(first_line.strip()))
if cmd and cmd[0] == '/usr/bin/env':
cmd = cmd[1:]
if cmd == ('nix-shell',):
return _parse_nix_shebang(bytesio, cmd)
return cmd

View file

@ -3,9 +3,14 @@ from __future__ import absolute_import
from __future__ import unicode_literals
INTERPRETERS = {
'ash': {'shell', 'ash'},
'awk': {'awk'},
'bash': {'shell', 'bash'},
'bats': {'shell', 'bash', 'bats'},
'csh': {'shell', 'csh'},
'dash': {'shell', 'dash'},
'expect': {'expect'},
'ksh': {'shell', 'ksh'},
'node': {'javascript'},
'nodejs': {'javascript'},
'perl': {'perl'},

View file

@ -1,6 +1,6 @@
[metadata]
name = identify
version = 1.4.29
version = 1.5.5
description = File identification library for Python
long_description = file: README.md
long_description_content_type = text/markdown

View file

@ -217,6 +217,66 @@ def test_file_is_text_does_not_exist(tmpdir):
(b"#!/path'with/quotes y", ("/path'with/quotes", 'y')),
# Don't regress on leading/trailing ws
(b"#! /path'with/quotes y ", ("/path'with/quotes", 'y')),
# Test nix-shell specialities with shebang on second line
(
b'#! /usr/bin/env nix-shell\n'
b'#! nix-shell -i bash -p python',
('bash',),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#! nix-shell -i python -p coreutils',
('python',),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#! nix-shell -p coreutils -i python',
('python',),
),
# multi-line and no whitespace variation
(
b'#! /usr/bin/env nix-shell\n'
b'#! nix-shell -p coreutils\n'
b'#! nix-shell -i python',
('python',),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#!nix-shell -p coreutils\n'
b'#!nix-shell -i python',
('python',),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#!\xf9\x93\x01\x42\xcd',
('nix-shell',),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#!\x00\x00\x00\x00',
('nix-shell',),
),
# non-proper nix-shell
(b'#! /usr/bin/nix-shell', ('/usr/bin/nix-shell',)),
(b'#! /usr/bin/env nix-shell', ('nix-shell',)),
(
b'#! /usr/bin/env nix-shell non-portable-argument',
('nix-shell', 'non-portable-argument'),
),
(
b'#! /usr/bin/env nix-shell\n'
b'#! nix-shell -i',
('nix-shell',), # guard against index error
),
# interpret quotes correctly
(
b'#!/usr/bin/env nix-shell\n'
b'#!nix-shell --argstr x "a -i python3 p"\n'
b'#!nix-shell -p hello\n'
b'#!nix-shell -i bash\n'
b'#!nix-shell --argstr y "b -i runhaskell q"',
('bash',),
),
(b'\xf9\x93\x01\x42\xcd', ()),
(b'#!\xf9\x93\x01\x42\xcd', ()),
(b'#!\x00\x00\x00\x00', ()),