Adding upstream version 1.6.4.
Signed-off-by: Daniel Baumann <daniel@debian.org>
This commit is contained in:
parent
a5555eb4a1
commit
d5b8e0af0d
42 changed files with 3857 additions and 0 deletions
examples
207
examples/folder_tree.py
Normal file
207
examples/folder_tree.py
Normal file
|
@ -0,0 +1,207 @@
|
|||
#!/usr/bin/env python
|
||||
# A file folder scanner contributed by @holger
|
||||
#
|
||||
# You can specify the scanned folder and file pattern by changing rootPath
|
||||
# and pattern variables
|
||||
#
|
||||
|
||||
__author__ = "holger"
|
||||
|
||||
from treelib import tree
|
||||
|
||||
import fnmatch
|
||||
import os
|
||||
import zlib
|
||||
import argparse
|
||||
|
||||
# Debug switch (non-zero prints a trace of every node that is created)
DEBUG = 0

# Scan statistics, updated by crawler()
FILECOUNT = 0
DIRCOUNT = 0

# Paths whose parent id came back as "None" while scanning
DIR_ERRORLIST = []
FILE_ERRORLIST = []

# Time Profiling
#    0 - nothing
#    1 - time
#    2 - cProfile
PROFILING = 0

# Only import the profiling helper that the selected mode needs
if PROFILING == 1:
    import timeit
elif PROFILING == 2:
    import cProfile

# Command line interface: the folder to scan and the file name pattern
parser = argparse.ArgumentParser(
    description="Scan the given folder and print its structure in a tree."
)
parser.add_argument("abspath", help="An absolute path to be scanned.", type=str)
parser.add_argument(
    "pattern", help="File name pattern to filtered, e.g. *.pdf", type=str
)
args = parser.parse_args()
rootPath, pattern = args.abspath, args.pattern

# Folder names consulted by crawler() before files are added
folder_blacklist = []

# The tree starts with a single node whose identifier is the scanned path
dir_tree = tree.Tree()
dir_tree.create_node("Root", rootPath)  # root node
|
||||
|
||||
|
||||
def crc32(data):
    """Return the CRC32 checksum of the string ``data`` as a hex string.

    - data contains the text to hash (here: a file system path)

    The text is encoded as UTF-8 before hashing. Function returns the
    checksum formatted by hex(), e.g. '0x352441c2'. When DEBUG is set the
    input bytes and the checksum are printed as well.
    """
    # os.walk() represents undecodable file names with surrogate escapes; a
    # strict UTF-8 encode raises UnicodeEncodeError on those, so map them
    # back to their original bytes instead.
    raw = bytes(data, "UTF-8", "surrogateescape")

    # crc32 returns a signed value, &-ing it will match py3k
    checksum = hex(zlib.crc32(raw) & 0xFFFFFFFF)

    if DEBUG:
        print("++++++ CRC32 ++++++")
        print("input: " + str(raw))
        print("crc32: " + checksum)
        print("+++++++++++++++++++")

    return checksum
|
||||
|
||||
|
||||
parent = rootPath
i = 1

# calculating start depth
#
# The depth of an entry is the number of "/" in its joined path minus
# start_depth, and entries directly inside rootPath must end up at depth 0
# (get_parentid only maps depth 0 to the root node). os.path.join() adds one
# separator between rootPath and a direct child unless rootPath already ends
# with "/", so trailing slashes are dropped and that one separator is counted
# explicitly. This gives the same base depth for "/some/dir" and "/some/dir/".
start_depth = rootPath.rstrip("/").count("/") + 1
|
||||
|
||||
|
||||
def get_noteid(depth, root, dir):
    """Build the node identifier for one entry of the scanned hierarchy.

    - depth contains the current depth of the folder hierarchy
    - root contains the path of the folder that holds the entry
    - dir contains the name of the entry itself

    Function returns a string made of the depth, the entry name (with all
    spaces replaced by '_') and the CRC32 of the entry's joined path:

    <depth>_<dirname>+++<crc32>
    e.g. 2_Folder_XYZ_1+++<crc32>
    """
    # readable part: depth and name, spaces turned into underscores
    label = (str(depth) + "_" + dir).replace(" ", "_")
    # hash of root joined with the entry name
    checksum = crc32(os.path.join(root, dir))
    return label + "+++" + checksum
|
||||
|
||||
|
||||
# TODO: check the directory depth: what about something like /mp3/
|
||||
|
||||
|
||||
def get_parentid(current_depth, root, dir):
    """Return the node identifier of the folder that contains an entry.

    - current_depth contains the depth of the entry in the folder hierarchy
    - root contains the path of the folder that holds the entry
    - dir contains the name of the entry (not needed to derive the parent,
      kept for the existing call signature)

    For depth 0 the function returns root unchanged. Otherwise it returns

    <depth - 1>_<name of root's last folder>+++<crc32 of root>

    with spaces in the folder name replaced by '_'.
    """
    # special case for the 'root' of the tree
    # because we don't want a cryptic root-name
    if current_depth == 0:
        return root

    # TODO: catch error

    # looking for parent directory
    # e.g. /home/user1/mp3/folder1/parent_folder/current_folder
    # get 'parent_folder', i.e. the last component of root
    parent_dir = os.path.basename(root.rstrip("/"))

    return (
        str(current_depth - 1)
        + "_"
        + parent_dir.replace(" ", "_")
        + "+++"
        + crc32(root)
    )
|
||||
|
||||
|
||||
def print_node(dir, node_id, parent_id):
    """Print a short report about a node: its name, its id and its parent id.

    All three arguments are concatenated to the labels, so they are expected
    to be strings.
    """
    print("#############################")
    print("node created")
    # one labelled line per value, in the order name / id / parent
    labelled = (
        (" dir: ", dir),
        (" note_id: ", node_id),
        (" parent: ", parent_id),
    )
    for label, value in labelled:
        print(label + value)
|
||||
|
||||
|
||||
def _add_node(name, root, error_list):
    """Create the tree node for the entry ``name`` found in folder ``root``.

    The entry's path is appended to ``error_list`` when its parent id comes
    back as the string "None".
    """
    full_path = os.path.join(root, name)

    # calculating current depth
    current_depth = full_path.count("/") - start_depth

    if DEBUG:
        print("current: " + full_path)

    node_id = get_noteid(current_depth, root, name)
    parent_id = str(get_parentid(current_depth, root, name))

    if parent_id == str(None):
        error_list.append(full_path)

    if DEBUG:
        print_node(name, node_id, parent_id)

    # create node
    dir_tree.create_node(name, node_id, parent_id)


def crawler():
    """Walk rootPath and add its folders and matching files to dir_tree.

    Every sub-folder reported by os.walk() and every file whose name matches
    ``pattern`` becomes a node below the node of the folder that contains it.
    DIRCOUNT and FILECOUNT are increased by the number of folder and file
    nodes created.
    """
    global DIRCOUNT
    global FILECOUNT

    for root, dirs, files in os.walk(rootPath):
        # +++ DIRECTORIES +++
        for dir in dirs:
            _add_node(dir, root, DIR_ERRORLIST)
            DIRCOUNT += 1

        # +++ FILES +++
        # The blacklist is checked against the folder that holds the files.
        # The loop variable of the directory loop must not be used here: it
        # is unset when no folder has been seen yet and otherwise still names
        # the last sub-folder that was processed.
        if os.path.basename(root) in folder_blacklist:
            continue

        for filename in fnmatch.filter(files, pattern):
            _add_node(filename, root, FILE_ERRORLIST)
            FILECOUNT += 1
|
||||
|
||||
|
||||
# Run the scan once: plain, timed with timeit, or under cProfile
if PROFILING == 0:
    crawler()
elif PROFILING == 1:
    t1 = timeit.Timer("crawler()", "from __main__ import crawler")
    print("time: " + str(t1.timeit(number=1)))
elif PROFILING == 2:
    cProfile.run("crawler()")

# Summary of what was added to the tree
print("filecount: " + str(FILECOUNT))
print("dircount: " + str(DIRCOUNT))

# Folders that were recorded as errors during the scan
if not DIR_ERRORLIST:
    print("no directory errors")
else:
    for item in DIR_ERRORLIST:
        print(item)

print("\n\n\n")

# Files that were recorded as errors during the scan
if not FILE_ERRORLIST:
    print("no file errors")
else:
    for item in FILE_ERRORLIST:
        print(item)

print("nodes: " + str(len(dir_tree.nodes)))

# Finally render the collected folder structure
dir_tree.show()
|
Loading…
Add table
Add a link
Reference in a new issue