1
0
Fork 0
treelib/examples/folder_tree.py

208 lines
5 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# A file folder scanner contributed by @holger
#
# You can specify the scanned folder and file pattern via the abspath
# and pattern command-line arguments
#
__author__ = "holger"
from treelib import tree
import fnmatch
import os
import zlib
import argparse
# Runtime switches and bookkeeping shared by the functions below.
DEBUG = 0  # truthy -> verbose output while hashing and creating nodes
FILECOUNT = 0  # incremented by crawler() for every file node created
DIRCOUNT = 0  # incremented by crawler() for every directory node created
DIR_ERRORLIST = []  # directory paths whose parent id came back as "None"
FILE_ERRORLIST = []  # same, for files

# Time Profiling
#   0 - nothing
#   1 - time
#   2 - cProfile
PROFILING = 0
if PROFILING == 1:
    import timeit
elif PROFILING == 2:
    import cProfile

# Command line: <abspath> <pattern>
parser = argparse.ArgumentParser(
    description="Scan the given folder and print its structure in a tree."
)
for arg_name, arg_help in (
    ("abspath", "An absolute path to be scanned."),
    ("pattern", "File name pattern to filtered, e.g. *.pdf"),
):
    parser.add_argument(arg_name, type=str, help=arg_help)
args = parser.parse_args()
rootPath, pattern = args.abspath, args.pattern

# Folder names listed here are consulted by crawler() when adding files.
folder_blacklist = []

# The tree that mirrors the folder structure; the scanned path itself is
# used as the identifier of the root node.
dir_tree = tree.Tree()
dir_tree.create_node("Root", rootPath)  # root node
def crc32(data):
    """Return the CRC-32 checksum of the string ``data`` as a hex string.

    ``data`` is encoded as UTF-8 before it is hashed. When the module-level
    ``DEBUG`` flag is set, the encoded input and the checksum are printed.
    """
    encoded = bytes(data, "UTF-8")
    # crc32 returns a signed value on some versions; masking it to 32 bits
    # makes the result match py3k.
    checksum = hex(zlib.crc32(encoded) & 0xFFFFFFFF)
    if DEBUG:
        print("++++++ CRC32 ++++++")
        print("input: " + str(encoded))
        print("crc32: " + checksum)
        print("+++++++++++++++++++")
    return checksum
parent = rootPath
i = 1
# calculating start depth
# Entries directly below the scanned folder must end up with depth 0: that is
# the case in which get_parentid() returns the walked folder itself, which is
# also the identifier of the tree's root node.
start_depth = rootPath.count("/")
if not rootPath.endswith("/"):
    # Without a trailing "/", os.path.join() adds one separator between the
    # scanned folder and its children; count it here so those children still
    # get depth 0 instead of 1.
    start_depth += 1
def get_noteid(depth, root, dir):
    """Build the node identifier for the entry ``dir`` located in ``root``.

    - depth is the entry's depth in the folder hierarchy
    - root is the folder that contains the entry
    - dir is the entry's own name
    The identifier has the form <depth>_<name>+++<crc32>. Every space in the
    <depth>_<name> part is replaced by '_', and <crc32> is the checksum of
    the path obtained by joining root and dir.
    e.g. 2_Folder_XYZ_1+++<crc32>
    """
    readable_part = (str(depth) + "_" + dir).replace(" ", "_")
    path_checksum = crc32(os.path.join(root, dir))
    return readable_part + "+++" + path_checksum
# TODO: check directory depth: what about something like /mp3/
def get_parentid(current_depth, root, dir):
    """Return the identifier of the node that is the parent of ``dir``.

    - current_depth is the depth of the entry ``dir``
    - root is the folder that contains the entry
    - dir is the entry's own name
    At depth 0 the parent is the tree root, whose identifier is ``root``
    itself, so no cryptic name is generated. Otherwise the result has the
    form <current_depth - 1>_<parent folder name>+++<crc32 of root>, with
    spaces in the folder name replaced by '_'.
    """
    if current_depth != 0:
        # Take the path component just before the entry's own name,
        # e.g. /home/user1/mp3/folder1/parent_folder/current_folder
        # yields 'parent_folder'.
        full_path = os.path.join(root, dir)
        last_sep = full_path.rfind("/")
        prev_sep = full_path.rfind("/", 0, last_sep)
        parent_name = full_path[prev_sep + 1 : last_sep]  # noqa: E203
        readable_part = str(current_depth - 1) + "_" + parent_name.replace(" ", "_")
        return readable_part + "+++" + crc32(root)
    # special case: children of the scanned folder hang off the root node
    return root
    # TODO: catch error
def print_node(dir, node_id, parent_id):
    """Print a short debug report about a newly created node.

    - dir is the name of the entry
    - node_id is the identifier of the node
    - parent_id is the identifier of the node's parent
    """
    print("#############################")
    print("node created")
    labelled_values = (
        (" dir: ", dir),
        (" note_id: ", node_id),
        (" parent: ", parent_id),
    )
    for label, value in labelled_values:
        print(label + value)
def _add_node(root, name, error_list):
    """Create the tree node for the entry ``name`` found directly in ``root``.

    The full path of the entry is appended to ``error_list`` when no parent
    identifier could be determined for it.
    """
    full_path = os.path.join(root, name)
    # calculating current depth relative to the scanned folder
    current_depth = full_path.count("/") - start_depth
    if DEBUG:
        print("current: " + full_path)
    node_id = get_noteid(current_depth, root, name)
    parent_id = str(get_parentid(current_depth, root, name))
    # a missing parent id (None) shows up here as the string "None"
    if parent_id == str(None):
        error_list.append(full_path)
    if DEBUG:
        print_node(name, node_id, parent_id)
    # create node
    dir_tree.create_node(name, node_id, parent_id)


def crawler():
    """Walk ``rootPath`` and mirror its content into ``dir_tree``.

    Every directory reported by ``os.walk`` becomes a node. Files become
    nodes only when their name matches the module-level ``pattern`` and the
    folder that contains them is not named in ``folder_blacklist``.
    Sub-directories of a blacklisted folder are still walked.
    ``DIRCOUNT`` and ``FILECOUNT`` are incremented once per created node.
    """
    global DIRCOUNT
    global FILECOUNT
    for root, dirs, files in os.walk(rootPath):
        # +++ DIRECTORIES +++
        for dirname in dirs:
            _add_node(root, dirname, DIR_ERRORLIST)
            DIRCOUNT += 1
        # +++ FILES +++
        # The blacklist applies to the folder that holds the files; normpath
        # drops a trailing separator so the folder name is never empty.
        if os.path.basename(os.path.normpath(root)) in folder_blacklist:
            continue
        for filename in fnmatch.filter(files, pattern):
            _add_node(root, filename, FILE_ERRORLIST)
            FILECOUNT += 1
# Run the scan, optionally timed or profiled depending on PROFILING.
if PROFILING == 1:
    t1 = timeit.Timer("crawler()", "from __main__ import crawler")
    print("time: " + str(t1.timeit(number=1)))
elif PROFILING == 2:
    cProfile.run("crawler()")
elif PROFILING == 0:
    crawler()

# Summary of what was added to the tree.
print("filecount: " + str(FILECOUNT))
print("dircount: " + str(DIRCOUNT))

# List the problematic directories, or a single line when there are none.
for item in DIR_ERRORLIST or ["no directory errors"]:
    print(item)
print("\n\n\n")
# Same report for files.
for item in FILE_ERRORLIST or ["no file errors"]:
    print(item)

print("nodes: " + str(len(dir_tree.nodes)))
dir_tree.show()