#!/usr/bin/env python
# A file folder scanner contributed by @holger
#
# You can spicify the scanned folder and file pattern by changing rootPath
# and pattern variables
#

__author__ = "holger"

from treelib import tree

import fnmatch
import os
import zlib
import argparse

DEBUG = 0
FILECOUNT = 0
DIRCOUNT = 0
DIR_ERRORLIST = []
FILE_ERRORLIST = []


# Time Profiling
PROFILING = 0
# 0 - nothing
# 1 - time
# 2 - cProfile

if PROFILING == 1:
    import timeit
if PROFILING == 2:
    import cProfile


parser = argparse.ArgumentParser(
    description="Scan the given folder and print its structure in a tree."
)
parser.add_argument("abspath", type=str, help="An absolute path to be scanned.")
parser.add_argument(
    "pattern", type=str, help="File name pattern to filtered, e.g. *.pdf"
)

args = parser.parse_args()
rootPath = args.abspath
pattern = args.pattern

folder_blacklist = []

dir_tree = tree.Tree()
dir_tree.create_node("Root", rootPath)  # root node


def crc32(data):
    data = bytes(data, "UTF-8")

    if DEBUG:
        print("++++++ CRC32 ++++++")
        print("input: " + str(data))
        print("crc32: " + hex(zlib.crc32(data) & 0xFFFFFFFF))
        print("+++++++++++++++++++")
    return hex(
        zlib.crc32(data) & 0xFFFFFFFF
    )  # crc32 returns a signed value, &-ing it will match py3k


parent = rootPath
i = 1

# calculating start depth
start_depth = rootPath.count("/")


def get_noteid(depth, root, dir):
    """get_noteid returns
    - depth contains the current depth of the folder hierarchy
    - dir contains the current directory

    Function returns a string containing the current depth, the folder name and unique ID build by hashing the
    absolute path of the directory. All spaces are replaced by '_'

    <depth>_<dirname>+++<crc32>
    e.g. 2_Folder_XYZ_1+++<crc32>
    """
    return (
        str(str(depth) + "_" + dir).replace(" ", "_")
        + "+++"
        + crc32(os.path.join(root, dir))
    )


# TODO: Verzeichnistiefe pruefen: Was ist mit sowas /mp3/


def get_parentid(current_depth, root, dir):
    # special case for the 'root' of the tree
    # because we don't want a cryptic root-name
    if current_depth == 0:
        return root

    # looking for parent directory
    # e.g. /home/user1/mp3/folder1/parent_folder/current_folder
    # get 'parent_folder'

    search_string = os.path.join(root, dir)
    pos2 = search_string.rfind("/")
    pos1 = search_string.rfind("/", 0, pos2)
    parent_dir = search_string[pos1 + 1 : pos2]  # noqa: E203
    parentid = (
        str(current_depth - 1)
        + "_"
        + parent_dir.replace(" ", "_")
        + "+++"
        + crc32(root)
    )
    return parentid
    # TODO: catch error


def print_node(dir, node_id, parent_id):
    print("#############################")
    print("node created")
    print("      dir:     " + dir)
    print("      note_id: " + node_id)
    print("      parent:  " + parent_id)


def crawler():
    global DIRCOUNT
    global FILECOUNT

    for root, dirs, files in os.walk(rootPath):
        # +++ DIRECTORIES +++
        for dir in dirs:
            # calculating current depth
            current_depth = os.path.join(root, dir).count("/") - start_depth

            if DEBUG:
                print("current: " + os.path.join(root, dir))

            node_id = get_noteid(current_depth, root, dir)
            parent_id = str(get_parentid(current_depth, root, dir))

            if parent_id == str(None):
                DIR_ERRORLIST.append(os.path.join(root, dir))

            if DEBUG:
                print_node(dir, node_id, parent_id)

            # create node
            dir_tree.create_node(dir, node_id, parent_id)
            DIRCOUNT += 1

        # +++ FILES +++
        for filename in fnmatch.filter(files, pattern):
            if dir in folder_blacklist:
                continue

            # calculating current depth
            current_depth = os.path.join(root, filename).count("/") - start_depth

            if DEBUG:
                print("current: " + os.path.join(root, filename))

            node_id = get_noteid(current_depth, root, filename)
            parent_id = str(get_parentid(current_depth, root, filename))

            if parent_id == str(None):
                FILE_ERRORLIST.append(os.path.join(root, dir))

            if DEBUG:
                print_node(filename, node_id, parent_id)

            # create node
            dir_tree.create_node(filename, node_id, parent_id)
            FILECOUNT += 1


if PROFILING == 0:
    crawler()
if PROFILING == 1:
    t1 = timeit.Timer("crawler()", "from __main__ import crawler")
    print("time:      " + str(t1.timeit(number=1)))
if PROFILING == 2:
    cProfile.run("crawler()")


print("filecount: " + str(FILECOUNT))
print("dircount:  " + str(DIRCOUNT))

if DIR_ERRORLIST:
    for item in DIR_ERRORLIST:
        print(item)
else:
    print("no directory errors")

print("\n\n\n")

if FILE_ERRORLIST:
    for item in FILE_ERRORLIST:
        print(item)
else:
    print("no file errors")

print("nodes: " + str(len(dir_tree.nodes)))

dir_tree.show()