2025-02-13 14:52:26 +01:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2025-02-13 21:16:46 +01:00
|
|
|
import datetime
|
2025-02-13 14:37:25 +01:00
|
|
|
import inspect
|
2025-02-13 06:15:54 +01:00
|
|
|
import logging
|
|
|
|
import re
|
2025-02-13 14:37:25 +01:00
|
|
|
import sys
|
2025-02-13 14:47:39 +01:00
|
|
|
import typing as t
|
2025-02-13 21:25:55 +01:00
|
|
|
from collections.abc import Collection, Set
|
2025-02-13 06:15:54 +01:00
|
|
|
from contextlib import contextmanager
|
2025-02-13 14:47:39 +01:00
|
|
|
from copy import copy
|
2025-02-13 06:15:54 +01:00
|
|
|
from enum import Enum
|
2025-02-13 15:56:32 +01:00
|
|
|
from itertools import count
|
2025-02-13 06:15:54 +01:00
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
if t.TYPE_CHECKING:
|
2025-02-13 14:53:43 +01:00
|
|
|
from sqlglot import exp
|
2025-02-13 21:03:05 +01:00
|
|
|
from sqlglot._typing import A, E, T
|
2025-02-13 14:53:43 +01:00
|
|
|
from sqlglot.expressions import Expression
|
2025-02-13 14:52:26 +01:00
|
|
|
|
2025-02-13 21:03:05 +01:00
|
|
|
|
2025-02-13 06:15:54 +01:00
|
|
|
# Logger registered under the name "sqlglot".
logger = logging.getLogger("sqlglot")

# (major, minor) of the running interpreter.
PYTHON_VERSION = sys.version_info[:2]

# Zero-width match located right before every uppercase ASCII letter, except
# for one that sits at the very start of the string.
CAMEL_CASE_PATTERN = re.compile("(?<!^)(?=[A-Z])")
|
|
|
|
|
|
|
|
|
|
|
|
class AutoName(Enum):
    """
    Base class for enums whose `auto()` members use their own identifier as value,
    i.e. a member declared as `FOO = auto()` ends up with `FOO.value == "FOO"`.

    Reference: https://docs.python.org/3/howto/enum.html#using-automatic-values
    """

    def _generate_next_value_(name, _start, _count, _last_values):
        # Hook used by the Enum machinery to compute the value of an `auto()` member;
        # only the member's name matters here, the other arguments are ignored.
        return name
|
|
|
|
|
|
|
|
|
2025-02-13 20:56:33 +01:00
|
|
|
class classproperty(property):
    """
    A `property` variant whose getter receives the owning class instead of an instance,
    so the attribute can be read directly on the class.
    """

    def __get__(self, obj: t.Any, owner: t.Any = None) -> t.Any:
        # Bind the getter to `owner` as a classmethod would, then invoke it right away.
        bound_getter = classmethod(self.fget).__get__(None, owner)  # type: ignore
        return bound_getter()
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def seq_get(seq: t.Sequence[T], index: int) -> t.Optional[T]:
    """
    Looks up `seq[index]` without raising on out-of-bounds access.

    Args:
        seq: The sequence to index into.
        index: The position to fetch (negative positions follow normal indexing rules).

    Returns:
        The element at `index`, or `None` if indexing raised an `IndexError`.
    """
    try:
        item = seq[index]
    except IndexError:
        return None
    return item
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
@t.overload
def ensure_list(value: t.Collection[T]) -> t.List[T]: ...


@t.overload
def ensure_list(value: T) -> t.List[T]: ...


def ensure_list(value):
    """
    Normalizes `value` into a list.

    Args:
        value: The value of interest.

    Returns:
        A new list holding the items of `value` when it is a list or a tuple, an empty
        list when it is `None`, and a single-element list wrapping `value` otherwise.
    """
    if isinstance(value, (list, tuple)):
        return list(value)

    return [] if value is None else [value]
|
|
|
|
|
|
|
|
|
|
|
|
@t.overload
def ensure_collection(value: t.Collection[T]) -> t.Collection[T]: ...


@t.overload
def ensure_collection(value: T) -> t.Collection[T]: ...


def ensure_collection(value):
    """
    Normalizes `value` into a collection, treating `str` and `bytes` as scalars.

    Args:
        value: The value of interest.

    Returns:
        `value` itself when it is a collection other than `str`/`bytes`, an empty list
        when it is `None`, and a single-element list wrapping `value` otherwise.
    """
    if value is None:
        return []

    # Strings and bytes are collections technically, but are handled as atomic values.
    if isinstance(value, (str, bytes)) or not isinstance(value, Collection):
        return [value]

    return value
|
|
|
|
|
|
|
|
|
2025-02-13 15:56:32 +01:00
|
|
|
def csv(*args: str, sep: str = ", ") -> str:
    """
    Joins the given strings with a separator, skipping the ones that are falsy (e.g. empty).

    Args:
        args: The string arguments to format.
        sep: The argument separator.

    Returns:
        The non-empty arguments joined by `sep`.
    """
    non_empty = filter(None, args)
    return sep.join(non_empty)
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def subclasses(
    module_name: str,
    classes: t.Type | t.Tuple[t.Type, ...],
    exclude: t.Type | t.Tuple[t.Type, ...] = (),
) -> t.List[t.Type]:
    """
    Returns all subclasses for a collection of classes, possibly excluding some of them.

    Args:
        module_name: The name of the module to search for subclasses in. It must already
            be imported, since it is looked up in `sys.modules`.
        classes: Class(es) we want to find the subclasses of.
        exclude: Class(es) we want to exclude from the returned list.

    Returns:
        The target subclasses, in the (name-sorted) order produced by `inspect.getmembers`.

    Raises:
        KeyError: If `module_name` is not present in `sys.modules`.
    """
    # The signature allows a single class here, but a bare class does not support the
    # `in` operator, so it is wrapped before being used for membership tests.
    excluded = (exclude,) if inspect.isclass(exclude) else exclude

    def is_target(obj: t.Any) -> bool:
        return inspect.isclass(obj) and issubclass(obj, classes) and obj not in excluded

    return [obj for _, obj in inspect.getmembers(sys.modules[module_name], is_target)]
|
|
|
|
|
|
|
|
|
2025-02-13 15:51:35 +01:00
|
|
|
def apply_index_offset(
    this: exp.Expression,
    expressions: t.List[E],
    offset: int,
) -> t.List[E]:
    """
    Shifts an integer-typed index expression by `offset`.

    Args:
        this: The target of the index.
        expressions: The expression the offset will be applied to, wrapped in a list.
        offset: The offset that will be applied.

    Returns:
        A single-element list with the simplified `index + offset` expression when the
        offset applies. The input list is returned unchanged when `offset` is falsy, when
        `expressions` does not contain exactly one item, when `this` is typed as something
        other than ARRAY / UNKNOWN, or when the index is not integer-typed.
    """
    if not offset or len(expressions) != 1:
        return expressions

    index = expressions[0]

    # Imported lazily, inside the function, rather than at module import time.
    from sqlglot import exp
    from sqlglot.optimizer.annotate_types import annotate_types
    from sqlglot.optimizer.simplify import simplify

    if not this.type:
        annotate_types(this)

    indexable_types = (exp.DataType.Type.UNKNOWN, exp.DataType.Type.ARRAY)
    if t.cast(exp.DataType, this.type).this not in indexable_types:
        return expressions

    if not index.type:
        annotate_types(index)

    if t.cast(exp.DataType, index.type).this not in exp.DataType.INTEGER_TYPES:
        return expressions

    logger.info("Applying array index offset (%s)", offset)
    return [simplify(index + offset)]
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def camel_to_snake_case(name: str) -> str:
    """
    Converts `name` from camelCase to upper-cased snake case.

    An underscore is inserted at every position matched by `CAMEL_CASE_PATTERN` and the
    result is then upper-cased, e.g. "fooBar" becomes "FOO_BAR".
    """
    underscored = CAMEL_CASE_PATTERN.sub("_", name)
    return underscored.upper()
|
|
|
|
|
|
|
|
|
2025-02-13 15:47:04 +01:00
|
|
|
def while_changing(expression: Expression, func: t.Callable[[Expression], E]) -> E:
    """
    Repeatedly applies `func` to `expression` until its hash stops changing.

    Args:
        expression: The expression to be transformed.
        func: The transformation to be applied.

    Returns:
        The transformed expression, i.e. the first result whose hash equals the hash of
        the expression it was produced from.
    """
    while True:
        # Store every node's hash in its `_hash` slot before transforming. The walk is
        # consumed in reverse order (presumably so that children are cached before their
        # parents -- confirm against `Expression.walk`).
        for node in reversed(tuple(expression.walk())):
            node._hash = hash(node)

        hash_before = hash(expression)
        expression = func(expression)

        # Drop the stored hashes again so the comparison below reflects the new tree.
        for node in expression.walk():
            node._hash = None

        if hash_before == hash(expression):
            return expression
|
|
|
|
|
|
|
|
|
2025-02-13 16:00:14 +01:00
|
|
|
def tsort(dag: t.Dict[T, t.Set[T]]) -> t.List[T]:
    """
    Sorts a given directed acyclic graph in topological order.

    Args:
        dag: The graph to be sorted, as a mapping from each node to the set of nodes it
            depends on. Dependencies that don't appear as keys are treated as nodes
            without dependencies. The mapping is not modified.

    Returns:
        A list that contains all of the graph's nodes in topological order, i.e. every
        node comes after all of its dependencies. Nodes that become available in the same
        round are emitted in sorted order.

    Raises:
        ValueError: If the graph contains a cycle.
    """
    # Work on a private copy so that neither the caller's dict nor its dependency sets
    # are consumed by the algorithm below.
    pending: t.Dict[T, t.Set[T]] = {node: set(deps) for node, deps in dag.items()}

    # Register nodes that are only ever mentioned as a dependency.
    for deps in tuple(pending.values()):
        for dep in deps:
            pending.setdefault(dep, set())

    result: t.List[T] = []

    while pending:
        # Nodes whose dependencies have all been emitted already.
        ready = {node for node, deps in pending.items() if not deps}

        if not ready:
            raise ValueError("Cycle error")

        for node in ready:
            del pending[node]

        for deps in pending.values():
            deps -= ready

        result.extend(sorted(ready))  # type: ignore

    return result
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def open_file(file_name: str) -> t.TextIO:
    """
    Opens a file that may be gzip-compressed and returns it as a text stream.

    The file is decoded as UTF-8 whether or not it is compressed, and it is opened with
    `newline=""`, so line endings are handed to the caller untranslated.

    Args:
        file_name: Path of the file to open.

    Returns:
        An open text file object; the caller is responsible for closing it.
    """
    # Sniff the two-byte gzip magic number to decide how to open the file.
    with open(file_name, "rb") as f:
        gzipped = f.read(2) == b"\x1f\x8b"

    if gzipped:
        import gzip

        # Explicit encoding so compressed and plain files are decoded the same way,
        # instead of falling back to the locale default for the gzip case.
        return gzip.open(file_name, "rt", encoding="utf-8", newline="")  # type: ignore

    return open(file_name, encoding="utf-8", newline="")
|
2025-02-13 06:15:54 +01:00
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
def csv_reader(read_csv: exp.ReadCSV) -> t.Any:
    """
    Returns a csv reader given the expression `READ_CSV(name, ['delimiter', '|', ...])`.

    Args:
        read_csv: A `ReadCSV` function call. Its `name` is used as the path of the file
            to read and its `expressions` as a flat list of alternating option names
            and option values.

    Yields:
        A python csv reader. The underlying file is closed when the context exits.
    """
    import csv as csv_

    # Options are resolved before the file is opened, so that a failure while reading
    # them cannot leave an open file handle behind.
    delimiter = ","
    # Zipping one iterator with itself pairs up consecutive (name, value) items.
    option_items = iter(arg.name for arg in read_csv.expressions)
    for key, value in zip(option_items, option_items):
        if key == "delimiter":
            delimiter = value

    file = open_file(read_csv.name)
    try:
        yield csv_.reader(file, delimiter=delimiter)
    finally:
        file.close()
|
2025-02-13 14:37:25 +01:00
|
|
|
|
|
|
|
|
2025-02-13 15:02:59 +01:00
|
|
|
def find_new_name(taken: t.Collection[str], base: str) -> str:
    """
    Picks a name derived from `base` that is not already in use.

    Args:
        taken: A collection of taken names.
        base: Base name to alter.

    Returns:
        `base` itself if it's available, otherwise the first of `base_2`, `base_3`, ...
        that does not appear in `taken`.
    """
    if base not in taken:
        return base

    # Numeric suffixes start at 2, as the bare base name stands for the first occurrence.
    candidates = (f"{base}_{suffix}" for suffix in count(2))
    return next(candidate for candidate in candidates if candidate not in taken)
|
2025-02-13 14:47:39 +01:00
|
|
|
|
|
|
|
|
2025-02-13 21:20:19 +01:00
|
|
|
def is_int(text: str) -> bool:
    """Returns whether `int(text)` succeeds, as determined by `is_type`."""
    return is_type(text, target_type=int)
|
|
|
|
|
|
|
|
|
|
|
|
def is_float(text: str) -> bool:
    """Returns whether `float(text)` succeeds, as determined by `is_type`."""
    return is_type(text, target_type=float)
|
|
|
|
|
|
|
|
|
|
|
|
def is_type(text: str, target_type: t.Type) -> bool:
    """
    Checks whether `text` can be converted by calling `target_type` on it.

    Args:
        text: The text to check.
        target_type: The type (or any callable) used to attempt the conversion.

    Returns:
        `False` if `target_type(text)` raises a `ValueError`, `True` otherwise. Other
        exception types are not handled and propagate to the caller.
    """
    try:
        target_type(text)
    except ValueError:
        return False
    return True
|
|
|
|
|
|
|
|
|
2025-02-13 15:56:32 +01:00
|
|
|
def name_sequence(prefix: str) -> t.Callable[[], str]:
    """
    Creates a generator of sequentially numbered names.

    Args:
        prefix: The text every generated name starts with.

    Returns:
        A zero-argument callable producing `prefix0`, `prefix1`, `prefix2`, ... on
        successive calls. Each returned callable has its own counter.
    """
    counter = count()

    def next_name() -> str:
        return f"{prefix}{next(counter)}"

    return next_name
|
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def object_to_dict(obj: t.Any, **kwargs) -> t.Dict:
    """
    Builds a dictionary out of an object's instance attributes.

    Args:
        obj: The object whose `vars()` are collected.
        kwargs: Extra entries to include; they take precedence over attributes that
            share the same name.

    Returns:
        A dictionary mapping attribute names to copies of their values. A value is copied
        through its own `copy()` method when it has one and through `copy.copy` otherwise.
    """
    result: t.Dict = {}

    for attr, value in vars(obj).items():
        result[attr] = value.copy() if hasattr(value, "copy") else copy(value)

    result.update(kwargs)
    return result
|
2025-02-13 14:47:39 +01:00
|
|
|
|
|
|
|
|
2025-02-13 14:52:26 +01:00
|
|
|
def split_num_words(
    value: str, sep: str, min_num_words: int, fill_from_start: bool = True
) -> t.List[t.Optional[str]]:
    """
    Splits `value` on `sep` and pads the result with `None` up to `min_num_words` items.

    Args:
        value: The value to be split.
        sep: The value to use to split on.
        min_num_words: The minimum number of words that are going to be in the result.
        fill_from_start: Whether the `None` padding goes at the start (`True`) or at the
            end (`False`) of the list.

    Examples:
        >>> split_num_words("db.table", ".", 3)
        [None, 'db', 'table']
        >>> split_num_words("db.table", ".", 3, fill_from_start=False)
        ['db', 'table', None]
        >>> split_num_words("db.table", ".", 1)
        ['db', 'table']

    Returns:
        The list of words returned by `split`, possibly augmented by a number of `None` values.
    """
    words = value.split(sep)

    # A non-positive repeat count yields an empty list, so nothing is ever truncated.
    padding: t.List[t.Optional[str]] = [None] * (min_num_words - len(words))

    return padding + words if fill_from_start else words + padding
|
|
|
|
|
|
|
|
|
2025-02-13 14:49:58 +01:00
|
|
|
def is_iterable(value: t.Any) -> bool:
    """
    Checks if the value is an iterable, excluding the types `str`, `bytes` and `Expression`.

    Examples:
        >>> is_iterable([1,2])
        True
        >>> is_iterable("test")
        False

    Args:
        value: The value to check if it is an iterable.

    Returns:
        `True` if `value` exposes `__iter__` and is not an instance of one of the
        excluded types, `False` otherwise.
    """
    from sqlglot import Expression

    if not hasattr(value, "__iter__"):
        return False

    return not isinstance(value, (str, bytes, Expression))
|
2025-02-13 14:49:58 +01:00
|
|
|
|
|
|
|
|
2025-02-13 15:06:33 +01:00
|
|
|
def flatten(values: t.Iterable[t.Iterable[t.Any] | t.Any]) -> t.Iterator[t.Any]:
    """
    Recursively flattens an iterable whose elements may or may not be iterable themselves.
    What counts as an iterable is decided by `is_iterable`, so objects of type `str` and
    `bytes` are yielded as-is.

    Examples:
        >>> list(flatten([[1, 2], 3, {4}, (5, "bla")]))
        [1, 2, 3, 4, 5, 'bla']
        >>> list(flatten([1, 2, 3]))
        [1, 2, 3]

    Args:
        values: The value to be flattened.

    Yields:
        Non-iterable elements in `values`.
    """
    for item in values:
        if not is_iterable(item):
            yield item
            continue

        yield from flatten(item)
|
2025-02-13 14:53:43 +01:00
|
|
|
|
|
|
|
|
|
|
|
def dict_depth(d: t.Dict) -> int:
    """
    Get the nesting depth of a dictionary, measured by following the first value
    found at each level.

    Example:
        >>> dict_depth(None)
        0
        >>> dict_depth({})
        1
        >>> dict_depth({"a": "b"})
        1
        >>> dict_depth({"a": {}})
        2
        >>> dict_depth({"a": {"b": {}}})
        3
    """
    try:
        first_value = next(iter(d.values()))
    except AttributeError:
        # `d` has no "values" attribute, so it isn't a dictionary.
        return 0
    except StopIteration:
        # `d` is an empty dictionary.
        return 1

    return 1 + dict_depth(first_value)
|
2025-02-13 14:57:38 +01:00
|
|
|
|
|
|
|
|
|
|
|
def first(it: t.Iterable[T]) -> T:
    """
    Returns the first element produced by iterating over `it` (useful for sets, which
    can't be indexed).

    Raises:
        StopIteration: If `it` yields no elements.
    """
    return next(iter(it))
|
2025-02-13 21:03:05 +01:00
|
|
|
|
|
|
|
|
|
|
|
def merge_ranges(ranges: t.List[t.Tuple[A, A]]) -> t.List[t.Tuple[A, A]]:
    """
    Merges a sequence of ranges, represented as tuples (low, high) whose values
    belong to some totally-ordered set. Ranges that overlap or touch are combined.

    Example:
        >>> merge_ranges([(1, 3), (2, 6)])
        [(1, 6)]

    Returns:
        A new list with the merged ranges, sorted in ascending order.
    """
    if not ranges:
        return []

    ordered = sorted(ranges)
    merged = ordered[:1]

    for low, high in ordered[1:]:
        prev_low, prev_high = merged[-1]
        overlaps = low <= prev_high

        if overlaps:
            # Grow the last merged range instead of starting a new one.
            merged[-1] = (prev_low, max(prev_high, high))
        else:
            merged.append((low, high))

    return merged
|
2025-02-13 21:16:46 +01:00
|
|
|
|
|
|
|
|
|
|
|
def is_iso_date(text: str) -> bool:
    """Returns whether `text` is accepted by `datetime.date.fromisoformat`."""
    try:
        datetime.date.fromisoformat(text)
    except ValueError:
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def is_iso_datetime(text: str) -> bool:
    """Returns whether `text` is accepted by `datetime.datetime.fromisoformat`."""
    try:
        datetime.datetime.fromisoformat(text)
    except ValueError:
        return False
    return True
|
|
|
|
|
|
|
|
|
|
|
|
# Interval units that operate on date components
DATE_UNITS = {
    "day",
    "week",
    "month",
    "quarter",
    "year",
    "year_month",
}


def is_date_unit(expression: t.Optional[exp.Expression]) -> bool:
    """
    Checks whether the name of `expression` is one of `DATE_UNITS`, ignoring case.

    Args:
        expression: The unit expression to check, if any.

    Returns:
        `False` when `expression` is `None` or its lower-cased name is not a date unit,
        `True` otherwise.
    """
    if expression is None:
        return False

    return expression.name.lower() in DATE_UNITS
|
2025-02-13 21:25:55 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Key and value type parameters of `SingleValuedMapping`.
K = t.TypeVar("K")
V = t.TypeVar("V")


class SingleValuedMapping(t.Mapping[K, V]):
    """
    Read-only mapping in which every key is associated with one shared value.

    Only the set of keys and the single value are stored. This rigamarole is meant to
    avoid copying keys, which was originally intended as an optimization while
    qualifying columns for tables with lots of columns.
    """

    def __init__(self, keys: t.Collection[K], value: V):
        # A collection that is already a set is kept as-is (no copy); anything else is
        # converted so that membership checks go through a set.
        if isinstance(keys, Set):
            self._keys = keys
        else:
            self._keys = set(keys)

        self._value = value

    def __getitem__(self, key: K) -> V:
        if key not in self._keys:
            raise KeyError(key)
        return self._value

    def __len__(self) -> int:
        return len(self._keys)

    def __iter__(self) -> t.Iterator[K]:
        return iter(self._keys)
|