from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_formatted_time,
    date_delta_sql,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
)
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )
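

# Lowercases only the function name, leaving the arguments untouched,
# e.g. "MAP(a, b)" -> "map(a, b)"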
def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]
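

# Renders exp.Quantile in ClickHouse's parameterized syntax, e.g. quantile(0.5)(x),
# or quantiles(0.25, 0.75)(x) when a list of levels is given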
def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args
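

# countIf(cond) maps to the dedicated exp.CountIf node; the multi-argument form
# keeps the count + If combinator split so it round-trips as a combined aggregate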
def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_UNION = False

        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            # ClickHouse's dateAdd/dateDiff take the unit first and the date(s) last,
            # the reverse of exp.DateAdd/exp.DateDiff, hence the reversed indices below
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": _build_date_format,
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
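        # e.g. AGG_FUNC_MAPPING["sumIf"] == ("sum", "If"), AGG_FUNC_MAPPING["sum"] == ("sum", "")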

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
        # the Postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }
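
        # Handles ClickHouse's ternary operator, e.g. `x > 10 ? 'high' : 'low'`,
        # which parses into an exp.If expression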
        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>, e.g. sumIf = sum + If
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in the base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr
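
        # Parses the second argument list of a parameterized aggregate call,
        # e.g. the trailing (x) in quantile(0.5)(x)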
        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None
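
        # quantile(0.5)(x) parses with `this` = x and `quantile` = 0.5; a bare
        # QUANTILE(x) defaults the level to the median, 0.5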
        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)
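
        # Corresponds to ALTER TABLE ... REPLACE PARTITION <partition> FROM <source table>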
        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: date_delta_sql("DATE_ADD"),
            exp.DateDiff: date_delta_sql("DATE_DIFF"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_UNION = True
        GROUPINGS_SEP = ""
        OUTER_UNION_MODIFIERS = False

        # There's no list in the docs, but it can be found in ClickHouse code,
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }
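
        # ClickHouse's JSONExtract* functions index from 1, so 0-based JSON path
        # subscripts are shifted up by one, e.g. $[0] -> 1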
        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)
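
        # Scalar CTEs of the form WITH <expr> AS <name>, e.g. WITH 1 AS x SELECT x,
        # render as "<expr> AS <alias>" rather than the subquery form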
        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]
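
        # A parameterized aggregate keeps its first argument list in `expressions` and
        # its second in `params`, so e.g. quantile(0.5)(x) round-trips unchanged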
        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"