sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
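# Illustrative example (not part of the module): `_unix_to_time_sql` above maps
# epoch values to ClickHouse's fromUnixTimestamp* family, casting the input to
# Int64 and dispatching on the expression's `scale`. A minimal sketch using the
# public sqlglot API; the output shown is indicative:
#
#     import sqlglot
#
#     sqlglot.transpile("SELECT FROM_UNIXTIME(1700000000)", read="mysql", write="clickhouse")
#     # expected: ["SELECT fromUnixTimestamp(CAST(1700000000 AS Int64))"]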
def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]:
        # Clickhouse allows VALUES to have an embedded structure e.g:
        # VALUES('person String, place String', ('Noah', 'Paris'), ...)
        # In this case, we don't want to qualify the columns
        values = expression.expressions[0].expressions

        structure = (
            values[0]
            if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple))
            else None
        )
        if structure:
            # Split each column definition into the column name e.g:
            # 'person String, place String' -> ['person', 'place']
            structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")]
            column_aliases = [
                exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs
            ]
        else:
            # Default column aliases in CH are "c1", "c2", etc.
            column_aliases = [
                exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions))
            ]

        return column_aliases
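    # Illustrative sketch (not part of the module): the alias derivation above
    # reduces to splitting each column definition on whitespace, e.g.:
    #
    #     structure = "person String, place String"
    #     [coldef.strip().split(" ")[0] for coldef in structure.split(",")]
    #     # -> ['person', 'place']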
    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")
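        # Illustrative example (not part of the module): per _build_count_if above,
        # one-argument countIf parses into exp.CountIf, while extra arguments keep
        # it as a combined aggregate. A hedged sketch using the public API:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     node = sqlglot.parse_one("SELECT countIf(x > 1) FROM t", read="clickhouse")
        #     isinstance(node.selects[0], exp.CountIf)  # expected: True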
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
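        # Illustrative sketch (not part of the module): AGG_FUNC_MAPPING above
        # cross-products every aggregate name with every suffix (plus the bare
        # name), so a lookup can split a combinator call back into its parts:
        #
        #     mapping = {f"{f}{sfx}": (f, sfx) for sfx in ["If", ""] for f in ["sum", "avg"]}
        #     mapping["sumIf"]  # -> ('sum', 'If')
        #     mapping["avg"]    # -> ('avg', '')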
"sequenceNextNode", 446 "exponentialTimeDecayedAvg", 447 } 448 449 AGG_FUNCTIONS_SUFFIXES = [ 450 "If", 451 "Array", 452 "ArrayIf", 453 "Map", 454 "SimpleState", 455 "State", 456 "Merge", 457 "MergeState", 458 "ForEach", 459 "Distinct", 460 "OrDefault", 461 "OrNull", 462 "Resample", 463 "ArgMin", 464 "ArgMax", 465 ] 466 467 FUNC_TOKENS = { 468 *parser.Parser.FUNC_TOKENS, 469 TokenType.AND, 470 TokenType.OR, 471 TokenType.SET, 472 } 473 474 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 475 476 ID_VAR_TOKENS = { 477 *parser.Parser.ID_VAR_TOKENS, 478 TokenType.LIKE, 479 } 480 481 AGG_FUNC_MAPPING = ( 482 lambda functions, suffixes: { 483 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 484 } 485 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 486 487 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 488 489 FUNCTION_PARSERS = { 490 **parser.Parser.FUNCTION_PARSERS, 491 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 492 "QUANTILE": lambda self: self._parse_quantile(), 493 "MEDIAN": lambda self: self._parse_quantile(), 494 "COLUMNS": lambda self: self._parse_columns(), 495 } 496 497 FUNCTION_PARSERS.pop("MATCH") 498 499 PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy() 500 PROPERTY_PARSERS.pop("DYNAMIC") 501 502 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 503 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 504 505 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 506 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 507 508 RANGE_PARSERS = { 509 **parser.Parser.RANGE_PARSERS, 510 TokenType.GLOBAL: lambda self, this: self._parse_global_in(this), 511 } 512 513 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 514 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )
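        # Illustrative example (not part of the module): because ClickHouse's
        # two-argument extract() does pattern matching rather than date-part
        # extraction, _parse_extract above deliberately keeps it anonymous:
        #
        #     import sqlglot
        #     from sqlglot import exp
        #
        #     node = sqlglot.parse_one("SELECT extract('foobar', 'b')", read="clickhouse")
        #     isinstance(node.selects[0], exp.Anonymous)  # expected: True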
        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            if isinstance(this, exp.Identifier) and not this.quoted:
                this = exp.var(this.name)

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)
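        # Illustrative example (not part of the module): _parse_assignment above
        # turns ClickHouse's ternary `cond ? a : b` (a `?` placeholder token
        # followed by `:`) into an exp.If node. Indicative transpilation:
        #
        #     import sqlglot
        #
        #     sqlglot.transpile("SELECT x > 1 ? 'a' : 'b'", read="clickhouse", write="duckdb")
        #     # expected: something like ["SELECT CASE WHEN x > 1 THEN 'a' ELSE 'b' END"]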
        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None
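        # Illustrative example (not part of the module): _parse_function and
        # _parse_func_params above handle ClickHouse's parametric aggregate
        # syntax, where parameters sit in a separate parenthesized list:
        #
        #     import sqlglot
        #
        #     sqlglot.transpile("SELECT quantile(0.9)(x) FROM t", read="clickhouse", write="clickhouse")
        #     # expected: ["SELECT quantile(0.9)(x) FROM t"]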
        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_string() or self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)

            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()
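        # Illustrative example (not part of the module): the partition helpers
        # above parse ALTER TABLE's REPLACE PARTITION clause into
        # exp.ReplacePartition / exp.PartitionId. Indicative round trip:
        #
        #     import sqlglot
        #
        #     sqlglot.transpile(
        #         "ALTER TABLE t1 REPLACE PARTITION ID '123' FROM t2", read="clickhouse", write="clickhouse"
        #     )
        #     # expected: ["ALTER TABLE t1 REPLACE PARTITION ID '123' FROM t2"]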
        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))

            return this

        def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
            value = super()._parse_value(values=values)
            if not value:
                return None

            # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast
            # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one.
            # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics),
            # but the final result is not altered by the extra parentheses.
            # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression
            expressions = value.expressions
            if values and not isinstance(expressions[-1], exp.Tuple):
                value.set(
                    "expressions",
                    [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions],
                )

            return value

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        ARRAY_SIZE_NAME = "LENGTH"
        WRAP_DERIVED_VALUES = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
"Decimal128", 1005 exp.DataType.Type.DECIMAL256: "Decimal256", 1006 exp.DataType.Type.TIMESTAMP: "DateTime", 1007 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1008 exp.DataType.Type.DOUBLE: "Float64", 1009 exp.DataType.Type.ENUM: "Enum", 1010 exp.DataType.Type.ENUM8: "Enum8", 1011 exp.DataType.Type.ENUM16: "Enum16", 1012 exp.DataType.Type.FIXEDSTRING: "FixedString", 1013 exp.DataType.Type.FLOAT: "Float32", 1014 exp.DataType.Type.INT: "Int32", 1015 exp.DataType.Type.MEDIUMINT: "Int32", 1016 exp.DataType.Type.INT128: "Int128", 1017 exp.DataType.Type.INT256: "Int256", 1018 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1019 exp.DataType.Type.MAP: "Map", 1020 exp.DataType.Type.NESTED: "Nested", 1021 exp.DataType.Type.SMALLINT: "Int16", 1022 exp.DataType.Type.STRUCT: "Tuple", 1023 exp.DataType.Type.TINYINT: "Int8", 1024 exp.DataType.Type.UBIGINT: "UInt64", 1025 exp.DataType.Type.UINT: "UInt32", 1026 exp.DataType.Type.UINT128: "UInt128", 1027 exp.DataType.Type.UINT256: "UInt256", 1028 exp.DataType.Type.USMALLINT: "UInt16", 1029 exp.DataType.Type.UTINYINT: "UInt8", 1030 exp.DataType.Type.IPV4: "IPv4", 1031 exp.DataType.Type.IPV6: "IPv6", 1032 exp.DataType.Type.POINT: "Point", 1033 exp.DataType.Type.RING: "Ring", 1034 exp.DataType.Type.LINESTRING: "LineString", 1035 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1036 exp.DataType.Type.POLYGON: "Polygon", 1037 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1038 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1039 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1040 exp.DataType.Type.DYNAMIC: "Dynamic", 1041 } 1042 1043 TRANSFORMS = { 1044 **generator.Generator.TRANSFORMS, 1045 exp.AnyValue: rename_func("any"), 1046 exp.ApproxDistinct: rename_func("uniq"), 1047 exp.ArrayConcat: rename_func("arrayConcat"), 1048 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1049 exp.ArraySum: rename_func("arraySum"), 1050 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1051 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1052 exp.Array: inline_array_sql, 1053 exp.CastToStrType: rename_func("CAST"), 1054 exp.CountIf: rename_func("countIf"), 1055 exp.CompressColumnConstraint: lambda self, 1056 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1057 exp.ComputedColumnConstraint: lambda self, 1058 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1059 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1060 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1061 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1062 exp.DateStrToDate: rename_func("toDate"), 1063 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1064 exp.Explode: rename_func("arrayJoin"), 1065 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1066 exp.IsNan: rename_func("isNaN"), 1067 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1068 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1069 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1070 exp.JSONPathKey: json_path_key_only_name, 1071 exp.JSONPathRoot: lambda *_: "", 1072 exp.Length: length_or_char_length_sql, 1073 exp.Map: _map_sql, 1074 exp.Median: rename_func("median"), 1075 exp.Nullif: rename_func("nullIf"), 1076 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1077 exp.Pivot: no_pivot_sql, 1078 exp.Quantile: _quantile_sql, 1079 exp.RegexpLike: lambda self, e: self.func("match", e.this, 
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }
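        # Illustrative example (not part of the module): NON_NULLABLE_TYPES above
        # feeds trycast_sql below, which emulates TRY_CAST by widening the target
        # type to Nullable(T) whenever that is a legal ClickHouse type:
        #
        #     import sqlglot
        #
        #     sqlglot.transpile("SELECT TRY_CAST(x AS TEXT)", write="clickhouse")
        #     # expected: ["SELECT CAST(x AS Nullable(String))"]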
        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
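        # Illustrative example (not part of the module): datatype_sql below only
        # wraps a type in Nullable(...) when the ClickHouse parser did not already
        # pin it down as non-nullable (see _parse_types in the Parser above):
        #
        #     import sqlglot
        #
        #     sqlglot.transpile("SELECT CAST(x AS TEXT)", read="postgres", write="clickhouse")
        #     # expected: ["SELECT CAST(x AS Nullable(String))"]
        #
        #     sqlglot.transpile("SELECT CAST(x AS String)", read="clickhouse", write="clickhouse")
        #     # expected: ["SELECT CAST(x AS String)"]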
        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
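        # Illustrative example (not part of the module): after_limit_modifiers
        # above appends ClickHouse's trailing SETTINGS and FORMAT clauses after
        # any LIMIT. Indicative round trip:
        #
        #     import sqlglot
        #
        #     sqlglot.transpile(
        #         "SELECT * FROM t SETTINGS max_threads = 1 FORMAT JSONEachRow",
        #         read="clickhouse",
        #         write="clickhouse",
        #     )
        #     # expected round trip of the same statement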
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()

                query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            # If the VALUES clause contains tuples of expressions, we need to treat it
            # as a table since Clickhouse will automatically alias it as such.
            alias = expression.args.get("alias")

            if alias and alias.args.get("columns") and expression.expressions:
                values = expression.expressions[0].expressions
                values_as_table = any(isinstance(value, exp.Tuple) for value in values)
            else:
                values_as_table = True

            return super().values_sql(expression, values_as_table=values_as_table)
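# Illustrative example (not part of the module): in_sql and not_sql above
# cooperate so that negated global membership renders as ClickHouse's
# GLOBAL NOT IN rather than NOT ... GLOBAL IN. Indicative round trip:
#
#     import sqlglot
#
#     sqlglot.transpile(
#         "SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)", read="clickhouse", write="clickhouse"
#     )
#     # expected: ["SELECT * FROM t WHERE x GLOBAL NOT IN (1, 2)"]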
t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 709 710 if not cte: 711 # WITH <expression> AS <identifier> 712 cte = self.expression( 713 exp.CTE, 714 this=self._parse_assignment(), 715 alias=self._parse_table_alias(), 716 scalar=True, 717 ) 718 719 return cte 720 721 def _parse_join_parts( 722 self, 723 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 724 is_global = self._match(TokenType.GLOBAL) and self._prev 725 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 726 727 if kind_pre: 728 kind = self._match_set(self.JOIN_KINDS) and self._prev 729 side = self._match_set(self.JOIN_SIDES) and self._prev 730 return is_global, side, kind 731 732 return ( 733 is_global, 734 self._match_set(self.JOIN_SIDES) and self._prev, 735 self._match_set(self.JOIN_KINDS) and self._prev, 736 ) 737 738 def _parse_join( 739 self, skip_join_token: bool = False, parse_bracket: bool = False 740 ) -> t.Optional[exp.Join]: 741 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 742 if join: 743 join.set("global", join.args.pop("method", None)) 744 745 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 746 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 747 if join.kind == "ARRAY": 748 for table in join.find_all(exp.Table): 749 table.replace(table.to_column()) 750 751 return join 752 753 def _parse_function( 754 self, 755 functions: t.Optional[t.Dict[str, t.Callable]] = None, 756 anonymous: bool = False, 757 optional_parens: bool = True, 758 any_token: bool = False, 759 ) -> t.Optional[exp.Expression]: 760 expr = super()._parse_function( 761 functions=functions, 762 anonymous=anonymous, 763 optional_parens=optional_parens, 764 any_token=any_token, 765 ) 766 767 func = expr.this if isinstance(expr, exp.Window) else expr 768 769 # Aggregate functions can be split in 2 parts: <func_name><suffix> 770 parts = ( 771 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 772 ) 773 774 if parts: 775 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 776 params = self._parse_func_params(anon_func) 777 778 kwargs = { 779 "this": anon_func.this, 780 "expressions": anon_func.expressions, 781 } 782 if parts[1]: 783 exp_class: t.Type[exp.Expression] = ( 784 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 785 ) 786 else: 787 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 788 789 kwargs["exp_class"] = exp_class 790 if params: 791 kwargs["params"] = params 792 793 func = self.expression(**kwargs) 794 795 if isinstance(expr, exp.Window): 796 # The window's func was parsed as Anonymous in base parser, fix its 797 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 798 expr.set("this", func) 799 elif params: 800 # Params have blocked super()._parse_function() from parsing the following window 801 # (if that exists) as they're standing between the function call and the window spec 802 expr = self._parse_window(func) 803 else: 804 expr = func 805 806 return expr 807 808 def _parse_func_params( 809 self, this: t.Optional[exp.Func] = None 810 ) -> t.Optional[t.List[exp.Expression]]: 811 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 812 return self._parse_csv(self._parse_lambda) 813 814 if self._match(TokenType.L_PAREN): 815 params = self._parse_csv(self._parse_lambda) 816 self._match_r_paren(this) 817 return params 818 819 return None 820 821 def _parse_quantile(self) -> exp.Quantile: 822 this = 
self._parse_lambda() 823 params = self._parse_func_params() 824 if params: 825 return self.expression(exp.Quantile, this=params[0], quantile=this) 826 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 827 828 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 829 return super()._parse_wrapped_id_vars(optional=True) 830 831 def _parse_primary_key( 832 self, wrapped_optional: bool = False, in_props: bool = False 833 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 834 return super()._parse_primary_key( 835 wrapped_optional=wrapped_optional or in_props, in_props=in_props 836 ) 837 838 def _parse_on_property(self) -> t.Optional[exp.Expression]: 839 index = self._index 840 if self._match_text_seq("CLUSTER"): 841 this = self._parse_string() or self._parse_id_var() 842 if this: 843 return self.expression(exp.OnCluster, this=this) 844 else: 845 self._retreat(index) 846 return None 847 848 def _parse_index_constraint( 849 self, kind: t.Optional[str] = None 850 ) -> exp.IndexColumnConstraint: 851 # INDEX name1 expr TYPE type1(args) GRANULARITY value 852 this = self._parse_id_var() 853 expression = self._parse_assignment() 854 855 index_type = self._match_text_seq("TYPE") and ( 856 self._parse_function() or self._parse_var() 857 ) 858 859 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 860 861 return self.expression( 862 exp.IndexColumnConstraint, 863 this=this, 864 expression=expression, 865 index_type=index_type, 866 granularity=granularity, 867 ) 868 869 def _parse_partition(self) -> t.Optional[exp.Partition]: 870 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 871 if not self._match(TokenType.PARTITION): 872 return None 873 874 if self._match_text_seq("ID"): 875 # Corresponds to the PARTITION ID <string_value> syntax 876 expressions: t.List[exp.Expression] = [ 877 self.expression(exp.PartitionId, this=self._parse_string()) 878 ] 879 else: 880 expressions = self._parse_expressions() 881 882 return self.expression(exp.Partition, expressions=expressions) 883 884 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 885 partition = self._parse_partition() 886 887 if not partition or not self._match(TokenType.FROM): 888 return None 889 890 return self.expression( 891 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 892 ) 893 894 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 895 if not self._match_text_seq("PROJECTION"): 896 return None 897 898 return self.expression( 899 exp.ProjectionDef, 900 this=self._parse_id_var(), 901 expression=self._parse_wrapped(self._parse_statement), 902 ) 903 904 def _parse_constraint(self) -> t.Optional[exp.Expression]: 905 return super()._parse_constraint() or self._parse_projection_def() 906 907 def _parse_alias( 908 self, this: t.Optional[exp.Expression], explicit: bool = False 909 ) -> t.Optional[exp.Expression]: 910 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 911 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 912 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 913 return this 914 915 return super()._parse_alias(this=this, explicit=explicit) 916 917 def _parse_expression(self) -> t.Optional[exp.Expression]: 918 this = super()._parse_expression() 919 920 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 921 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 922 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 923 self._match(TokenType.R_PAREN) 924 925 return this 926 927 def _parse_columns(self) -> exp.Expression: 928 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 929 930 while self._next and self._match_text_seq(")", "APPLY", "("): 931 self._match(TokenType.R_PAREN) 932 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 933 return this 934 935 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 936 value = super()._parse_value(values=values) 937 if not value: 938 return None 939 940 # In Clickhouse "SELECT * FROM VALUES (1, 2, 3)" generates a table with a single column, in contrast 941 # to other dialects. For this case, we canonicalize the values into a tuple-of-tuples AST if it's not already one. 942 # In INSERT INTO statements the same clause actually references multiple columns (opposite semantics), 943 # but the final result is not altered by the extra parentheses. 944 # Note: Clickhouse allows VALUES([structure], value, ...) so the branch checks for the last expression 945 expressions = value.expressions 946 if values and not isinstance(expressions[-1], exp.Tuple): 947 value.set( 948 "expressions", 949 [self.expression(exp.Tuple, expressions=[expr]) for expr in expressions], 950 ) 951 952 return value 953 954 class Generator(generator.Generator): 955 QUERY_HINTS = False 956 STRUCT_DELIMITER = ("(", ")") 957 NVL2_SUPPORTED = False 958 TABLESAMPLE_REQUIRES_PARENS = False 959 TABLESAMPLE_SIZE_IS_ROWS = False 960 TABLESAMPLE_KEYWORDS = "SAMPLE" 961 LAST_DAY_SUPPORTS_DATE_PART = False 962 CAN_IMPLEMENT_ARRAY_ANY = True 963 SUPPORTS_TO_NUMBER = False 964 JOIN_HINTS = False 965 TABLE_HINTS = False 966 GROUPINGS_SEP = "" 967 SET_OP_MODIFIERS = False 968 ARRAY_SIZE_NAME = "LENGTH" 969 WRAP_DERIVED_VALUES = False 970 971 STRING_TYPE_MAPPING = { 972 exp.DataType.Type.BLOB: "String", 973 exp.DataType.Type.CHAR: "String", 974 exp.DataType.Type.LONGBLOB: "String", 975 exp.DataType.Type.LONGTEXT: "String", 976 exp.DataType.Type.MEDIUMBLOB: "String", 977 exp.DataType.Type.MEDIUMTEXT: "String", 978 exp.DataType.Type.TINYBLOB: "String", 979 exp.DataType.Type.TINYTEXT: "String", 980 exp.DataType.Type.TEXT: "String", 981 exp.DataType.Type.VARBINARY: "String", 982 exp.DataType.Type.VARCHAR: "String", 983 } 984 985 SUPPORTED_JSON_PATH_PARTS = { 986 exp.JSONPathKey, 987 exp.JSONPathRoot, 988 exp.JSONPathSubscript, 989 } 990 991 TYPE_MAPPING = { 992 **generator.Generator.TYPE_MAPPING, 993 **STRING_TYPE_MAPPING, 994 exp.DataType.Type.ARRAY: "Array", 995 exp.DataType.Type.BOOLEAN: "Bool", 996 exp.DataType.Type.BIGINT: "Int64", 997 exp.DataType.Type.DATE32: "Date32", 998 exp.DataType.Type.DATETIME: "DateTime", 999 exp.DataType.Type.DATETIME2: "DateTime", 1000 exp.DataType.Type.SMALLDATETIME: "DateTime", 1001 exp.DataType.Type.DATETIME64: "DateTime64", 1002 exp.DataType.Type.DECIMAL: "Decimal", 1003 exp.DataType.Type.DECIMAL32: "Decimal32", 1004 exp.DataType.Type.DECIMAL64: "Decimal64", 1005 exp.DataType.Type.DECIMAL128: 
"Decimal128", 1006 exp.DataType.Type.DECIMAL256: "Decimal256", 1007 exp.DataType.Type.TIMESTAMP: "DateTime", 1008 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1009 exp.DataType.Type.DOUBLE: "Float64", 1010 exp.DataType.Type.ENUM: "Enum", 1011 exp.DataType.Type.ENUM8: "Enum8", 1012 exp.DataType.Type.ENUM16: "Enum16", 1013 exp.DataType.Type.FIXEDSTRING: "FixedString", 1014 exp.DataType.Type.FLOAT: "Float32", 1015 exp.DataType.Type.INT: "Int32", 1016 exp.DataType.Type.MEDIUMINT: "Int32", 1017 exp.DataType.Type.INT128: "Int128", 1018 exp.DataType.Type.INT256: "Int256", 1019 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1020 exp.DataType.Type.MAP: "Map", 1021 exp.DataType.Type.NESTED: "Nested", 1022 exp.DataType.Type.SMALLINT: "Int16", 1023 exp.DataType.Type.STRUCT: "Tuple", 1024 exp.DataType.Type.TINYINT: "Int8", 1025 exp.DataType.Type.UBIGINT: "UInt64", 1026 exp.DataType.Type.UINT: "UInt32", 1027 exp.DataType.Type.UINT128: "UInt128", 1028 exp.DataType.Type.UINT256: "UInt256", 1029 exp.DataType.Type.USMALLINT: "UInt16", 1030 exp.DataType.Type.UTINYINT: "UInt8", 1031 exp.DataType.Type.IPV4: "IPv4", 1032 exp.DataType.Type.IPV6: "IPv6", 1033 exp.DataType.Type.POINT: "Point", 1034 exp.DataType.Type.RING: "Ring", 1035 exp.DataType.Type.LINESTRING: "LineString", 1036 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1037 exp.DataType.Type.POLYGON: "Polygon", 1038 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1039 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1040 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1041 exp.DataType.Type.DYNAMIC: "Dynamic", 1042 } 1043 1044 TRANSFORMS = { 1045 **generator.Generator.TRANSFORMS, 1046 exp.AnyValue: rename_func("any"), 1047 exp.ApproxDistinct: rename_func("uniq"), 1048 exp.ArrayConcat: rename_func("arrayConcat"), 1049 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1050 exp.ArraySum: rename_func("arraySum"), 1051 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1052 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1053 exp.Array: inline_array_sql, 1054 exp.CastToStrType: rename_func("CAST"), 1055 exp.CountIf: rename_func("countIf"), 1056 exp.CompressColumnConstraint: lambda self, 1057 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1058 exp.ComputedColumnConstraint: lambda self, 1059 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1060 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1061 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1062 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1063 exp.DateStrToDate: rename_func("toDate"), 1064 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1065 exp.Explode: rename_func("arrayJoin"), 1066 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1067 exp.IsNan: rename_func("isNaN"), 1068 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1069 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1070 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1071 exp.JSONPathKey: json_path_key_only_name, 1072 exp.JSONPathRoot: lambda *_: "", 1073 exp.Length: length_or_char_length_sql, 1074 exp.Map: _map_sql, 1075 exp.Median: rename_func("median"), 1076 exp.Nullif: rename_func("nullIf"), 1077 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1078 exp.Pivot: no_pivot_sql, 1079 exp.Quantile: _quantile_sql, 1080 exp.RegexpLike: lambda self, e: self.func("match", e.this, 
e.expression), 1081 exp.Rand: rename_func("randCanonical"), 1082 exp.StartsWith: rename_func("startsWith"), 1083 exp.StrPosition: lambda self, e: strposition_sql( 1084 self, 1085 e, 1086 func_name="POSITION", 1087 supports_position=True, 1088 use_ansi_position=False, 1089 ), 1090 exp.TimeToStr: lambda self, e: self.func( 1091 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1092 ), 1093 exp.TimeStrToTime: _timestrtotime_sql, 1094 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1095 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1096 exp.VarMap: _map_sql, 1097 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1098 exp.MD5Digest: rename_func("MD5"), 1099 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1100 exp.SHA: rename_func("SHA1"), 1101 exp.SHA2: sha256_sql, 1102 exp.UnixToTime: _unix_to_time_sql, 1103 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1104 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1105 exp.Variance: rename_func("varSamp"), 1106 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1107 exp.Stddev: rename_func("stddevSamp"), 1108 exp.Chr: rename_func("CHAR"), 1109 exp.Lag: lambda self, e: self.func( 1110 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1111 ), 1112 exp.Lead: lambda self, e: self.func( 1113 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1114 ), 1115 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1116 rename_func("editDistance") 1117 ), 1118 } 1119 1120 PROPERTIES_LOCATION = { 1121 **generator.Generator.PROPERTIES_LOCATION, 1122 exp.OnCluster: exp.Properties.Location.POST_NAME, 1123 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1124 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1125 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1126 } 1127 1128 # There's no list in docs, but it can be found in Clickhouse code 1129 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1130 ON_CLUSTER_TARGETS = { 1131 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1132 "DATABASE", 1133 "TABLE", 1134 "VIEW", 1135 "DICTIONARY", 1136 "INDEX", 1137 "FUNCTION", 1138 "NAMED COLLECTION", 1139 } 1140 1141 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1142 NON_NULLABLE_TYPES = { 1143 exp.DataType.Type.ARRAY, 1144 exp.DataType.Type.MAP, 1145 exp.DataType.Type.STRUCT, 1146 exp.DataType.Type.POINT, 1147 exp.DataType.Type.RING, 1148 exp.DataType.Type.LINESTRING, 1149 exp.DataType.Type.MULTILINESTRING, 1150 exp.DataType.Type.POLYGON, 1151 exp.DataType.Type.MULTIPOLYGON, 1152 } 1153 1154 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1155 strtodate_sql = self.function_fallback_sql(expression) 1156 1157 if not isinstance(expression.parent, exp.Cast): 1158 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1159 # this branch aims to improve the transpilation to clickhouse 1160 return self.cast_sql(exp.cast(expression, "DATE")) 1161 1162 return strtodate_sql 1163 1164 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1165 this = expression.this 1166 1167 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1168 return self.sql(this) 1169 1170 return super().cast_sql(expression, safe_prefix=safe_prefix) 1171 1172 def trycast_sql(self, expression: exp.TryCast) -> str: 1173 dtype = expression.to 1174 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1175 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1176 dtype.set("nullable", True) 1177 1178 return super().cast_sql(expression) 1179 1180 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1181 this = self.json_path_part(expression.this) 1182 return str(int(this) + 1) if is_int(this) else this 1183 1184 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1185 return f"AS {self.sql(expression, 'this')}" 1186 1187 def _any_to_has( 1188 self, 1189 expression: exp.EQ | exp.NEQ, 1190 default: t.Callable[[t.Any], str], 1191 prefix: str = "", 1192 ) -> str: 1193 if isinstance(expression.left, exp.Any): 1194 arr = expression.left 1195 this = expression.right 1196 elif isinstance(expression.right, exp.Any): 1197 arr = expression.right 1198 this = expression.left 1199 else: 1200 return default(expression) 1201 1202 return prefix + self.func("has", arr.this.unnest(), this) 1203 1204 def eq_sql(self, expression: exp.EQ) -> str: 1205 return self._any_to_has(expression, super().eq_sql) 1206 1207 def neq_sql(self, expression: exp.NEQ) -> str: 1208 return self._any_to_has(expression, super().neq_sql, "NOT ") 1209 1210 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1211 # Manually add a flag to make the search case-insensitive 1212 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1213 return self.func("match", expression.this, regex) 1214 1215 def datatype_sql(self, expression: exp.DataType) -> str: 1216 # String is the standard ClickHouse type, every other variant is just an alias. 1217 # Additionally, any supplied length parameter will be ignored. 1218 # 1219 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1220 if expression.this in self.STRING_TYPE_MAPPING: 1221 dtype = "String" 1222 else: 1223 dtype = super().datatype_sql(expression) 1224 1225 # This section changes the type to `Nullable(...)` if the following conditions hold: 1226 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1227 # and change their semantics 1228 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1229 # constraint: "Type of Map key must be a type, that can be represented by integer or 1230 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1231 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1232 parent = expression.parent 1233 nullable = expression.args.get("nullable") 1234 if nullable is True or ( 1235 nullable is None 1236 and not ( 1237 isinstance(parent, exp.DataType) 1238 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1239 and expression.index in (None, 0) 1240 ) 1241 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1242 ): 1243 dtype = f"Nullable({dtype})" 1244 1245 return dtype 1246 1247 def cte_sql(self, expression: exp.CTE) -> str: 1248 if expression.args.get("scalar"): 1249 this = self.sql(expression, "this") 1250 alias = self.sql(expression, "alias") 1251 return f"{this} AS {alias}" 1252 1253 return super().cte_sql(expression) 1254 1255 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1256 return super().after_limit_modifiers(expression) + [ 1257 ( 1258 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1259 if expression.args.get("settings") 1260 else "" 1261 ), 1262 ( 1263 self.seg("FORMAT ") + self.sql(expression, "format") 1264 if expression.args.get("format") 1265 else "" 1266 ), 1267 ] 1268 1269 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1270 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1271 1272 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1273 return f"ON CLUSTER {self.sql(expression, 'this')}" 1274 1275 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1276 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1277 exp.Properties.Location.POST_NAME 1278 ): 1279 this_name = self.sql( 1280 expression.this if isinstance(expression.this, exp.Schema) else expression, 1281 "this", 1282 ) 1283 this_properties = " ".join( 1284 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1285 ) 1286 this_schema = self.schema_columns_sql(expression.this) 1287 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1288 1289 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1290 1291 return super().createable_sql(expression, locations) 1292 1293 def create_sql(self, expression: exp.Create) -> str: 1294 # The comment property comes last in CTAS statements, i.e. 
after the query 1295 query = expression.expression 1296 if isinstance(query, exp.Query): 1297 comment_prop = expression.find(exp.SchemaCommentProperty) 1298 if comment_prop: 1299 comment_prop.pop() 1300 query.replace(exp.paren(query)) 1301 else: 1302 comment_prop = None 1303 1304 create_sql = super().create_sql(expression) 1305 1306 comment_sql = self.sql(comment_prop) 1307 comment_sql = f" {comment_sql}" if comment_sql else "" 1308 1309 return f"{create_sql}{comment_sql}" 1310 1311 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1312 this = self.indent(self.sql(expression, "this")) 1313 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1314 1315 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1316 this = self.sql(expression, "this") 1317 this = f" {this}" if this else "" 1318 expr = self.sql(expression, "expression") 1319 expr = f" {expr}" if expr else "" 1320 index_type = self.sql(expression, "index_type") 1321 index_type = f" TYPE {index_type}" if index_type else "" 1322 granularity = self.sql(expression, "granularity") 1323 granularity = f" GRANULARITY {granularity}" if granularity else "" 1324 1325 return f"INDEX{this}{expr}{index_type}{granularity}" 1326 1327 def partition_sql(self, expression: exp.Partition) -> str: 1328 return f"PARTITION {self.expressions(expression, flat=True)}" 1329 1330 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1331 return f"ID {self.sql(expression.this)}" 1332 1333 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1334 return ( 1335 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1336 ) 1337 1338 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1339 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1340 1341 def is_sql(self, expression: exp.Is) -> str: 1342 is_sql = super().is_sql(expression) 1343 1344 if isinstance(expression.parent, exp.Not): 1345 # value IS NOT NULL -> NOT (value IS NULL) 1346 is_sql = self.wrap(is_sql) 1347 1348 return is_sql 1349 1350 def in_sql(self, expression: exp.In) -> str: 1351 in_sql = super().in_sql(expression) 1352 1353 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1354 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1355 1356 return in_sql 1357 1358 def not_sql(self, expression: exp.Not) -> str: 1359 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1360 # let `GLOBAL IN` child interpose `NOT` 1361 return self.sql(expression, "this") 1362 1363 return super().not_sql(expression) 1364 1365 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1366 # If the VALUES clause contains tuples of expressions, we need to treat it 1367 # as a table since Clickhouse will automatically alias it as such. 1368 alias = expression.args.get("alias") 1369 1370 if alias and alias.args.get("columns") and expression.expressions: 1371 values = expression.expressions[0].expressions 1372 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1373 else: 1374 values_as_table = True 1375 1376 return super().values_sql(expression, values_as_table=values_as_table)
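The _parse_types / datatype_sql pair in the listing above is what produces the Nullable(...) wrapping described in the inline comments. A minimal sketch of the round trip through sqlglot's top-level API (the expected outputs in the comments follow the source comments, but are not verified here):

import sqlglot

# A type parsed from another dialect carries no nullability marker, so the
# ClickHouse generator wraps it in Nullable(...) to preserve NULL semantics.
print(sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])
# expected: CAST(x AS Nullable(String))

# A type parsed by the ClickHouse dialect itself is marked nullable=False in
# _parse_types, so the generator leaves it unwrapped.
print(sqlglot.transpile("CAST(x AS String)", read="clickhouse", write="clickhouse")[0])
# expected: CAST(x AS String)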
Determines how function names are going to be normalized.

Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.

Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.

Possible values: True, False, None (two arguments are not supported by LOG)
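Since two-argument LOG is unsupported here, the interesting case is the bare one: the Parser below also sets LOG_DEFAULTS_TO_LN = True, so single-argument LOG should parse as a natural logarithm. A hedged sketch:

import sqlglot

# With LOG_DEFAULTS_TO_LN = True, LOG(x) is parsed as exp.Ln.
print(sqlglot.transpile("SELECT LOG(x)", read="clickhouse", write="clickhouse")[0])
# expected: SELECT LN(x)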
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias only to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
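A sketch of how this plays out through the optimizer's qualify step, as I read the behaviour (output not verified here):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""

# Under ClickHouse semantics, every reference to my_id should resolve to the
# projection alias, i.e. to the underlying id column.
print(qualify(parse_one(sql, read="clickhouse"), dialect="clickhouse").sql("clickhouse"))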
Whether the name of the function should be preserved inside the node's metadata; this can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether number literals can include underscores for better readability.
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. the two-character sequence \\n) to its unescaped version (the actual newline character).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
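For example, transpiling a CREATE SCHEMA statement into ClickHouse should surface as CREATE DATABASE (a hedged sketch, output not verified):

import sqlglot

print(sqlglot.transpile("CREATE SCHEMA s", write="clickhouse")[0])
# expected: CREATE DATABASE s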
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
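As I understand it, ClickHouse leaves this unset for UNION, so the parsed node only records distinctness when it is spelled out. An illustrative sketch:

from sqlglot import parse_one

# ALL was explicit, so the Union node should record distinct=False.
union_all = parse_one("SELECT 1 UNION ALL SELECT 2", read="clickhouse")
print(union_all.args.get("distinct"))

# A bare UNION should leave the marker unset (None) under this dialect.
bare = parse_one("SELECT 1 UNION SELECT 2", read="clickhouse")
print(bare.args.get("distinct"))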
211 def generate_values_aliases(self, expression: exp.Values) -> t.List[exp.Identifier]: 212 # Clickhouse allows VALUES to have an embedded structure e.g: 213 # VALUES('person String, place String', ('Noah', 'Paris'), ...) 214 # In this case, we don't want to qualify the columns 215 values = expression.expressions[0].expressions 216 217 structure = ( 218 values[0] 219 if (len(values) > 1 and values[0].is_string and isinstance(values[1], exp.Tuple)) 220 else None 221 ) 222 if structure: 223 # Split each column definition into the column name e.g: 224 # 'person String, place String' -> ['person', 'place'] 225 structure_coldefs = [coldef.strip() for coldef in structure.name.split(",")] 226 column_aliases = [ 227 exp.to_identifier(coldef.split(" ")[0]) for coldef in structure_coldefs 228 ] 229 else: 230 # Default column aliases in CH are "c1", "c2", etc. 231 column_aliases = [ 232 exp.to_identifier(f"c{i + 1}") for i in range(len(values[0].expressions)) 233 ] 234 235 return column_aliases
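A sketch that calls the method above directly on parsed VALUES clauses (illustrative only; results described in the comments rather than asserted):

from sqlglot import exp, parse_one
from sqlglot.dialects.clickhouse import ClickHouse

# The embedded structure string supplies the aliases: person, place.
structured = parse_one(
    "SELECT * FROM VALUES('person String, place String', ('Noah', 'Paris'))",
    read="clickhouse",
).find(exp.Values)
print(ClickHouse().generate_values_aliases(structured))

# Without a structure string, VALUES (1, 2, 3) is a single-column table
# (see _parse_value in the Parser), so the default alias is just c1.
plain = parse_one("SELECT * FROM VALUES (1, 2, 3)", read="clickhouse").find(exp.Values)
print(ClickHouse().generate_values_aliases(plain))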
237 class Tokenizer(tokens.Tokenizer): 238 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 239 IDENTIFIERS = ['"', "`"] 240 IDENTIFIER_ESCAPES = ["\\"] 241 STRING_ESCAPES = ["'", "\\"] 242 BIT_STRINGS = [("0b", "")] 243 HEX_STRINGS = [("0x", ""), ("0X", "")] 244 HEREDOC_STRINGS = ["$"] 245 246 KEYWORDS = { 247 **tokens.Tokenizer.KEYWORDS, 248 ".:": TokenType.DOTCOLON, 249 "ATTACH": TokenType.COMMAND, 250 "DATE32": TokenType.DATE32, 251 "DATETIME64": TokenType.DATETIME64, 252 "DICTIONARY": TokenType.DICTIONARY, 253 "DYNAMIC": TokenType.DYNAMIC, 254 "ENUM8": TokenType.ENUM8, 255 "ENUM16": TokenType.ENUM16, 256 "FINAL": TokenType.FINAL, 257 "FIXEDSTRING": TokenType.FIXEDSTRING, 258 "FLOAT32": TokenType.FLOAT, 259 "FLOAT64": TokenType.DOUBLE, 260 "GLOBAL": TokenType.GLOBAL, 261 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 262 "MAP": TokenType.MAP, 263 "NESTED": TokenType.NESTED, 264 "SAMPLE": TokenType.TABLE_SAMPLE, 265 "TUPLE": TokenType.STRUCT, 266 "UINT16": TokenType.USMALLINT, 267 "UINT32": TokenType.UINT, 268 "UINT64": TokenType.UBIGINT, 269 "UINT8": TokenType.UTINYINT, 270 "IPV4": TokenType.IPV4, 271 "IPV6": TokenType.IPV6, 272 "POINT": TokenType.POINT, 273 "RING": TokenType.RING, 274 "LINESTRING": TokenType.LINESTRING, 275 "MULTILINESTRING": TokenType.MULTILINESTRING, 276 "POLYGON": TokenType.POLYGON, 277 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 278 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 279 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 280 "SYSTEM": TokenType.COMMAND, 281 "PREWHERE": TokenType.PREWHERE, 282 } 283 KEYWORDS.pop("/*+") 284 285 SINGLE_TOKENS = { 286 **tokens.Tokenizer.SINGLE_TOKENS, 287 "$": TokenType.HEREDOC_STRING, 288 }
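A quick way to see a few of these settings in action, assuming sqlglot's top-level tokenize helper:

import sqlglot

# '#' starts a comment, 0x... is a hex string, and backticks quote identifiers.
tokens = sqlglot.tokenize("SELECT 0x1F, `col` # trailing comment", read="clickhouse")
print([(token.token_type.name, token.text) for token in tokens])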
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
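These arguments are forwarded to the dialect's Parser by the top-level API. A hedged sketch; the malformed query below is assumed to fail to parse and is only there to trigger errors:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # With ErrorLevel.RAISE, errors are collected and raised together;
    # max_errors caps how many end up in the ParseError.
    sqlglot.parse_one(
        "SELECT FROM WHERE",
        read="clickhouse",
        error_level=ErrorLevel.RAISE,
        max_errors=3,
    )
except ParseError as e:
    print(e.errors)  # at most 3 structured error entries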
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
954 class Generator(generator.Generator): 955 QUERY_HINTS = False 956 STRUCT_DELIMITER = ("(", ")") 957 NVL2_SUPPORTED = False 958 TABLESAMPLE_REQUIRES_PARENS = False 959 TABLESAMPLE_SIZE_IS_ROWS = False 960 TABLESAMPLE_KEYWORDS = "SAMPLE" 961 LAST_DAY_SUPPORTS_DATE_PART = False 962 CAN_IMPLEMENT_ARRAY_ANY = True 963 SUPPORTS_TO_NUMBER = False 964 JOIN_HINTS = False 965 TABLE_HINTS = False 966 GROUPINGS_SEP = "" 967 SET_OP_MODIFIERS = False 968 ARRAY_SIZE_NAME = "LENGTH" 969 WRAP_DERIVED_VALUES = False 970 971 STRING_TYPE_MAPPING = { 972 exp.DataType.Type.BLOB: "String", 973 exp.DataType.Type.CHAR: "String", 974 exp.DataType.Type.LONGBLOB: "String", 975 exp.DataType.Type.LONGTEXT: "String", 976 exp.DataType.Type.MEDIUMBLOB: "String", 977 exp.DataType.Type.MEDIUMTEXT: "String", 978 exp.DataType.Type.TINYBLOB: "String", 979 exp.DataType.Type.TINYTEXT: "String", 980 exp.DataType.Type.TEXT: "String", 981 exp.DataType.Type.VARBINARY: "String", 982 exp.DataType.Type.VARCHAR: "String", 983 } 984 985 SUPPORTED_JSON_PATH_PARTS = { 986 exp.JSONPathKey, 987 exp.JSONPathRoot, 988 exp.JSONPathSubscript, 989 } 990 991 TYPE_MAPPING = { 992 **generator.Generator.TYPE_MAPPING, 993 **STRING_TYPE_MAPPING, 994 exp.DataType.Type.ARRAY: "Array", 995 exp.DataType.Type.BOOLEAN: "Bool", 996 exp.DataType.Type.BIGINT: "Int64", 997 exp.DataType.Type.DATE32: "Date32", 998 exp.DataType.Type.DATETIME: "DateTime", 999 exp.DataType.Type.DATETIME2: "DateTime", 1000 exp.DataType.Type.SMALLDATETIME: "DateTime", 1001 exp.DataType.Type.DATETIME64: "DateTime64", 1002 exp.DataType.Type.DECIMAL: "Decimal", 1003 exp.DataType.Type.DECIMAL32: "Decimal32", 1004 exp.DataType.Type.DECIMAL64: "Decimal64", 1005 exp.DataType.Type.DECIMAL128: "Decimal128", 1006 exp.DataType.Type.DECIMAL256: "Decimal256", 1007 exp.DataType.Type.TIMESTAMP: "DateTime", 1008 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 1009 exp.DataType.Type.DOUBLE: "Float64", 1010 exp.DataType.Type.ENUM: "Enum", 1011 exp.DataType.Type.ENUM8: "Enum8", 1012 exp.DataType.Type.ENUM16: "Enum16", 1013 exp.DataType.Type.FIXEDSTRING: "FixedString", 1014 exp.DataType.Type.FLOAT: "Float32", 1015 exp.DataType.Type.INT: "Int32", 1016 exp.DataType.Type.MEDIUMINT: "Int32", 1017 exp.DataType.Type.INT128: "Int128", 1018 exp.DataType.Type.INT256: "Int256", 1019 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 1020 exp.DataType.Type.MAP: "Map", 1021 exp.DataType.Type.NESTED: "Nested", 1022 exp.DataType.Type.SMALLINT: "Int16", 1023 exp.DataType.Type.STRUCT: "Tuple", 1024 exp.DataType.Type.TINYINT: "Int8", 1025 exp.DataType.Type.UBIGINT: "UInt64", 1026 exp.DataType.Type.UINT: "UInt32", 1027 exp.DataType.Type.UINT128: "UInt128", 1028 exp.DataType.Type.UINT256: "UInt256", 1029 exp.DataType.Type.USMALLINT: "UInt16", 1030 exp.DataType.Type.UTINYINT: "UInt8", 1031 exp.DataType.Type.IPV4: "IPv4", 1032 exp.DataType.Type.IPV6: "IPv6", 1033 exp.DataType.Type.POINT: "Point", 1034 exp.DataType.Type.RING: "Ring", 1035 exp.DataType.Type.LINESTRING: "LineString", 1036 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 1037 exp.DataType.Type.POLYGON: "Polygon", 1038 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 1039 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 1040 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 1041 exp.DataType.Type.DYNAMIC: "Dynamic", 1042 } 1043 1044 TRANSFORMS = { 1045 **generator.Generator.TRANSFORMS, 1046 exp.AnyValue: rename_func("any"), 1047 exp.ApproxDistinct: rename_func("uniq"), 1048 exp.ArrayConcat: rename_func("arrayConcat"), 1049 
exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1050 exp.ArraySum: rename_func("arraySum"), 1051 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1052 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1053 exp.Array: inline_array_sql, 1054 exp.CastToStrType: rename_func("CAST"), 1055 exp.CountIf: rename_func("countIf"), 1056 exp.CompressColumnConstraint: lambda self, 1057 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1058 exp.ComputedColumnConstraint: lambda self, 1059 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1060 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1061 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1062 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1063 exp.DateStrToDate: rename_func("toDate"), 1064 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1065 exp.Explode: rename_func("arrayJoin"), 1066 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1067 exp.IsNan: rename_func("isNaN"), 1068 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1069 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1070 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1071 exp.JSONPathKey: json_path_key_only_name, 1072 exp.JSONPathRoot: lambda *_: "", 1073 exp.Length: length_or_char_length_sql, 1074 exp.Map: _map_sql, 1075 exp.Median: rename_func("median"), 1076 exp.Nullif: rename_func("nullIf"), 1077 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1078 exp.Pivot: no_pivot_sql, 1079 exp.Quantile: _quantile_sql, 1080 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1081 exp.Rand: rename_func("randCanonical"), 1082 exp.StartsWith: rename_func("startsWith"), 1083 exp.StrPosition: lambda self, e: strposition_sql( 1084 self, 1085 e, 1086 func_name="POSITION", 1087 supports_position=True, 1088 use_ansi_position=False, 1089 ), 1090 exp.TimeToStr: lambda self, e: self.func( 1091 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1092 ), 1093 exp.TimeStrToTime: _timestrtotime_sql, 1094 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1095 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1096 exp.VarMap: _map_sql, 1097 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1098 exp.MD5Digest: rename_func("MD5"), 1099 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1100 exp.SHA: rename_func("SHA1"), 1101 exp.SHA2: sha256_sql, 1102 exp.UnixToTime: _unix_to_time_sql, 1103 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1104 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1105 exp.Variance: rename_func("varSamp"), 1106 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1107 exp.Stddev: rename_func("stddevSamp"), 1108 exp.Chr: rename_func("CHAR"), 1109 exp.Lag: lambda self, e: self.func( 1110 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1111 ), 1112 exp.Lead: lambda self, e: self.func( 1113 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1114 ), 1115 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1116 rename_func("editDistance") 1117 ), 1118 } 1119 1120 PROPERTIES_LOCATION = { 1121 **generator.Generator.PROPERTIES_LOCATION, 1122 exp.OnCluster: exp.Properties.Location.POST_NAME, 1123 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1124 
exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1125 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1126 } 1127 1128 # There's no list in docs, but it can be found in Clickhouse code 1129 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1130 ON_CLUSTER_TARGETS = { 1131 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1132 "DATABASE", 1133 "TABLE", 1134 "VIEW", 1135 "DICTIONARY", 1136 "INDEX", 1137 "FUNCTION", 1138 "NAMED COLLECTION", 1139 } 1140 1141 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1142 NON_NULLABLE_TYPES = { 1143 exp.DataType.Type.ARRAY, 1144 exp.DataType.Type.MAP, 1145 exp.DataType.Type.STRUCT, 1146 exp.DataType.Type.POINT, 1147 exp.DataType.Type.RING, 1148 exp.DataType.Type.LINESTRING, 1149 exp.DataType.Type.MULTILINESTRING, 1150 exp.DataType.Type.POLYGON, 1151 exp.DataType.Type.MULTIPOLYGON, 1152 } 1153 1154 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1155 strtodate_sql = self.function_fallback_sql(expression) 1156 1157 if not isinstance(expression.parent, exp.Cast): 1158 # StrToDate returns DATEs in other dialects (eg. postgres), so 1159 # this branch aims to improve the transpilation to clickhouse 1160 return self.cast_sql(exp.cast(expression, "DATE")) 1161 1162 return strtodate_sql 1163 1164 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1165 this = expression.this 1166 1167 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1168 return self.sql(this) 1169 1170 return super().cast_sql(expression, safe_prefix=safe_prefix) 1171 1172 def trycast_sql(self, expression: exp.TryCast) -> str: 1173 dtype = expression.to 1174 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1175 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1176 dtype.set("nullable", True) 1177 1178 return super().cast_sql(expression) 1179 1180 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1181 this = self.json_path_part(expression.this) 1182 return str(int(this) + 1) if is_int(this) else this 1183 1184 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1185 return f"AS {self.sql(expression, 'this')}" 1186 1187 def _any_to_has( 1188 self, 1189 expression: exp.EQ | exp.NEQ, 1190 default: t.Callable[[t.Any], str], 1191 prefix: str = "", 1192 ) -> str: 1193 if isinstance(expression.left, exp.Any): 1194 arr = expression.left 1195 this = expression.right 1196 elif isinstance(expression.right, exp.Any): 1197 arr = expression.right 1198 this = expression.left 1199 else: 1200 return default(expression) 1201 1202 return prefix + self.func("has", arr.this.unnest(), this) 1203 1204 def eq_sql(self, expression: exp.EQ) -> str: 1205 return self._any_to_has(expression, super().eq_sql) 1206 1207 def neq_sql(self, expression: exp.NEQ) -> str: 1208 return self._any_to_has(expression, super().neq_sql, "NOT ") 1209 1210 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1211 # Manually add a flag to make the search case-insensitive 1212 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1213 return self.func("match", expression.this, regex) 1214 1215 def datatype_sql(self, expression: exp.DataType) -> str: 1216 # String is the standard ClickHouse type, every other variant is just an alias. 1217 # Additionally, any supplied length parameter will be ignored. 
1218 # 1219 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1220 if expression.this in self.STRING_TYPE_MAPPING: 1221 dtype = "String" 1222 else: 1223 dtype = super().datatype_sql(expression) 1224 1225 # This section changes the type to `Nullable(...)` if the following conditions hold: 1226 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1227 # and change their semantics 1228 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1229 # constraint: "Type of Map key must be a type, that can be represented by integer or 1230 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1231 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1232 parent = expression.parent 1233 nullable = expression.args.get("nullable") 1234 if nullable is True or ( 1235 nullable is None 1236 and not ( 1237 isinstance(parent, exp.DataType) 1238 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1239 and expression.index in (None, 0) 1240 ) 1241 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1242 ): 1243 dtype = f"Nullable({dtype})" 1244 1245 return dtype 1246 1247 def cte_sql(self, expression: exp.CTE) -> str: 1248 if expression.args.get("scalar"): 1249 this = self.sql(expression, "this") 1250 alias = self.sql(expression, "alias") 1251 return f"{this} AS {alias}" 1252 1253 return super().cte_sql(expression) 1254 1255 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1256 return super().after_limit_modifiers(expression) + [ 1257 ( 1258 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1259 if expression.args.get("settings") 1260 else "" 1261 ), 1262 ( 1263 self.seg("FORMAT ") + self.sql(expression, "format") 1264 if expression.args.get("format") 1265 else "" 1266 ), 1267 ] 1268 1269 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1270 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1271 1272 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1273 return f"ON CLUSTER {self.sql(expression, 'this')}" 1274 1275 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1276 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1277 exp.Properties.Location.POST_NAME 1278 ): 1279 this_name = self.sql( 1280 expression.this if isinstance(expression.this, exp.Schema) else expression, 1281 "this", 1282 ) 1283 this_properties = " ".join( 1284 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1285 ) 1286 this_schema = self.schema_columns_sql(expression.this) 1287 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1288 1289 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1290 1291 return super().createable_sql(expression, locations) 1292 1293 def create_sql(self, expression: exp.Create) -> str: 1294 # The comment property comes last in CTAS statements, i.e. 
after the query 1295 query = expression.expression 1296 if isinstance(query, exp.Query): 1297 comment_prop = expression.find(exp.SchemaCommentProperty) 1298 if comment_prop: 1299 comment_prop.pop() 1300 query.replace(exp.paren(query)) 1301 else: 1302 comment_prop = None 1303 1304 create_sql = super().create_sql(expression) 1305 1306 comment_sql = self.sql(comment_prop) 1307 comment_sql = f" {comment_sql}" if comment_sql else "" 1308 1309 return f"{create_sql}{comment_sql}" 1310 1311 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1312 this = self.indent(self.sql(expression, "this")) 1313 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1314 1315 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1316 this = self.sql(expression, "this") 1317 this = f" {this}" if this else "" 1318 expr = self.sql(expression, "expression") 1319 expr = f" {expr}" if expr else "" 1320 index_type = self.sql(expression, "index_type") 1321 index_type = f" TYPE {index_type}" if index_type else "" 1322 granularity = self.sql(expression, "granularity") 1323 granularity = f" GRANULARITY {granularity}" if granularity else "" 1324 1325 return f"INDEX{this}{expr}{index_type}{granularity}" 1326 1327 def partition_sql(self, expression: exp.Partition) -> str: 1328 return f"PARTITION {self.expressions(expression, flat=True)}" 1329 1330 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1331 return f"ID {self.sql(expression.this)}" 1332 1333 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1334 return ( 1335 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1336 ) 1337 1338 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1339 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1340 1341 def is_sql(self, expression: exp.Is) -> str: 1342 is_sql = super().is_sql(expression) 1343 1344 if isinstance(expression.parent, exp.Not): 1345 # value IS NOT NULL -> NOT (value IS NULL) 1346 is_sql = self.wrap(is_sql) 1347 1348 return is_sql 1349 1350 def in_sql(self, expression: exp.In) -> str: 1351 in_sql = super().in_sql(expression) 1352 1353 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1354 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1355 1356 return in_sql 1357 1358 def not_sql(self, expression: exp.Not) -> str: 1359 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1360 # let `GLOBAL IN` child interpose `NOT` 1361 return self.sql(expression, "this") 1362 1363 return super().not_sql(expression) 1364 1365 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1366 # If the VALUES clause contains tuples of expressions, we need to treat it 1367 # as a table since Clickhouse will automatically alias it as such. 1368 alias = expression.args.get("alias") 1369 1370 if alias and alias.args.get("columns") and expression.expressions: 1371 values = expression.expressions[0].expressions 1372 values_as_table = any(isinstance(value, exp.Tuple) for value in values) 1373 else: 1374 values_as_table = True 1375 1376 return super().values_sql(expression, values_as_table=values_as_table)
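A few of the TRANSFORMS and TYPE_MAPPING entries above in action; a sketch whose expected outputs (shown in comments) follow from the code as written, with made-up table and column names:

import sqlglot

# exp.ApproxDistinct is renamed to uniq.
print(sqlglot.transpile("SELECT APPROX_DISTINCT(x) FROM t", write="clickhouse")[0])
# SELECT uniq(x) FROM t

# TEXT maps to String; types read from dialects where columns are nullable by
# default get wrapped in Nullable(...) by datatype_sql.
print(sqlglot.transpile("SELECT CAST(a AS TEXT) FROM t", read="postgres", write="clickhouse")[0])
# SELECT CAST(a AS Nullable(String)) FROM t

# is_sql parenthesizes under NOT, so IS NOT NULL comes out as NOT (... IS NULL).
print(sqlglot.transpile("SELECT x IS NOT NULL FROM t", write="clickhouse")[0])
# SELECT NOT (x IS NULL) FROM t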
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
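These settings are ordinary keyword arguments on the public API as well; a small sketch using pretty and identify (query and names are illustrative):

import sqlglot

sql = sqlglot.transpile(
    "SELECT a, b FROM t WHERE a > 1",
    write="clickhouse",
    pretty=True,    # format the output across multiple lines
    identify=True,  # always quote identifiers
)[0]
print(sql)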
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql