sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t
import datetime

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )
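
# Two illustrations of the helpers in this module (comments only, not executed):
#
# 1) The scale handling in _unix_to_time_sql above, at the expression level, before
#    type rendering:
#
#      scale None/SECONDS -> fromUnixTimestamp(CAST(ts AS BIGINT))
#      scale MILLIS       -> fromUnixTimestamp64Milli(CAST(ts AS BIGINT))
#      scale MICROS       -> fromUnixTimestamp64Micro(CAST(ts AS BIGINT))
#      scale NANOS        -> fromUnixTimestamp64Nano(CAST(ts AS BIGINT))
#      other scales       -> fromUnixTimestamp(CAST(ts / POW(10, scale) AS BIGINT))
#
# 2) A worked example for _timestrtotime_sql below: the literal
#    '2020-01-01 12:13:14.1-08:00' with zone 'America/Los_Angeles' is padded to
#    '2020-01-01 12:13:14.100000-08:00', stripped of its UTC offset, and rendered
#    roughly as CAST('2020-01-01 12:13:14.100000' AS DateTime64(6, 'America/Los_Angeles')).
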
def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)
        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy()
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)
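
        # Example of the placeholder syntax handled above (illustrative, comments only):
        # parsing
        #
        #   SELECT {abc: UInt32} FROM {table: Identifier}
        #
        # produces exp.Placeholder nodes whose `kind` is the declared type; the
        # Generator's placeholder_sql below reproduces the `{name: Type}` form.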
        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))
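
        # Illustration of the aggregate handling above (comments only): AGG_FUNC_MAPPING
        # maps every <func_name><suffix> spelling back to its parts, e.g.
        #
        #   AGG_FUNC_MAPPING["sumIf"]    == ("sum", "If")
        #   AGG_FUNC_MAPPING["quantile"] == ("quantile", "")
        #
        # and _parse_function combines this with the optional second, parenthesized
        # argument list that ClickHouse allows after an aggregate call:
        #
        #   topK(10)(x)         -> exp.ParameterizedAgg
        #   sumIf(x, cond)      -> exp.CombinedAggFunc with parts=("sum", "If")
        #   topKIf(10)(x, cond) -> exp.CombinedParameterizedAgg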
        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        VALUES_AS_TABLE = False
        ARRAY_SIZE_NAME = "LENGTH"

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: trim_sql,
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql
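
        # Illustration (comments only): a StrToDate that is not already wrapped in a
        # Cast is emitted as CAST(<fallback sql> AS DATE), so the result keeps DATE
        # semantics when transpiled from dialects whose STR_TO_DATE returns a DATE.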
        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype
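
        # Illustration of the rewrites above (comments only): `x = ANY(arr)` is rendered
        # as `has(arr, x)` and `x <> ANY(arr)` as `NOT has(arr, x)`, while a type with
        # no explicit nullability marker coming from another dialect (e.g. TEXT) is
        # rendered as Nullable(String), per the `nullable` handling in datatype_sql.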
        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)
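
        # Illustration of the modifier handling above (comments only): a query such as
        #
        #   WITH 1 AS x SELECT x SETTINGS max_threads = 2 FORMAT JSON
        #
        # flows through the scalar-CTE branch of cte_sql, and after_limit_modifiers
        # appends the SETTINGS and FORMAT segments after any LIMIT clause.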
        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql
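
# A minimal usage sketch (illustrative, relying only on sqlglot's public entry points):
#
#   import sqlglot
#
#   # Parse ClickHouse SQL, including query parameters and FINAL:
#   sqlglot.parse_one("SELECT {id: UInt32} FROM t FINAL", read="clickhouse")
#
#   # Generate ClickHouse SQL from another dialect; per the _parse_types comment above,
#   # nullable-by-default types are expected to be wrapped, e.g.
#   sqlglot.transpile("SELECT CAST(x AS TEXT)", read="postgres", write="clickhouse")
#   # -> ["SELECT CAST(x AS Nullable(String))"]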
However, "SELECT <expr> apply" is a valid alias 852 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 853 return this 854 855 return super()._parse_alias(this=this, explicit=explicit) 856 857 def _parse_expression(self) -> t.Optional[exp.Expression]: 858 this = super()._parse_expression() 859 860 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 861 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 862 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 863 self._match(TokenType.R_PAREN) 864 865 return this 866 867 def _parse_columns(self) -> exp.Expression: 868 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 869 870 while self._next and self._match_text_seq(")", "APPLY", "("): 871 self._match(TokenType.R_PAREN) 872 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 873 return this 874 875 class Generator(generator.Generator): 876 QUERY_HINTS = False 877 STRUCT_DELIMITER = ("(", ")") 878 NVL2_SUPPORTED = False 879 TABLESAMPLE_REQUIRES_PARENS = False 880 TABLESAMPLE_SIZE_IS_ROWS = False 881 TABLESAMPLE_KEYWORDS = "SAMPLE" 882 LAST_DAY_SUPPORTS_DATE_PART = False 883 CAN_IMPLEMENT_ARRAY_ANY = True 884 SUPPORTS_TO_NUMBER = False 885 JOIN_HINTS = False 886 TABLE_HINTS = False 887 GROUPINGS_SEP = "" 888 SET_OP_MODIFIERS = False 889 SUPPORTS_TABLE_ALIAS_COLUMNS = False 890 VALUES_AS_TABLE = False 891 ARRAY_SIZE_NAME = "LENGTH" 892 893 STRING_TYPE_MAPPING = { 894 exp.DataType.Type.CHAR: "String", 895 exp.DataType.Type.LONGBLOB: "String", 896 exp.DataType.Type.LONGTEXT: "String", 897 exp.DataType.Type.MEDIUMBLOB: "String", 898 exp.DataType.Type.MEDIUMTEXT: "String", 899 exp.DataType.Type.TINYBLOB: "String", 900 exp.DataType.Type.TINYTEXT: "String", 901 exp.DataType.Type.TEXT: "String", 902 exp.DataType.Type.VARBINARY: "String", 903 exp.DataType.Type.VARCHAR: "String", 904 } 905 906 SUPPORTED_JSON_PATH_PARTS = { 907 exp.JSONPathKey, 908 exp.JSONPathRoot, 909 exp.JSONPathSubscript, 910 } 911 912 TYPE_MAPPING = { 913 **generator.Generator.TYPE_MAPPING, 914 **STRING_TYPE_MAPPING, 915 exp.DataType.Type.ARRAY: "Array", 916 exp.DataType.Type.BOOLEAN: "Bool", 917 exp.DataType.Type.BIGINT: "Int64", 918 exp.DataType.Type.DATE32: "Date32", 919 exp.DataType.Type.DATETIME: "DateTime", 920 exp.DataType.Type.DATETIME2: "DateTime", 921 exp.DataType.Type.SMALLDATETIME: "DateTime", 922 exp.DataType.Type.DATETIME64: "DateTime64", 923 exp.DataType.Type.DECIMAL: "Decimal", 924 exp.DataType.Type.DECIMAL32: "Decimal32", 925 exp.DataType.Type.DECIMAL64: "Decimal64", 926 exp.DataType.Type.DECIMAL128: "Decimal128", 927 exp.DataType.Type.DECIMAL256: "Decimal256", 928 exp.DataType.Type.TIMESTAMP: "DateTime", 929 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 930 exp.DataType.Type.DOUBLE: "Float64", 931 exp.DataType.Type.ENUM: "Enum", 932 exp.DataType.Type.ENUM8: "Enum8", 933 exp.DataType.Type.ENUM16: "Enum16", 934 exp.DataType.Type.FIXEDSTRING: "FixedString", 935 exp.DataType.Type.FLOAT: "Float32", 936 exp.DataType.Type.INT: "Int32", 937 exp.DataType.Type.MEDIUMINT: "Int32", 938 exp.DataType.Type.INT128: "Int128", 939 exp.DataType.Type.INT256: "Int256", 940 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 941 exp.DataType.Type.MAP: "Map", 942 exp.DataType.Type.NESTED: "Nested", 943 exp.DataType.Type.SMALLINT: "Int16", 944 exp.DataType.Type.STRUCT: "Tuple", 945 exp.DataType.Type.TINYINT: "Int8", 946 exp.DataType.Type.UBIGINT: "UInt64", 947 exp.DataType.Type.UINT: "UInt32", 948 exp.DataType.Type.UINT128: 
"UInt128", 949 exp.DataType.Type.UINT256: "UInt256", 950 exp.DataType.Type.USMALLINT: "UInt16", 951 exp.DataType.Type.UTINYINT: "UInt8", 952 exp.DataType.Type.IPV4: "IPv4", 953 exp.DataType.Type.IPV6: "IPv6", 954 exp.DataType.Type.POINT: "Point", 955 exp.DataType.Type.RING: "Ring", 956 exp.DataType.Type.LINESTRING: "LineString", 957 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 958 exp.DataType.Type.POLYGON: "Polygon", 959 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 960 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 961 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 962 exp.DataType.Type.DYNAMIC: "Dynamic", 963 } 964 965 TRANSFORMS = { 966 **generator.Generator.TRANSFORMS, 967 exp.AnyValue: rename_func("any"), 968 exp.ApproxDistinct: rename_func("uniq"), 969 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 970 exp.ArraySum: rename_func("arraySum"), 971 exp.ArgMax: arg_max_or_min_no_count("argMax"), 972 exp.ArgMin: arg_max_or_min_no_count("argMin"), 973 exp.Array: inline_array_sql, 974 exp.CastToStrType: rename_func("CAST"), 975 exp.CountIf: rename_func("countIf"), 976 exp.CompressColumnConstraint: lambda self, 977 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 978 exp.ComputedColumnConstraint: lambda self, 979 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 980 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 981 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 982 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 983 exp.DateStrToDate: rename_func("toDate"), 984 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 985 exp.Explode: rename_func("arrayJoin"), 986 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 987 exp.IsNan: rename_func("isNaN"), 988 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 989 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 990 exp.JSONPathKey: json_path_key_only_name, 991 exp.JSONPathRoot: lambda *_: "", 992 exp.Length: length_or_char_length_sql, 993 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 994 exp.Median: rename_func("median"), 995 exp.Nullif: rename_func("nullIf"), 996 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 997 exp.Pivot: no_pivot_sql, 998 exp.Quantile: _quantile_sql, 999 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1000 exp.Rand: rename_func("randCanonical"), 1001 exp.StartsWith: rename_func("startsWith"), 1002 exp.StrPosition: lambda self, e: strposition_sql( 1003 self, 1004 e, 1005 func_name="POSITION", 1006 supports_position=True, 1007 use_ansi_position=False, 1008 ), 1009 exp.TimeToStr: lambda self, e: self.func( 1010 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1011 ), 1012 exp.TimeStrToTime: _timestrtotime_sql, 1013 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1014 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1015 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1016 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1017 exp.MD5Digest: rename_func("MD5"), 1018 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1019 exp.SHA: rename_func("SHA1"), 1020 exp.SHA2: sha256_sql, 1021 exp.UnixToTime: _unix_to_time_sql, 1022 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1023 exp.Trim: trim_sql, 1024 exp.Variance: rename_func("varSamp"), 1025 
exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1026 exp.Stddev: rename_func("stddevSamp"), 1027 exp.Chr: rename_func("CHAR"), 1028 exp.Lag: lambda self, e: self.func( 1029 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1030 ), 1031 exp.Lead: lambda self, e: self.func( 1032 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1033 ), 1034 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1035 rename_func("editDistance") 1036 ), 1037 } 1038 1039 PROPERTIES_LOCATION = { 1040 **generator.Generator.PROPERTIES_LOCATION, 1041 exp.OnCluster: exp.Properties.Location.POST_NAME, 1042 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1043 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1044 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1045 } 1046 1047 # There's no list in docs, but it can be found in Clickhouse code 1048 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1049 ON_CLUSTER_TARGETS = { 1050 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1051 "DATABASE", 1052 "TABLE", 1053 "VIEW", 1054 "DICTIONARY", 1055 "INDEX", 1056 "FUNCTION", 1057 "NAMED COLLECTION", 1058 } 1059 1060 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1061 NON_NULLABLE_TYPES = { 1062 exp.DataType.Type.ARRAY, 1063 exp.DataType.Type.MAP, 1064 exp.DataType.Type.STRUCT, 1065 exp.DataType.Type.POINT, 1066 exp.DataType.Type.RING, 1067 exp.DataType.Type.LINESTRING, 1068 exp.DataType.Type.MULTILINESTRING, 1069 exp.DataType.Type.POLYGON, 1070 exp.DataType.Type.MULTIPOLYGON, 1071 } 1072 1073 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1074 strtodate_sql = self.function_fallback_sql(expression) 1075 1076 if not isinstance(expression.parent, exp.Cast): 1077 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1078 # this branch aims to improve the transpilation to clickhouse 1079 return f"CAST({strtodate_sql} AS DATE)" 1080 1081 return strtodate_sql 1082 1083 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1084 this = expression.this 1085 1086 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1087 return self.sql(this) 1088 1089 return super().cast_sql(expression, safe_prefix=safe_prefix) 1090 1091 def trycast_sql(self, expression: exp.TryCast) -> str: 1092 dtype = expression.to 1093 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1094 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1095 dtype.set("nullable", True) 1096 1097 return super().cast_sql(expression) 1098 1099 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1100 this = self.json_path_part(expression.this) 1101 return str(int(this) + 1) if is_int(this) else this 1102 1103 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1104 return f"AS {self.sql(expression, 'this')}" 1105 1106 def _any_to_has( 1107 self, 1108 expression: exp.EQ | exp.NEQ, 1109 default: t.Callable[[t.Any], str], 1110 prefix: str = "", 1111 ) -> str: 1112 if isinstance(expression.left, exp.Any): 1113 arr = expression.left 1114 this = expression.right 1115 elif isinstance(expression.right, exp.Any): 1116 arr = expression.right 1117 this = expression.left 1118 else: 1119 return default(expression) 1120 1121 return prefix + self.func("has", arr.this.unnest(), this) 1122 1123 def eq_sql(self, expression: exp.EQ) -> str: 1124 return self._any_to_has(expression, super().eq_sql) 1125 1126 def neq_sql(self, expression: exp.NEQ) -> str: 1127 return self._any_to_has(expression, super().neq_sql, "NOT ") 1128 1129 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1130 # Manually add a flag to make the search case-insensitive 1131 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1132 return self.func("match", expression.this, regex) 1133 1134 def datatype_sql(self, expression: exp.DataType) -> str: 1135 # String is the standard ClickHouse type, every other variant is just an alias. 1136 # Additionally, any supplied length parameter will be ignored. 1137 # 1138 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1139 if expression.this in self.STRING_TYPE_MAPPING: 1140 dtype = "String" 1141 else: 1142 dtype = super().datatype_sql(expression) 1143 1144 # This section changes the type to `Nullable(...)` if the following conditions hold: 1145 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1146 # and change their semantics 1147 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1148 # constraint: "Type of Map key must be a type, that can be represented by integer or 1149 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1150 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1151 parent = expression.parent 1152 nullable = expression.args.get("nullable") 1153 if nullable is True or ( 1154 nullable is None 1155 and not ( 1156 isinstance(parent, exp.DataType) 1157 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1158 and expression.index in (None, 0) 1159 ) 1160 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1161 ): 1162 dtype = f"Nullable({dtype})" 1163 1164 return dtype 1165 1166 def cte_sql(self, expression: exp.CTE) -> str: 1167 if expression.args.get("scalar"): 1168 this = self.sql(expression, "this") 1169 alias = self.sql(expression, "alias") 1170 return f"{this} AS {alias}" 1171 1172 return super().cte_sql(expression) 1173 1174 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1175 return super().after_limit_modifiers(expression) + [ 1176 ( 1177 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1178 if expression.args.get("settings") 1179 else "" 1180 ), 1181 ( 1182 self.seg("FORMAT ") + self.sql(expression, "format") 1183 if expression.args.get("format") 1184 else "" 1185 ), 1186 ] 1187 1188 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1189 params = self.expressions(expression, key="params", flat=True) 1190 return self.func(expression.name, *expression.expressions) + f"({params})" 1191 1192 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1193 return self.func(expression.name, *expression.expressions) 1194 1195 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1196 return self.anonymousaggfunc_sql(expression) 1197 1198 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1199 return self.parameterizedagg_sql(expression) 1200 1201 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1202 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1203 1204 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1205 return f"ON CLUSTER {self.sql(expression, 'this')}" 1206 1207 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1208 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1209 exp.Properties.Location.POST_NAME 1210 ): 1211 this_name = self.sql( 1212 expression.this if isinstance(expression.this, exp.Schema) else expression, 1213 "this", 1214 ) 1215 this_properties = " ".join( 1216 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1217 ) 1218 this_schema = self.schema_columns_sql(expression.this) 1219 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1220 1221 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1222 1223 return super().createable_sql(expression, locations) 1224 1225 def create_sql(self, expression: exp.Create) -> str: 1226 # The comment property comes last in CTAS statements, i.e. 
after the query 1227 query = expression.expression 1228 if isinstance(query, exp.Query): 1229 comment_prop = expression.find(exp.SchemaCommentProperty) 1230 if comment_prop: 1231 comment_prop.pop() 1232 query.replace(exp.paren(query)) 1233 else: 1234 comment_prop = None 1235 1236 create_sql = super().create_sql(expression) 1237 1238 comment_sql = self.sql(comment_prop) 1239 comment_sql = f" {comment_sql}" if comment_sql else "" 1240 1241 return f"{create_sql}{comment_sql}" 1242 1243 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1244 this = self.indent(self.sql(expression, "this")) 1245 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1246 1247 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1248 this = self.sql(expression, "this") 1249 this = f" {this}" if this else "" 1250 expr = self.sql(expression, "expression") 1251 expr = f" {expr}" if expr else "" 1252 index_type = self.sql(expression, "index_type") 1253 index_type = f" TYPE {index_type}" if index_type else "" 1254 granularity = self.sql(expression, "granularity") 1255 granularity = f" GRANULARITY {granularity}" if granularity else "" 1256 1257 return f"INDEX{this}{expr}{index_type}{granularity}" 1258 1259 def partition_sql(self, expression: exp.Partition) -> str: 1260 return f"PARTITION {self.expressions(expression, flat=True)}" 1261 1262 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1263 return f"ID {self.sql(expression.this)}" 1264 1265 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1266 return ( 1267 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1268 ) 1269 1270 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1271 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1272 1273 def is_sql(self, expression: exp.Is) -> str: 1274 is_sql = super().is_sql(expression) 1275 1276 if isinstance(expression.parent, exp.Not): 1277 # value IS NOT NULL -> NOT (value IS NULL) 1278 is_sql = self.wrap(is_sql) 1279 1280 return is_sql
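Before the attribute reference that follows, here is a minimal sketch of exercising the dialect end to end through sqlglot's public API; the query is illustrative only:

import sqlglot

# arrayJoin(...) parses into exp.Explode (see FUNCTION_PARSERS) and the
# generator's TRANSFORMS entry for exp.Explode renders it back as arrayJoin.
ast = sqlglot.parse_one("SELECT arrayJoin([1, 2, 3]) AS x", read="clickhouse")
print(ast.sql(dialect="clickhouse"))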
NORMALIZE_FUNCTIONS
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.

NULL_ORDERING
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"

LOG_BASE_FIRST
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
FORCE_EARLY_ALIAS_REF_EXPANSION
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
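A minimal sketch of this difference, assuming sqlglot's optimizer API (sqlglot.optimizer.qualify.qualify); the printed SQL is indicative only:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = (
    "WITH data AS (SELECT 1 AS id, 2 AS my_id) "
    "SELECT id AS my_id FROM data "
    "WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
)

# Under ClickHouse, the alias my_id is expanded to the projection's id across
# WHERE, GROUP BY and HAVING before columns are qualified.
expression = sqlglot.parse_one(sql, read="clickhouse")
print(qualify(expression, dialect="clickhouse").sql(dialect="clickhouse"))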
PRESERVE_ORIGINAL_NAMES
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.

NUMBERS_CAN_BE_UNDERSCORE_SEPARATED
Whether number literals can include underscores for better readability.

NORMALIZATION_STRATEGY
Specifies the strategy according to which identifiers should be normalized.
UNESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (a literal newline).

CREATABLE_KIND_MAPPING
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.

SET_OP_DISTINCT_BY_DEFAULT
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
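A minimal sketch of two behaviors documented above; the expected output noted in the comment follows from the dialect's type handling but is indicative only:

import sqlglot

# Nullable wrapping: a cast to a nullable string type from another dialect is
# transpiled with the Nullable(...) constructor, e.g. CAST(x AS Nullable(String)).
print(sqlglot.transpile("SELECT CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])

# Underscore-separated number literals parse, per
# NUMBERS_CAN_BE_UNDERSCORE_SEPARATED documented above.
print(sqlglot.parse_one("SELECT 1_000_000", read="clickhouse").sql(dialect="clickhouse"))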
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
190 class Tokenizer(tokens.Tokenizer): 191 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 192 IDENTIFIERS = ['"', "`"] 193 IDENTIFIER_ESCAPES = ["\\"] 194 STRING_ESCAPES = ["'", "\\"] 195 BIT_STRINGS = [("0b", "")] 196 HEX_STRINGS = [("0x", ""), ("0X", "")] 197 HEREDOC_STRINGS = ["$"] 198 199 KEYWORDS = { 200 **tokens.Tokenizer.KEYWORDS, 201 "ATTACH": TokenType.COMMAND, 202 "DATE32": TokenType.DATE32, 203 "DATETIME64": TokenType.DATETIME64, 204 "DICTIONARY": TokenType.DICTIONARY, 205 "DYNAMIC": TokenType.DYNAMIC, 206 "ENUM8": TokenType.ENUM8, 207 "ENUM16": TokenType.ENUM16, 208 "FINAL": TokenType.FINAL, 209 "FIXEDSTRING": TokenType.FIXEDSTRING, 210 "FLOAT32": TokenType.FLOAT, 211 "FLOAT64": TokenType.DOUBLE, 212 "GLOBAL": TokenType.GLOBAL, 213 "INT256": TokenType.INT256, 214 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 215 "MAP": TokenType.MAP, 216 "NESTED": TokenType.NESTED, 217 "SAMPLE": TokenType.TABLE_SAMPLE, 218 "TUPLE": TokenType.STRUCT, 219 "UINT128": TokenType.UINT128, 220 "UINT16": TokenType.USMALLINT, 221 "UINT256": TokenType.UINT256, 222 "UINT32": TokenType.UINT, 223 "UINT64": TokenType.UBIGINT, 224 "UINT8": TokenType.UTINYINT, 225 "IPV4": TokenType.IPV4, 226 "IPV6": TokenType.IPV6, 227 "POINT": TokenType.POINT, 228 "RING": TokenType.RING, 229 "LINESTRING": TokenType.LINESTRING, 230 "MULTILINESTRING": TokenType.MULTILINESTRING, 231 "POLYGON": TokenType.POLYGON, 232 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 233 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 234 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 235 "SYSTEM": TokenType.COMMAND, 236 "PREWHERE": TokenType.PREWHERE, 237 } 238 KEYWORDS.pop("/*+") 239 240 SINGLE_TOKENS = { 241 **tokens.Tokenizer.SINGLE_TOKENS, 242 "$": TokenType.HEREDOC_STRING, 243 }
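A minimal sketch exercising the tokenizer settings above (hex and bit string prefixes, backtick identifiers, and the extra comment styles):

import sqlglot

tokens = sqlglot.tokenize(
    "SELECT 0x1F, 0b101, `col` # ClickHouse also accepts #-style comments",
    read="clickhouse",
)
for token in tokens:
    print(token.token_type, repr(token.text))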
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
245 class Parser(parser.Parser): 246 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 247 # * select x from t1 union all select x from t2 limit 1; 248 # * select x from t1 union all (select x from t2 limit 1); 249 MODIFIERS_ATTACHED_TO_SET_OP = False 250 INTERVAL_SPANS = False 251 OPTIONAL_ALIAS_TOKEN_CTE = False 252 253 FUNCTIONS = { 254 **parser.Parser.FUNCTIONS, 255 "ANY": exp.AnyValue.from_arg_list, 256 "ARRAYSUM": exp.ArraySum.from_arg_list, 257 "COUNTIF": _build_count_if, 258 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 259 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 260 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 261 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 262 "DATE_FORMAT": _build_date_format, 263 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 264 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 265 "FORMATDATETIME": _build_date_format, 266 "JSONEXTRACTSTRING": build_json_extract_path( 267 exp.JSONExtractScalar, zero_based_indexing=False 268 ), 269 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 270 "MAP": parser.build_var_map, 271 "MATCH": exp.RegexpLike.from_arg_list, 272 "RANDCANONICAL": exp.Rand.from_arg_list, 273 "STR_TO_DATE": _build_str_to_date, 274 "TUPLE": exp.Struct.from_arg_list, 275 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 276 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 277 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 278 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 279 "UNIQ": exp.ApproxDistinct.from_arg_list, 280 "XOR": lambda args: exp.Xor(expressions=args), 281 "MD5": exp.MD5Digest.from_arg_list, 282 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 283 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 284 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 285 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 286 } 287 FUNCTIONS.pop("TRANSFORM") 288 289 AGG_FUNCTIONS = { 290 "count", 291 "min", 292 "max", 293 "sum", 294 "avg", 295 "any", 296 "stddevPop", 297 "stddevSamp", 298 "varPop", 299 "varSamp", 300 "corr", 301 "covarPop", 302 "covarSamp", 303 "entropy", 304 "exponentialMovingAverage", 305 "intervalLengthSum", 306 "kolmogorovSmirnovTest", 307 "mannWhitneyUTest", 308 "median", 309 "rankCorr", 310 "sumKahan", 311 "studentTTest", 312 "welchTTest", 313 "anyHeavy", 314 "anyLast", 315 "boundingRatio", 316 "first_value", 317 "last_value", 318 "argMin", 319 "argMax", 320 "avgWeighted", 321 "topK", 322 "topKWeighted", 323 "deltaSum", 324 "deltaSumTimestamp", 325 "groupArray", 326 "groupArrayLast", 327 "groupUniqArray", 328 "groupArrayInsertAt", 329 "groupArrayMovingAvg", 330 "groupArrayMovingSum", 331 "groupArraySample", 332 "groupBitAnd", 333 "groupBitOr", 334 "groupBitXor", 335 "groupBitmap", 336 "groupBitmapAnd", 337 "groupBitmapOr", 338 "groupBitmapXor", 339 "sumWithOverflow", 340 "sumMap", 341 "minMap", 342 "maxMap", 343 "skewSamp", 344 "skewPop", 345 "kurtSamp", 346 "kurtPop", 347 "uniq", 348 "uniqExact", 349 "uniqCombined", 350 "uniqCombined64", 351 "uniqHLL12", 352 "uniqTheta", 353 "quantile", 354 "quantiles", 355 "quantileExact", 356 "quantilesExact", 357 "quantileExactLow", 358 "quantilesExactLow", 359 "quantileExactHigh", 360 "quantilesExactHigh", 361 "quantileExactWeighted", 362 "quantilesExactWeighted", 
363 "quantileTiming", 364 "quantilesTiming", 365 "quantileTimingWeighted", 366 "quantilesTimingWeighted", 367 "quantileDeterministic", 368 "quantilesDeterministic", 369 "quantileTDigest", 370 "quantilesTDigest", 371 "quantileTDigestWeighted", 372 "quantilesTDigestWeighted", 373 "quantileBFloat16", 374 "quantilesBFloat16", 375 "quantileBFloat16Weighted", 376 "quantilesBFloat16Weighted", 377 "simpleLinearRegression", 378 "stochasticLinearRegression", 379 "stochasticLogisticRegression", 380 "categoricalInformationValue", 381 "contingency", 382 "cramersV", 383 "cramersVBiasCorrected", 384 "theilsU", 385 "maxIntersections", 386 "maxIntersectionsPosition", 387 "meanZTest", 388 "quantileInterpolatedWeighted", 389 "quantilesInterpolatedWeighted", 390 "quantileGK", 391 "quantilesGK", 392 "sparkBar", 393 "sumCount", 394 "largestTriangleThreeBuckets", 395 "histogram", 396 "sequenceMatch", 397 "sequenceCount", 398 "windowFunnel", 399 "retention", 400 "uniqUpTo", 401 "sequenceNextNode", 402 "exponentialTimeDecayedAvg", 403 } 404 405 AGG_FUNCTIONS_SUFFIXES = [ 406 "If", 407 "Array", 408 "ArrayIf", 409 "Map", 410 "SimpleState", 411 "State", 412 "Merge", 413 "MergeState", 414 "ForEach", 415 "Distinct", 416 "OrDefault", 417 "OrNull", 418 "Resample", 419 "ArgMin", 420 "ArgMax", 421 ] 422 423 FUNC_TOKENS = { 424 *parser.Parser.FUNC_TOKENS, 425 TokenType.SET, 426 } 427 428 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 429 430 ID_VAR_TOKENS = { 431 *parser.Parser.ID_VAR_TOKENS, 432 TokenType.LIKE, 433 } 434 435 AGG_FUNC_MAPPING = ( 436 lambda functions, suffixes: { 437 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 438 } 439 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 440 441 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 442 443 FUNCTION_PARSERS = { 444 **parser.Parser.FUNCTION_PARSERS, 445 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 446 "QUANTILE": lambda self: self._parse_quantile(), 447 "MEDIAN": lambda self: self._parse_quantile(), 448 "COLUMNS": lambda self: self._parse_columns(), 449 } 450 451 FUNCTION_PARSERS.pop("MATCH") 452 453 PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy() 454 PROPERTY_PARSERS.pop("DYNAMIC") 455 456 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 457 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 458 459 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 460 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 461 462 RANGE_PARSERS = { 463 **parser.Parser.RANGE_PARSERS, 464 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 465 and self._parse_in(this, is_global=True), 466 } 467 468 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 469 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 
470 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 471 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 472 473 JOIN_KINDS = { 474 *parser.Parser.JOIN_KINDS, 475 TokenType.ANY, 476 TokenType.ASOF, 477 TokenType.ARRAY, 478 } 479 480 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 481 TokenType.ANY, 482 TokenType.ARRAY, 483 TokenType.FINAL, 484 TokenType.FORMAT, 485 TokenType.SETTINGS, 486 } 487 488 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 489 TokenType.FORMAT, 490 } 491 492 LOG_DEFAULTS_TO_LN = True 493 494 QUERY_MODIFIER_PARSERS = { 495 **parser.Parser.QUERY_MODIFIER_PARSERS, 496 TokenType.SETTINGS: lambda self: ( 497 "settings", 498 self._advance() or self._parse_csv(self._parse_assignment), 499 ), 500 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 501 } 502 503 CONSTRAINT_PARSERS = { 504 **parser.Parser.CONSTRAINT_PARSERS, 505 "INDEX": lambda self: self._parse_index_constraint(), 506 "CODEC": lambda self: self._parse_compress(), 507 } 508 509 ALTER_PARSERS = { 510 **parser.Parser.ALTER_PARSERS, 511 "REPLACE": lambda self: self._parse_alter_table_replace(), 512 } 513 514 SCHEMA_UNNAMED_CONSTRAINTS = { 515 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 516 "INDEX", 517 } 518 519 PLACEHOLDER_PARSERS = { 520 **parser.Parser.PLACEHOLDER_PARSERS, 521 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 522 } 523 524 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 525 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 526 return self._parse_lambda() 527 528 def _parse_types( 529 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 530 ) -> t.Optional[exp.Expression]: 531 dtype = super()._parse_types( 532 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 533 ) 534 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 535 # Mark every type as non-nullable which is ClickHouse's default, unless it's 536 # already marked as nullable. This marker helps us transpile types from other 537 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 538 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 539 # fail in ClickHouse without the `Nullable` type constructor. 540 dtype.set("nullable", False) 541 542 return dtype 543 544 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 545 index = self._index 546 this = self._parse_bitwise() 547 if self._match(TokenType.FROM): 548 self._retreat(index) 549 return super()._parse_extract() 550 551 # We return Anonymous here because extract and regexpExtract have different semantics, 552 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 553 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 554 # 555 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 
556 self._match(TokenType.COMMA) 557 return self.expression( 558 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 559 ) 560 561 def _parse_assignment(self) -> t.Optional[exp.Expression]: 562 this = super()._parse_assignment() 563 564 if self._match(TokenType.PLACEHOLDER): 565 return self.expression( 566 exp.If, 567 this=this, 568 true=self._parse_assignment(), 569 false=self._match(TokenType.COLON) and self._parse_assignment(), 570 ) 571 572 return this 573 574 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 575 """ 576 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 577 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 578 """ 579 index = self._index 580 581 this = self._parse_id_var() 582 self._match(TokenType.COLON) 583 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 584 self._match_text_seq("IDENTIFIER") and "Identifier" 585 ) 586 587 if not kind: 588 self._retreat(index) 589 return None 590 elif not self._match(TokenType.R_BRACE): 591 self.raise_error("Expecting }") 592 593 return self.expression(exp.Placeholder, this=this, kind=kind) 594 595 def _parse_bracket( 596 self, this: t.Optional[exp.Expression] = None 597 ) -> t.Optional[exp.Expression]: 598 l_brace = self._match(TokenType.L_BRACE, advance=False) 599 bracket = super()._parse_bracket(this) 600 601 if l_brace and isinstance(bracket, exp.Struct): 602 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 603 for expression in bracket.expressions: 604 if not isinstance(expression, exp.PropertyEQ): 605 break 606 607 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 608 varmap.args["values"].append("expressions", expression.expression) 609 610 return varmap 611 612 return bracket 613 614 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 615 this = super()._parse_in(this) 616 this.set("is_global", is_global) 617 return this 618 619 def _parse_table( 620 self, 621 schema: bool = False, 622 joins: bool = False, 623 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 624 parse_bracket: bool = False, 625 is_db_reference: bool = False, 626 parse_partition: bool = False, 627 ) -> t.Optional[exp.Expression]: 628 this = super()._parse_table( 629 schema=schema, 630 joins=joins, 631 alias_tokens=alias_tokens, 632 parse_bracket=parse_bracket, 633 is_db_reference=is_db_reference, 634 ) 635 636 if self._match(TokenType.FINAL): 637 this = self.expression(exp.Final, this=this) 638 639 return this 640 641 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 642 return super()._parse_position(haystack_first=True) 643 644 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 645 def _parse_cte(self) -> t.Optional[exp.CTE]: 646 # WITH <identifier> AS <subquery expression> 647 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 648 649 if not cte: 650 # WITH <expression> AS <identifier> 651 cte = self.expression( 652 exp.CTE, 653 this=self._parse_assignment(), 654 alias=self._parse_table_alias(), 655 scalar=True, 656 ) 657 658 return cte 659 660 def _parse_join_parts( 661 self, 662 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 663 is_global = self._match(TokenType.GLOBAL) and self._prev 664 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 665 666 if kind_pre: 667 kind = self._match_set(self.JOIN_KINDS) and self._prev 668 side = 
self._match_set(self.JOIN_SIDES) and self._prev 669 return is_global, side, kind 670 671 return ( 672 is_global, 673 self._match_set(self.JOIN_SIDES) and self._prev, 674 self._match_set(self.JOIN_KINDS) and self._prev, 675 ) 676 677 def _parse_join( 678 self, skip_join_token: bool = False, parse_bracket: bool = False 679 ) -> t.Optional[exp.Join]: 680 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 681 if join: 682 join.set("global", join.args.pop("method", None)) 683 684 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 685 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 686 if join.kind == "ARRAY": 687 for table in join.find_all(exp.Table): 688 table.replace(table.to_column()) 689 690 return join 691 692 def _parse_function( 693 self, 694 functions: t.Optional[t.Dict[str, t.Callable]] = None, 695 anonymous: bool = False, 696 optional_parens: bool = True, 697 any_token: bool = False, 698 ) -> t.Optional[exp.Expression]: 699 expr = super()._parse_function( 700 functions=functions, 701 anonymous=anonymous, 702 optional_parens=optional_parens, 703 any_token=any_token, 704 ) 705 706 func = expr.this if isinstance(expr, exp.Window) else expr 707 708 # Aggregate functions can be split in 2 parts: <func_name><suffix> 709 parts = ( 710 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 711 ) 712 713 if parts: 714 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 715 params = self._parse_func_params(anon_func) 716 717 kwargs = { 718 "this": anon_func.this, 719 "expressions": anon_func.expressions, 720 } 721 if parts[1]: 722 kwargs["parts"] = parts 723 exp_class: t.Type[exp.Expression] = ( 724 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 725 ) 726 else: 727 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 728 729 kwargs["exp_class"] = exp_class 730 if params: 731 kwargs["params"] = params 732 733 func = self.expression(**kwargs) 734 735 if isinstance(expr, exp.Window): 736 # The window's func was parsed as Anonymous in base parser, fix its 737 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 738 expr.set("this", func) 739 elif params: 740 # Params have blocked super()._parse_function() from parsing the following window 741 # (if that exists) as they're standing between the function call and the window spec 742 expr = self._parse_window(func) 743 else: 744 expr = func 745 746 return expr 747 748 def _parse_func_params( 749 self, this: t.Optional[exp.Func] = None 750 ) -> t.Optional[t.List[exp.Expression]]: 751 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 752 return self._parse_csv(self._parse_lambda) 753 754 if self._match(TokenType.L_PAREN): 755 params = self._parse_csv(self._parse_lambda) 756 self._match_r_paren(this) 757 return params 758 759 return None 760 761 def _parse_quantile(self) -> exp.Quantile: 762 this = self._parse_lambda() 763 params = self._parse_func_params() 764 if params: 765 return self.expression(exp.Quantile, this=params[0], quantile=this) 766 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 767 768 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 769 return super()._parse_wrapped_id_vars(optional=True) 770 771 def _parse_primary_key( 772 self, wrapped_optional: bool = False, in_props: bool = False 773 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 774 return super()._parse_primary_key( 775 
wrapped_optional=wrapped_optional or in_props, in_props=in_props 776 ) 777 778 def _parse_on_property(self) -> t.Optional[exp.Expression]: 779 index = self._index 780 if self._match_text_seq("CLUSTER"): 781 this = self._parse_id_var() 782 if this: 783 return self.expression(exp.OnCluster, this=this) 784 else: 785 self._retreat(index) 786 return None 787 788 def _parse_index_constraint( 789 self, kind: t.Optional[str] = None 790 ) -> exp.IndexColumnConstraint: 791 # INDEX name1 expr TYPE type1(args) GRANULARITY value 792 this = self._parse_id_var() 793 expression = self._parse_assignment() 794 795 index_type = self._match_text_seq("TYPE") and ( 796 self._parse_function() or self._parse_var() 797 ) 798 799 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 800 801 return self.expression( 802 exp.IndexColumnConstraint, 803 this=this, 804 expression=expression, 805 index_type=index_type, 806 granularity=granularity, 807 ) 808 809 def _parse_partition(self) -> t.Optional[exp.Partition]: 810 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 811 if not self._match(TokenType.PARTITION): 812 return None 813 814 if self._match_text_seq("ID"): 815 # Corresponds to the PARTITION ID <string_value> syntax 816 expressions: t.List[exp.Expression] = [ 817 self.expression(exp.PartitionId, this=self._parse_string()) 818 ] 819 else: 820 expressions = self._parse_expressions() 821 822 return self.expression(exp.Partition, expressions=expressions) 823 824 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 825 partition = self._parse_partition() 826 827 if not partition or not self._match(TokenType.FROM): 828 return None 829 830 return self.expression( 831 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 832 ) 833 834 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 835 if not self._match_text_seq("PROJECTION"): 836 return None 837 838 return self.expression( 839 exp.ProjectionDef, 840 this=self._parse_id_var(), 841 expression=self._parse_wrapped(self._parse_statement), 842 ) 843 844 def _parse_constraint(self) -> t.Optional[exp.Expression]: 845 return super()._parse_constraint() or self._parse_projection_def() 846 847 def _parse_alias( 848 self, this: t.Optional[exp.Expression], explicit: bool = False 849 ) -> t.Optional[exp.Expression]: 850 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 851 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 852 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 853 return this 854 855 return super()._parse_alias(this=this, explicit=explicit) 856 857 def _parse_expression(self) -> t.Optional[exp.Expression]: 858 this = super()._parse_expression() 859 860 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 861 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 862 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 863 self._match(TokenType.R_PAREN) 864 865 return this 866 867 def _parse_columns(self) -> exp.Expression: 868 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 869 870 while self._next and self._match_text_seq(")", "APPLY", "("): 871 self._match(TokenType.R_PAREN) 872 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 873 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
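A minimal sketch of ClickHouse-specific syntax handled by this parser, round-tripped through the dialect; outputs may be normalized rather than byte-identical:

import sqlglot

for sql in (
    "SELECT x > 1 ? 'yes' : 'no'",     # ternary -> exp.If (_parse_assignment)
    "SELECT {abc: UInt32}",            # query parameter -> exp.Placeholder (_parse_query_parameter)
    "SELECT * FROM t FINAL",           # FINAL modifier -> exp.Final (_parse_table)
    "SELECT quantile(0.5)(x) FROM t",  # parameterized aggregate (_parse_quantile / _parse_func_params)
):
    print(sqlglot.parse_one(sql, read="clickhouse").sql(dialect="clickhouse"))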
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    VALUES_AS_TABLE = False
    ARRAY_SIZE_NAME = "LENGTH"

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: trim_sql,
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()
                query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql
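To see how the mappings and transforms above surface in generated SQL, here is a minimal sketch using the public transpile API. The expected outputs in the comments are illustrative; exact rendering can vary slightly between sqlglot versions.

import sqlglot

# STRING_TYPE_MAPPING collapses VARCHAR (and its length) to String, and
# datatype_sql (below) wraps plain scalar types in Nullable(...).
print(sqlglot.transpile("CAST(x AS VARCHAR(10))", write="clickhouse")[0])
# e.g. CAST(x AS Nullable(String))

# TRANSFORMS renames functions: APPROX_DISTINCT -> uniq, NULLIF -> nullIf.
print(sqlglot.transpile("SELECT APPROX_DISTINCT(a), NULLIF(b, 0) FROM t", write="clickhouse")[0])
# e.g. SELECT uniq(a), nullIf(b, 0) FROM t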
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
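These options are ordinary keyword arguments, so with the ClickHouse dialect they can be exercised through sqlglot.transpile or Expression.sql. A small sketch (the pretty-printed output shape depends on the sqlglot version):

import sqlglot

sql = "select id, count(*) from events group by id limit 5"

# Pretty-print and force-quote identifiers while generating ClickHouse SQL.
print(sqlglot.transpile(sql, write="clickhouse", pretty=True, identify=True)[0])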
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return f"CAST({strtodate_sql} AS DATE)"

    return strtodate_sql
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
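Together, these two methods ensure a StrToDate node ends up inside exactly one cast. A sketch of the round trip (the rendered format string and inner call are version-dependent):

import sqlglot

# Postgres TO_DATE parses into a StrToDate node; the ClickHouse generator
# wraps it so the result stays a DATE rather than a DateTime.
print(sqlglot.transpile("SELECT TO_DATE('05 12 2000', 'DD MM YYYY')", read="postgres", write="clickhouse")[0])
# e.g. SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)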
def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
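Since ClickHouse has no TRY_CAST, this method rewrites it as a cast to a Nullable target, which yields NULL on failure. A minimal sketch (output hedged as an expectation for recent versions):

import sqlglot

print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="clickhouse")[0])
# e.g. SELECT CAST(x AS Nullable(Int32))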
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
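The three conditions in the comment are easiest to see side by side. In this sketch, the bare scalar is wrapped, while the Map key and composite types are left alone (outputs are what recent sqlglot versions are expected to produce):

import sqlglot

# A bare scalar type gets the Nullable wrapper...
print(sqlglot.transpile("CAST(a AS TEXT)", write="clickhouse")[0])
# e.g. CAST(a AS Nullable(String))

# ...but Map keys and the Map/Array containers themselves do not.
print(sqlglot.transpile("CAST(m AS MAP(VARCHAR, INT))", read="duckdb", write="clickhouse")[0])
# e.g. CAST(m AS Map(String, Nullable(Int32)))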
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
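In practice, this is what keeps ClickHouse's trailing SETTINGS and FORMAT clauses intact through a parse/generate round trip. A small sketch:

from sqlglot import parse_one

query = parse_one(
    "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow",
    read="clickhouse",
)
# Both clauses are stored on the Select node and re-emitted after LIMIT.
print(query.sql(dialect="clickhouse"))
# e.g. SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow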
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()
            query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
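Combined, these two hooks govern where ON CLUSTER and a table COMMENT land in the emitted DDL. A sketch, assuming round-trippable ClickHouse statements (outputs are expectations, not guarantees across versions):

from sqlglot import parse_one

# ON CLUSTER is a POST_NAME property, so it is emitted right after the
# table name, before the column schema.
ddl = "CREATE TABLE t ON CLUSTER main (id UInt64) ENGINE=MergeTree() ORDER BY id"
print(parse_one(ddl, read="clickhouse").sql(dialect="clickhouse"))

# In a CTAS, the COMMENT property is popped and re-appended after the
# parenthesized query.
ctas = parse_one("CREATE TABLE t COMMENT 'docs' AS SELECT 1", read="clickhouse")
print(ctas.sql(dialect="clickhouse"))
# e.g. CREATE TABLE t AS (SELECT 1) COMMENT 'docs'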
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
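For reference, this is how a ClickHouse data-skipping index rendered by this method round-trips; the index type is passed through verbatim. A sketch:

from sqlglot import parse_one

ddl = (
    "CREATE TABLE t (x UInt32, y String, INDEX i y TYPE minmax GRANULARITY 1) "
    "ENGINE=MergeTree() ORDER BY x"
)
# The INDEX constraint is reassembled as INDEX <name> <expr> TYPE <t> GRANULARITY <g>.
print(parse_one(ddl, read="clickhouse").sql(dialect="clickhouse"))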
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql