sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)

            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.Select: transforms.preprocess([transforms.eliminate_qualify]),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
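To see the dialect in action end to end, here is a minimal usage sketch. It assumes a recent sqlglot installation; the table and column names are made up, and the commented outputs are indicative rather than pinned to a specific sqlglot version.

    import sqlglot

    # "uniq" is registered above as exp.ApproxDistinct, so it transpiles to
    # other dialects' approximate-count functions.
    print(sqlglot.transpile("SELECT uniq(user_id) FROM events", read="clickhouse", write="duckdb")[0])
    # e.g. SELECT APPROX_COUNT_DISTINCT(user_id) FROM events

    # Parameterized aggregates round-trip through exp.Quantile / _quantile_sql.
    print(sqlglot.transpile("SELECT quantile(0.9)(latency) FROM requests", read="clickhouse")[0])
    # e.g. SELECT quantile(0.9)(latency) FROM requests

    # Query parameters parse into exp.Placeholder and are regenerated verbatim.
    print(sqlglot.transpile("SELECT {id: UInt32}", read="clickhouse")[0])
    # e.g. SELECT {id: UInt32}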
class ClickHouse(Dialect)

NORMALIZE_FUNCTIONS: bool | str = False
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
NULL_ORDERING = "nulls_are_last"
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
LOG_BASE_FIRST: t.Optional[bool] = None
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
FORCE_EARLY_ALIAS_REF_EXPANSION = True
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the whole query (this resolution happens in _qualify_columns()), except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
UNESCAPED_SEQUENCES = {"\\0": "\0"}
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version ("\n").
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    ...  # full definition appears in the module source listing above
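A small sketch of the tokenizer conventions defined here: "#" starts a comment and backticks quote identifiers (the statement is made up):

    from sqlglot.dialects import ClickHouse

    # The trailing "# note" is lexed as a comment, not as SQL text.
    tokens = ClickHouse().tokenize("SELECT `col` FROM t # note")
    print([(token.token_type, token.text) for token in tokens])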
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    ...  # full definition appears in the module source listing above
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
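A short sketch of the ClickHouse-specific parses handled by this class: a query parameter placeholder, the ?: ternary operator, and FINAL (identifiers are made up):

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # {lo: UInt32} is a ClickHouse query parameter, x ? 1 : 0 the ternary
    # operator, and FINAL forces fully-merged reads from the table.
    ast = sqlglot.parse_one(
        "SELECT x ? 1 : 0 FROM t FINAL WHERE y > {lo: UInt32}",
        read="clickhouse",
        error_level=ErrorLevel.RAISE,
    )
    print(repr(ast))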
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    ...  # full definition appears in the module source listing above
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
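These options are forwarded by the top-level helpers; a minimal sketch that pretty-prints and force-quotes identifiers while round-tripping a ClickHouse SETTINGS clause:

    import sqlglot

    print(
        sqlglot.transpile(
            "SELECT a FROM t SETTINGS max_threads = 4",
            read="clickhouse",
            write="clickhouse",
            pretty=True,
            identify=True,
        )[0]
    )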
def datatype_sql(self, expression: exp.DataType) -> str: ...
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: ...
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: ...
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: ...
(full method bodies appear in the module source listing above)
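As a quick check of the String aliasing implemented in datatype_sql, a hedged sketch:

    import sqlglot

    # Text-like types collapse to String and length parameters are dropped.
    print(sqlglot.transpile("SELECT CAST(a AS VARCHAR(10))", read="postgres", write="clickhouse")[0])
    # expected: SELECT CAST(a AS String)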
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql