sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, parser, tokens 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 no_pivot_sql, 15 build_json_extract_path, 16 rename_func, 17 sha256_sql, 18 var_map_sql, 19 timestamptrunc_sql, 20 unit_to_var, 21) 22from sqlglot.generator import Generator 23from sqlglot.helper import is_int, seq_get 24from sqlglot.tokens import Token, TokenType 25 26DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 27 28 29def _build_date_format(args: t.List) -> exp.TimeToStr: 30 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 31 32 timezone = seq_get(args, 2) 33 if timezone: 34 expr.set("timezone", timezone) 35 36 return expr 37 38 39def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 40 scale = expression.args.get("scale") 41 timestamp = expression.this 42 43 if scale in (None, exp.UnixToTime.SECONDS): 44 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 45 if scale == exp.UnixToTime.MILLIS: 46 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 47 if scale == exp.UnixToTime.MICROS: 48 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 49 if scale == exp.UnixToTime.NANOS: 50 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 51 52 return self.func( 53 "fromUnixTimestamp", 54 exp.cast( 55 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 56 ), 57 ) 58 59 60def _lower_func(sql: str) -> str: 61 index = sql.index("(") 62 return sql[:index].lower() + sql[index:] 63 64 65def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 66 quantile = 
expression.args["quantile"] 67 args = f"({self.sql(expression, 'this')})" 68 69 if isinstance(quantile, exp.Array): 70 func = self.func("quantiles", *quantile) 71 else: 72 func = self.func("quantile", quantile) 73 74 return func + args 75 76 77def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 78 if len(args) == 1: 79 return exp.CountIf(this=seq_get(args, 0)) 80 81 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 82 83 84def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 85 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 86 if not expression.unit: 87 return rename_func(name)(self, expression) 88 89 return self.func( 90 name, 91 unit_to_var(expression), 92 expression.expression, 93 expression.this, 94 ) 95 96 return _delta_sql 97 98 99class ClickHouse(Dialect): 100 NORMALIZE_FUNCTIONS: bool | str = False 101 NULL_ORDERING = "nulls_are_last" 102 SUPPORTS_USER_DEFINED_TYPES = False 103 SAFE_DIVISION = True 104 LOG_BASE_FIRST: t.Optional[bool] = None 105 FORCE_EARLY_ALIAS_REF_EXPANSION = True 106 107 UNESCAPED_SEQUENCES = { 108 "\\0": "\0", 109 } 110 111 class Tokenizer(tokens.Tokenizer): 112 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 113 IDENTIFIERS = ['"', "`"] 114 STRING_ESCAPES = ["'", "\\"] 115 BIT_STRINGS = [("0b", "")] 116 HEX_STRINGS = [("0x", ""), ("0X", "")] 117 HEREDOC_STRINGS = ["$"] 118 119 KEYWORDS = { 120 **tokens.Tokenizer.KEYWORDS, 121 "ATTACH": TokenType.COMMAND, 122 "DATE32": TokenType.DATE32, 123 "DATETIME64": TokenType.DATETIME64, 124 "DICTIONARY": TokenType.DICTIONARY, 125 "ENUM8": TokenType.ENUM8, 126 "ENUM16": TokenType.ENUM16, 127 "FINAL": TokenType.FINAL, 128 "FIXEDSTRING": TokenType.FIXEDSTRING, 129 "FLOAT32": TokenType.FLOAT, 130 "FLOAT64": TokenType.DOUBLE, 131 "GLOBAL": TokenType.GLOBAL, 132 "INT256": TokenType.INT256, 133 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 134 "MAP": TokenType.MAP, 135 "NESTED": TokenType.NESTED, 136 
"SAMPLE": TokenType.TABLE_SAMPLE, 137 "TUPLE": TokenType.STRUCT, 138 "UINT128": TokenType.UINT128, 139 "UINT16": TokenType.USMALLINT, 140 "UINT256": TokenType.UINT256, 141 "UINT32": TokenType.UINT, 142 "UINT64": TokenType.UBIGINT, 143 "UINT8": TokenType.UTINYINT, 144 "IPV4": TokenType.IPV4, 145 "IPV6": TokenType.IPV6, 146 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 147 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 148 "SYSTEM": TokenType.COMMAND, 149 "PREWHERE": TokenType.PREWHERE, 150 } 151 KEYWORDS.pop("/*+") 152 153 SINGLE_TOKENS = { 154 **tokens.Tokenizer.SINGLE_TOKENS, 155 "$": TokenType.HEREDOC_STRING, 156 } 157 158 class Parser(parser.Parser): 159 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 160 # * select x from t1 union all select x from t2 limit 1; 161 # * select x from t1 union all (select x from t2 limit 1); 162 MODIFIERS_ATTACHED_TO_SET_OP = False 163 INTERVAL_SPANS = False 164 165 FUNCTIONS = { 166 **parser.Parser.FUNCTIONS, 167 "ANY": exp.AnyValue.from_arg_list, 168 "ARRAYSUM": exp.ArraySum.from_arg_list, 169 "COUNTIF": _build_count_if, 170 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 171 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 172 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 173 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 174 "DATE_FORMAT": _build_date_format, 175 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 176 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 177 "FORMATDATETIME": _build_date_format, 178 "JSONEXTRACTSTRING": build_json_extract_path( 179 exp.JSONExtractScalar, zero_based_indexing=False 180 ), 181 "MAP": parser.build_var_map, 182 "MATCH": exp.RegexpLike.from_arg_list, 183 "RANDCANONICAL": exp.Rand.from_arg_list, 184 "TUPLE": exp.Struct.from_arg_list, 185 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 186 "TIMESTAMPSUB": 
build_date_delta(exp.TimestampSub, default_unit=None), 187 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 188 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 189 "UNIQ": exp.ApproxDistinct.from_arg_list, 190 "XOR": lambda args: exp.Xor(expressions=args), 191 "MD5": exp.MD5Digest.from_arg_list, 192 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 193 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 194 } 195 196 AGG_FUNCTIONS = { 197 "count", 198 "min", 199 "max", 200 "sum", 201 "avg", 202 "any", 203 "stddevPop", 204 "stddevSamp", 205 "varPop", 206 "varSamp", 207 "corr", 208 "covarPop", 209 "covarSamp", 210 "entropy", 211 "exponentialMovingAverage", 212 "intervalLengthSum", 213 "kolmogorovSmirnovTest", 214 "mannWhitneyUTest", 215 "median", 216 "rankCorr", 217 "sumKahan", 218 "studentTTest", 219 "welchTTest", 220 "anyHeavy", 221 "anyLast", 222 "boundingRatio", 223 "first_value", 224 "last_value", 225 "argMin", 226 "argMax", 227 "avgWeighted", 228 "topK", 229 "topKWeighted", 230 "deltaSum", 231 "deltaSumTimestamp", 232 "groupArray", 233 "groupArrayLast", 234 "groupUniqArray", 235 "groupArrayInsertAt", 236 "groupArrayMovingAvg", 237 "groupArrayMovingSum", 238 "groupArraySample", 239 "groupBitAnd", 240 "groupBitOr", 241 "groupBitXor", 242 "groupBitmap", 243 "groupBitmapAnd", 244 "groupBitmapOr", 245 "groupBitmapXor", 246 "sumWithOverflow", 247 "sumMap", 248 "minMap", 249 "maxMap", 250 "skewSamp", 251 "skewPop", 252 "kurtSamp", 253 "kurtPop", 254 "uniq", 255 "uniqExact", 256 "uniqCombined", 257 "uniqCombined64", 258 "uniqHLL12", 259 "uniqTheta", 260 "quantile", 261 "quantiles", 262 "quantileExact", 263 "quantilesExact", 264 "quantileExactLow", 265 "quantilesExactLow", 266 "quantileExactHigh", 267 "quantilesExactHigh", 268 "quantileExactWeighted", 269 "quantilesExactWeighted", 270 "quantileTiming", 271 "quantilesTiming", 272 
"quantileTimingWeighted", 273 "quantilesTimingWeighted", 274 "quantileDeterministic", 275 "quantilesDeterministic", 276 "quantileTDigest", 277 "quantilesTDigest", 278 "quantileTDigestWeighted", 279 "quantilesTDigestWeighted", 280 "quantileBFloat16", 281 "quantilesBFloat16", 282 "quantileBFloat16Weighted", 283 "quantilesBFloat16Weighted", 284 "simpleLinearRegression", 285 "stochasticLinearRegression", 286 "stochasticLogisticRegression", 287 "categoricalInformationValue", 288 "contingency", 289 "cramersV", 290 "cramersVBiasCorrected", 291 "theilsU", 292 "maxIntersections", 293 "maxIntersectionsPosition", 294 "meanZTest", 295 "quantileInterpolatedWeighted", 296 "quantilesInterpolatedWeighted", 297 "quantileGK", 298 "quantilesGK", 299 "sparkBar", 300 "sumCount", 301 "largestTriangleThreeBuckets", 302 "histogram", 303 "sequenceMatch", 304 "sequenceCount", 305 "windowFunnel", 306 "retention", 307 "uniqUpTo", 308 "sequenceNextNode", 309 "exponentialTimeDecayedAvg", 310 } 311 312 AGG_FUNCTIONS_SUFFIXES = [ 313 "If", 314 "Array", 315 "ArrayIf", 316 "Map", 317 "SimpleState", 318 "State", 319 "Merge", 320 "MergeState", 321 "ForEach", 322 "Distinct", 323 "OrDefault", 324 "OrNull", 325 "Resample", 326 "ArgMin", 327 "ArgMax", 328 ] 329 330 FUNC_TOKENS = { 331 *parser.Parser.FUNC_TOKENS, 332 TokenType.SET, 333 } 334 335 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 336 337 AGG_FUNC_MAPPING = ( 338 lambda functions, suffixes: { 339 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 340 } 341 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 342 343 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 344 345 FUNCTION_PARSERS = { 346 **parser.Parser.FUNCTION_PARSERS, 347 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 348 "QUANTILE": lambda self: self._parse_quantile(), 349 } 350 351 FUNCTION_PARSERS.pop("MATCH") 352 353 NO_PAREN_FUNCTION_PARSERS = 
parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 354 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 355 356 RANGE_PARSERS = { 357 **parser.Parser.RANGE_PARSERS, 358 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 359 and self._parse_in(this, is_global=True), 360 } 361 362 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 363 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 364 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 365 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 366 367 JOIN_KINDS = { 368 *parser.Parser.JOIN_KINDS, 369 TokenType.ANY, 370 TokenType.ASOF, 371 TokenType.ARRAY, 372 } 373 374 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 375 TokenType.ANY, 376 TokenType.ARRAY, 377 TokenType.FINAL, 378 TokenType.FORMAT, 379 TokenType.SETTINGS, 380 } 381 382 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 383 TokenType.FORMAT, 384 } 385 386 LOG_DEFAULTS_TO_LN = True 387 388 QUERY_MODIFIER_PARSERS = { 389 **parser.Parser.QUERY_MODIFIER_PARSERS, 390 TokenType.SETTINGS: lambda self: ( 391 "settings", 392 self._advance() or self._parse_csv(self._parse_assignment), 393 ), 394 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 395 } 396 397 CONSTRAINT_PARSERS = { 398 **parser.Parser.CONSTRAINT_PARSERS, 399 "INDEX": lambda self: self._parse_index_constraint(), 400 "CODEC": lambda self: self._parse_compress(), 401 } 402 403 ALTER_PARSERS = { 404 **parser.Parser.ALTER_PARSERS, 405 "REPLACE": lambda self: self._parse_alter_table_replace(), 406 } 407 408 SCHEMA_UNNAMED_CONSTRAINTS = { 409 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 410 "INDEX", 411 } 412 413 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 414 index = self._index 415 this = self._parse_bitwise() 416 if self._match(TokenType.FROM): 417 self._retreat(index) 418 return super()._parse_extract() 419 420 # We return Anonymous here because extract and regexpExtract 
have different semantics, 421 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 422 # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`. 423 # 424 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 425 self._match(TokenType.COMMA) 426 return self.expression( 427 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 428 ) 429 430 def _parse_assignment(self) -> t.Optional[exp.Expression]: 431 this = super()._parse_assignment() 432 433 if self._match(TokenType.PLACEHOLDER): 434 return self.expression( 435 exp.If, 436 this=this, 437 true=self._parse_assignment(), 438 false=self._match(TokenType.COLON) and self._parse_assignment(), 439 ) 440 441 return this 442 443 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 444 """ 445 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 446 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 447 """ 448 if not self._match(TokenType.L_BRACE): 449 return None 450 451 this = self._parse_id_var() 452 self._match(TokenType.COLON) 453 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 454 self._match_text_seq("IDENTIFIER") and "Identifier" 455 ) 456 457 if not kind: 458 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 459 elif not self._match(TokenType.R_BRACE): 460 self.raise_error("Expecting }") 461 462 return self.expression(exp.Placeholder, this=this, kind=kind) 463 464 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 465 this = super()._parse_in(this) 466 this.set("is_global", is_global) 467 return this 468 469 def _parse_table( 470 self, 471 schema: bool = False, 472 joins: bool = False, 473 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 474 parse_bracket: bool = False, 475 is_db_reference: bool = False, 476 parse_partition: bool = 
False, 477 ) -> t.Optional[exp.Expression]: 478 this = super()._parse_table( 479 schema=schema, 480 joins=joins, 481 alias_tokens=alias_tokens, 482 parse_bracket=parse_bracket, 483 is_db_reference=is_db_reference, 484 ) 485 486 if self._match(TokenType.FINAL): 487 this = self.expression(exp.Final, this=this) 488 489 return this 490 491 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 492 return super()._parse_position(haystack_first=True) 493 494 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 495 def _parse_cte(self) -> exp.CTE: 496 # WITH <identifier> AS <subquery expression> 497 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 498 499 if not cte: 500 # WITH <expression> AS <identifier> 501 cte = self.expression( 502 exp.CTE, 503 this=self._parse_assignment(), 504 alias=self._parse_table_alias(), 505 scalar=True, 506 ) 507 508 return cte 509 510 def _parse_join_parts( 511 self, 512 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 513 is_global = self._match(TokenType.GLOBAL) and self._prev 514 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 515 516 if kind_pre: 517 kind = self._match_set(self.JOIN_KINDS) and self._prev 518 side = self._match_set(self.JOIN_SIDES) and self._prev 519 return is_global, side, kind 520 521 return ( 522 is_global, 523 self._match_set(self.JOIN_SIDES) and self._prev, 524 self._match_set(self.JOIN_KINDS) and self._prev, 525 ) 526 527 def _parse_join( 528 self, skip_join_token: bool = False, parse_bracket: bool = False 529 ) -> t.Optional[exp.Join]: 530 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 531 if join: 532 join.set("global", join.args.pop("method", None)) 533 534 return join 535 536 def _parse_function( 537 self, 538 functions: t.Optional[t.Dict[str, t.Callable]] = None, 539 anonymous: bool = False, 540 optional_parens: bool = True, 541 any_token: bool = False, 542 ) -> 
t.Optional[exp.Expression]: 543 expr = super()._parse_function( 544 functions=functions, 545 anonymous=anonymous, 546 optional_parens=optional_parens, 547 any_token=any_token, 548 ) 549 550 func = expr.this if isinstance(expr, exp.Window) else expr 551 552 # Aggregate functions can be split in 2 parts: <func_name><suffix> 553 parts = ( 554 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 555 ) 556 557 if parts: 558 params = self._parse_func_params(func) 559 560 kwargs = { 561 "this": func.this, 562 "expressions": func.expressions, 563 } 564 if parts[1]: 565 kwargs["parts"] = parts 566 exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 567 else: 568 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 569 570 kwargs["exp_class"] = exp_class 571 if params: 572 kwargs["params"] = params 573 574 func = self.expression(**kwargs) 575 576 if isinstance(expr, exp.Window): 577 # The window's func was parsed as Anonymous in base parser, fix its 578 # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc 579 expr.set("this", func) 580 elif params: 581 # Params have blocked super()._parse_function() from parsing the following window 582 # (if that exists) as they're standing between the function call and the window spec 583 expr = self._parse_window(func) 584 else: 585 expr = func 586 587 return expr 588 589 def _parse_func_params( 590 self, this: t.Optional[exp.Func] = None 591 ) -> t.Optional[t.List[exp.Expression]]: 592 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 593 return self._parse_csv(self._parse_lambda) 594 595 if self._match(TokenType.L_PAREN): 596 params = self._parse_csv(self._parse_lambda) 597 self._match_r_paren(this) 598 return params 599 600 return None 601 602 def _parse_quantile(self) -> exp.Quantile: 603 this = self._parse_lambda() 604 params = self._parse_func_params() 605 if params: 606 return self.expression(exp.Quantile, this=params[0], quantile=this) 607 
return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 608 609 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 610 return super()._parse_wrapped_id_vars(optional=True) 611 612 def _parse_primary_key( 613 self, wrapped_optional: bool = False, in_props: bool = False 614 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 615 return super()._parse_primary_key( 616 wrapped_optional=wrapped_optional or in_props, in_props=in_props 617 ) 618 619 def _parse_on_property(self) -> t.Optional[exp.Expression]: 620 index = self._index 621 if self._match_text_seq("CLUSTER"): 622 this = self._parse_id_var() 623 if this: 624 return self.expression(exp.OnCluster, this=this) 625 else: 626 self._retreat(index) 627 return None 628 629 def _parse_index_constraint( 630 self, kind: t.Optional[str] = None 631 ) -> exp.IndexColumnConstraint: 632 # INDEX name1 expr TYPE type1(args) GRANULARITY value 633 this = self._parse_id_var() 634 expression = self._parse_assignment() 635 636 index_type = self._match_text_seq("TYPE") and ( 637 self._parse_function() or self._parse_var() 638 ) 639 640 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 641 642 return self.expression( 643 exp.IndexColumnConstraint, 644 this=this, 645 expression=expression, 646 index_type=index_type, 647 granularity=granularity, 648 ) 649 650 def _parse_partition(self) -> t.Optional[exp.Partition]: 651 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 652 if not self._match(TokenType.PARTITION): 653 return None 654 655 if self._match_text_seq("ID"): 656 # Corresponds to the PARTITION ID <string_value> syntax 657 expressions: t.List[exp.Expression] = [ 658 self.expression(exp.PartitionId, this=self._parse_string()) 659 ] 660 else: 661 expressions = self._parse_expressions() 662 663 return self.expression(exp.Partition, expressions=expressions) 664 665 def 
_parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 666 partition = self._parse_partition() 667 668 if not partition or not self._match(TokenType.FROM): 669 return None 670 671 return self.expression( 672 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 673 ) 674 675 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 676 if not self._match_text_seq("PROJECTION"): 677 return None 678 679 return self.expression( 680 exp.ProjectionDef, 681 this=self._parse_id_var(), 682 expression=self._parse_wrapped(self._parse_statement), 683 ) 684 685 def _parse_constraint(self) -> t.Optional[exp.Expression]: 686 return super()._parse_constraint() or self._parse_projection_def() 687 688 class Generator(generator.Generator): 689 QUERY_HINTS = False 690 STRUCT_DELIMITER = ("(", ")") 691 NVL2_SUPPORTED = False 692 TABLESAMPLE_REQUIRES_PARENS = False 693 TABLESAMPLE_SIZE_IS_ROWS = False 694 TABLESAMPLE_KEYWORDS = "SAMPLE" 695 LAST_DAY_SUPPORTS_DATE_PART = False 696 CAN_IMPLEMENT_ARRAY_ANY = True 697 SUPPORTS_TO_NUMBER = False 698 JOIN_HINTS = False 699 TABLE_HINTS = False 700 EXPLICIT_SET_OP = True 701 GROUPINGS_SEP = "" 702 SET_OP_MODIFIERS = False 703 SUPPORTS_TABLE_ALIAS_COLUMNS = False 704 705 STRING_TYPE_MAPPING = { 706 exp.DataType.Type.CHAR: "String", 707 exp.DataType.Type.LONGBLOB: "String", 708 exp.DataType.Type.LONGTEXT: "String", 709 exp.DataType.Type.MEDIUMBLOB: "String", 710 exp.DataType.Type.MEDIUMTEXT: "String", 711 exp.DataType.Type.TINYBLOB: "String", 712 exp.DataType.Type.TINYTEXT: "String", 713 exp.DataType.Type.TEXT: "String", 714 exp.DataType.Type.VARBINARY: "String", 715 exp.DataType.Type.VARCHAR: "String", 716 } 717 718 SUPPORTED_JSON_PATH_PARTS = { 719 exp.JSONPathKey, 720 exp.JSONPathRoot, 721 exp.JSONPathSubscript, 722 } 723 724 TYPE_MAPPING = { 725 **generator.Generator.TYPE_MAPPING, 726 **STRING_TYPE_MAPPING, 727 exp.DataType.Type.ARRAY: "Array", 728 exp.DataType.Type.BIGINT: "Int64", 729 
exp.DataType.Type.DATE32: "Date32", 730 exp.DataType.Type.DATETIME64: "DateTime64", 731 exp.DataType.Type.DOUBLE: "Float64", 732 exp.DataType.Type.ENUM: "Enum", 733 exp.DataType.Type.ENUM8: "Enum8", 734 exp.DataType.Type.ENUM16: "Enum16", 735 exp.DataType.Type.FIXEDSTRING: "FixedString", 736 exp.DataType.Type.FLOAT: "Float32", 737 exp.DataType.Type.INT: "Int32", 738 exp.DataType.Type.MEDIUMINT: "Int32", 739 exp.DataType.Type.INT128: "Int128", 740 exp.DataType.Type.INT256: "Int256", 741 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 742 exp.DataType.Type.MAP: "Map", 743 exp.DataType.Type.NESTED: "Nested", 744 exp.DataType.Type.NULLABLE: "Nullable", 745 exp.DataType.Type.SMALLINT: "Int16", 746 exp.DataType.Type.STRUCT: "Tuple", 747 exp.DataType.Type.TINYINT: "Int8", 748 exp.DataType.Type.UBIGINT: "UInt64", 749 exp.DataType.Type.UINT: "UInt32", 750 exp.DataType.Type.UINT128: "UInt128", 751 exp.DataType.Type.UINT256: "UInt256", 752 exp.DataType.Type.USMALLINT: "UInt16", 753 exp.DataType.Type.UTINYINT: "UInt8", 754 exp.DataType.Type.IPV4: "IPv4", 755 exp.DataType.Type.IPV6: "IPv6", 756 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 757 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 758 } 759 760 TRANSFORMS = { 761 **generator.Generator.TRANSFORMS, 762 exp.AnyValue: rename_func("any"), 763 exp.ApproxDistinct: rename_func("uniq"), 764 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 765 exp.ArraySize: rename_func("LENGTH"), 766 exp.ArraySum: rename_func("arraySum"), 767 exp.ArgMax: arg_max_or_min_no_count("argMax"), 768 exp.ArgMin: arg_max_or_min_no_count("argMin"), 769 exp.Array: inline_array_sql, 770 exp.CastToStrType: rename_func("CAST"), 771 exp.CountIf: rename_func("countIf"), 772 exp.CompressColumnConstraint: lambda self, 773 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 774 exp.ComputedColumnConstraint: lambda self, 775 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} 
{self.sql(e, 'this')}", 776 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 777 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 778 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 779 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 780 exp.Explode: rename_func("arrayJoin"), 781 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 782 exp.IsNan: rename_func("isNaN"), 783 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 784 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 785 exp.JSONPathKey: json_path_key_only_name, 786 exp.JSONPathRoot: lambda *_: "", 787 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 788 exp.Nullif: rename_func("nullIf"), 789 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 790 exp.Pivot: no_pivot_sql, 791 exp.Quantile: _quantile_sql, 792 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 793 exp.Rand: rename_func("randCanonical"), 794 exp.StartsWith: rename_func("startsWith"), 795 exp.StrPosition: lambda self, e: self.func( 796 "position", e.this, e.args.get("substr"), e.args.get("position") 797 ), 798 exp.TimeToStr: lambda self, e: self.func( 799 "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone") 800 ), 801 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 802 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 803 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 804 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 805 exp.MD5Digest: rename_func("MD5"), 806 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 807 exp.SHA: rename_func("SHA1"), 808 exp.SHA2: sha256_sql, 809 exp.UnixToTime: _unix_to_time_sql, 810 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 811 exp.Variance: rename_func("varSamp"), 812 exp.Stddev: rename_func("stddevSamp"), 813 } 814 815 PROPERTIES_LOCATION = { 816 
**generator.Generator.PROPERTIES_LOCATION, 817 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 818 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 819 exp.OnCluster: exp.Properties.Location.POST_NAME, 820 } 821 822 # there's no list in docs, but it can be found in Clickhouse code 823 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 824 ON_CLUSTER_TARGETS = { 825 "DATABASE", 826 "TABLE", 827 "VIEW", 828 "DICTIONARY", 829 "INDEX", 830 "FUNCTION", 831 "NAMED COLLECTION", 832 } 833 834 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 835 this = self.json_path_part(expression.this) 836 return str(int(this) + 1) if is_int(this) else this 837 838 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 839 return f"AS {self.sql(expression, 'this')}" 840 841 def _any_to_has( 842 self, 843 expression: exp.EQ | exp.NEQ, 844 default: t.Callable[[t.Any], str], 845 prefix: str = "", 846 ) -> str: 847 if isinstance(expression.left, exp.Any): 848 arr = expression.left 849 this = expression.right 850 elif isinstance(expression.right, exp.Any): 851 arr = expression.right 852 this = expression.left 853 else: 854 return default(expression) 855 856 return prefix + self.func("has", arr.this.unnest(), this) 857 858 def eq_sql(self, expression: exp.EQ) -> str: 859 return self._any_to_has(expression, super().eq_sql) 860 861 def neq_sql(self, expression: exp.NEQ) -> str: 862 return self._any_to_has(expression, super().neq_sql, "NOT ") 863 864 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 865 # Manually add a flag to make the search case-insensitive 866 regex = self.func("CONCAT", "'(?i)'", expression.expression) 867 return self.func("match", expression.this, regex) 868 869 def datatype_sql(self, expression: exp.DataType) -> str: 870 # String is the standard ClickHouse type, every other variant is just an alias. 871 # Additionally, any supplied length parameter will be ignored. 
872 # 873 # https://clickhouse.com/docs/en/sql-reference/data-types/string 874 if expression.this in self.STRING_TYPE_MAPPING: 875 return "String" 876 877 return super().datatype_sql(expression) 878 879 def cte_sql(self, expression: exp.CTE) -> str: 880 if expression.args.get("scalar"): 881 this = self.sql(expression, "this") 882 alias = self.sql(expression, "alias") 883 return f"{this} AS {alias}" 884 885 return super().cte_sql(expression) 886 887 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 888 return super().after_limit_modifiers(expression) + [ 889 ( 890 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 891 if expression.args.get("settings") 892 else "" 893 ), 894 ( 895 self.seg("FORMAT ") + self.sql(expression, "format") 896 if expression.args.get("format") 897 else "" 898 ), 899 ] 900 901 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 902 params = self.expressions(expression, key="params", flat=True) 903 return self.func(expression.name, *expression.expressions) + f"({params})" 904 905 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 906 return self.func(expression.name, *expression.expressions) 907 908 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 909 return self.anonymousaggfunc_sql(expression) 910 911 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 912 return self.parameterizedagg_sql(expression) 913 914 def placeholder_sql(self, expression: exp.Placeholder) -> str: 915 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 916 917 def oncluster_sql(self, expression: exp.OnCluster) -> str: 918 return f"ON CLUSTER {self.sql(expression, 'this')}" 919 920 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 921 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 922 exp.Properties.Location.POST_NAME 923 ): 924 this_name = 
self.sql(expression.this, "this") 925 this_properties = " ".join( 926 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 927 ) 928 this_schema = self.schema_columns_sql(expression.this) 929 return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}" 930 931 return super().createable_sql(expression, locations) 932 933 def prewhere_sql(self, expression: exp.PreWhere) -> str: 934 this = self.indent(self.sql(expression, "this")) 935 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 936 937 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 938 this = self.sql(expression, "this") 939 this = f" {this}" if this else "" 940 expr = self.sql(expression, "expression") 941 expr = f" {expr}" if expr else "" 942 index_type = self.sql(expression, "index_type") 943 index_type = f" TYPE {index_type}" if index_type else "" 944 granularity = self.sql(expression, "granularity") 945 granularity = f" GRANULARITY {granularity}" if granularity else "" 946 947 return f"INDEX{this}{expr}{index_type}{granularity}" 948 949 def partition_sql(self, expression: exp.Partition) -> str: 950 return f"PARTITION {self.expressions(expression, flat=True)}" 951 952 def partitionid_sql(self, expression: exp.PartitionId) -> str: 953 return f"ID {self.sql(expression.this)}" 954 955 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 956 return ( 957 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 958 ) 959 960 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 961 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
class ClickHouse(Dialect):
    """Dialect definition for ClickHouse: tokenizer, parser and SQL generator overrides."""

    # Keep function names as written; ClickHouse function names are case-sensitive.
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    # ClickHouse division by zero yields NULL/inf rather than erroring.
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    # ClickHouse expands SELECT aliases across WHERE/GROUP BY/HAVING.
    FORCE_EARLY_ALIAS_REF_EXPANSION = True

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    class Tokenizer(tokens.Tokenizer):
        """ClickHouse-specific lexing: `#` comments, backtick identifiers, 0b/0x literals."""

        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "INT256": TokenType.INT256,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT128": TokenType.UINT128,
            "UINT16": TokenType.USMALLINT,
            "UINT256": TokenType.UINT256,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        # ClickHouse has no Oracle-style hint comments.
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        """ClickHouse-specific parsing overrides (aggregate combinators, FINAL, GLOBAL IN, etc.)."""

        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        # Base aggregate function names; combined with AGG_FUNCTIONS_SUFFIXES below to
        # recognize combinator forms such as sumIf, uniqState, avgMerge, etc.
        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        # Aggregate function combinator suffixes (e.g. -If, -State, -Merge).
        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        # Maps every <name><suffix> (and bare <name>) spelling to its (name, suffix) parts.
        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
        }

        # MATCH is a plain function in ClickHouse, handled via FUNCTIONS above.
        FUNCTION_PARSERS.pop("MATCH")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
            and self._parse_in(this, is_global=True),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            """Parse EXTRACT: standard `extract(part FROM expr)` or CH's `extract(str, re)`."""
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            """Parse an assignment, additionally handling CH's `cond ? a : b` ternary operator."""
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            """Parse IN, tagging the expression when preceded by GLOBAL."""
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a table reference, wrapping it in `exp.Final` if followed by FINAL."""
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            # ClickHouse's position() takes the haystack as its first argument.
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            """Parse a CTE, supporting both standard and CH scalar (`WITH expr AS name`) forms."""
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            """Parse (global, side, kind); CH allows the join kind to precede the side."""
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            """Parse a join, moving the GLOBAL marker into the join's `global` arg."""
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            """Parse a function call, recognizing CH aggregate combinators and parametric aggs."""
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            """Parse the second parenthesized list of a parametric agg, e.g. quantile(0.5)(x)."""
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            """Parse quantile(q)(x); a bare quantile(x) defaults to the median (q = 0.5)."""
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            # Parentheses around the identifier list are always optional in ClickHouse.
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            """Parse PRIMARY KEY; parens are optional when it appears among table properties."""
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            """Parse the ON CLUSTER property, retreating if no cluster name follows."""
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            """Parse ALTER TABLE ... REPLACE PARTITION ... FROM <source table>."""
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            """Parse a PROJECTION definition inside CREATE TABLE."""
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            """Parse a constraint, falling back to CH projection definitions."""
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        """ClickHouse-specific SQL generation overrides."""

        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False

        # All of these text/binary variants are aliases for String in ClickHouse.
        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            # ClickHouse JSON path subscripts are 1-based, so shift integer indexes.
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            """Render CREATE ... LIKE as `AS <table>`."""
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            """Rewrite `x = ANY(arr)` (on either side) as `has(arr, x)`, else use `default`."""
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            """Render scalar CTEs as `<expr> AS <alias>`, delegating otherwise."""
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            """Append SETTINGS and FORMAT clauses after LIMIT."""
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            # Parametric aggregates take two argument lists: func(args)(params)
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            # Query parameter syntax: {name: Type}
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            """Place ON CLUSTER between the object name and its schema for supported targets."""
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            """Render INDEX <name> <expr> TYPE <type> GRANULARITY <value>."""
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Whether the base comes first in the LOG function.
Possible values: True, False, None (None means the two-argument form of LOG is not supported).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Mapping of an escaped sequence (e.g. the two-character string "\\0") to its unescaped version (the corresponding control character, e.g. the NUL byte "\0").
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
112 class Tokenizer(tokens.Tokenizer): 113 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 114 IDENTIFIERS = ['"', "`"] 115 STRING_ESCAPES = ["'", "\\"] 116 BIT_STRINGS = [("0b", "")] 117 HEX_STRINGS = [("0x", ""), ("0X", "")] 118 HEREDOC_STRINGS = ["$"] 119 120 KEYWORDS = { 121 **tokens.Tokenizer.KEYWORDS, 122 "ATTACH": TokenType.COMMAND, 123 "DATE32": TokenType.DATE32, 124 "DATETIME64": TokenType.DATETIME64, 125 "DICTIONARY": TokenType.DICTIONARY, 126 "ENUM8": TokenType.ENUM8, 127 "ENUM16": TokenType.ENUM16, 128 "FINAL": TokenType.FINAL, 129 "FIXEDSTRING": TokenType.FIXEDSTRING, 130 "FLOAT32": TokenType.FLOAT, 131 "FLOAT64": TokenType.DOUBLE, 132 "GLOBAL": TokenType.GLOBAL, 133 "INT256": TokenType.INT256, 134 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 135 "MAP": TokenType.MAP, 136 "NESTED": TokenType.NESTED, 137 "SAMPLE": TokenType.TABLE_SAMPLE, 138 "TUPLE": TokenType.STRUCT, 139 "UINT128": TokenType.UINT128, 140 "UINT16": TokenType.USMALLINT, 141 "UINT256": TokenType.UINT256, 142 "UINT32": TokenType.UINT, 143 "UINT64": TokenType.UBIGINT, 144 "UINT8": TokenType.UTINYINT, 145 "IPV4": TokenType.IPV4, 146 "IPV6": TokenType.IPV6, 147 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 148 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 149 "SYSTEM": TokenType.COMMAND, 150 "PREWHERE": TokenType.PREWHERE, 151 } 152 KEYWORDS.pop("/*+") 153 154 SINGLE_TOKENS = { 155 **tokens.Tokenizer.SINGLE_TOKENS, 156 "$": TokenType.HEREDOC_STRING, 157 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """ClickHouse-specific overrides of sqlglot's base SQL parser.

    Handles ClickHouse peculiarities such as aggregate-function combinator
    suffixes (e.g. ``sumIf``, ``uniqMerge``), parameterized aggregates
    (``quantile(0.5)(x)``), query parameters (``{name: Type}``), the ternary
    ``a ? b : c`` operator, GLOBAL IN/JOIN, FINAL tables, and the trailing
    SETTINGS / FORMAT query modifiers.
    """

    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Aggregate function names that may carry a combinator suffix from
    # AGG_FUNCTIONS_SUFFIXES (e.g. "sum" + "If" -> "sumIf"); combined into
    # AGG_FUNC_MAPPING below.
    AGG_FUNCTIONS = {
        "count",
        "min",
        "max",
        "sum",
        "avg",
        "any",
        "stddevPop",
        "stddevSamp",
        "varPop",
        "varSamp",
        "corr",
        "covarPop",
        "covarSamp",
        "entropy",
        "exponentialMovingAverage",
        "intervalLengthSum",
        "kolmogorovSmirnovTest",
        "mannWhitneyUTest",
        "median",
        "rankCorr",
        "sumKahan",
        "studentTTest",
        "welchTTest",
        "anyHeavy",
        "anyLast",
        "boundingRatio",
        "first_value",
        "last_value",
        "argMin",
        "argMax",
        "avgWeighted",
        "topK",
        "topKWeighted",
        "deltaSum",
        "deltaSumTimestamp",
        "groupArray",
        "groupArrayLast",
        "groupUniqArray",
        "groupArrayInsertAt",
        "groupArrayMovingAvg",
        "groupArrayMovingSum",
        "groupArraySample",
        "groupBitAnd",
        "groupBitOr",
        "groupBitXor",
        "groupBitmap",
        "groupBitmapAnd",
        "groupBitmapOr",
        "groupBitmapXor",
        "sumWithOverflow",
        "sumMap",
        "minMap",
        "maxMap",
        "skewSamp",
        "skewPop",
        "kurtSamp",
        "kurtPop",
        "uniq",
        "uniqExact",
        "uniqCombined",
        "uniqCombined64",
        "uniqHLL12",
        "uniqTheta",
        "quantile",
        "quantiles",
        "quantileExact",
        "quantilesExact",
        "quantileExactLow",
        "quantilesExactLow",
        "quantileExactHigh",
        "quantilesExactHigh",
        "quantileExactWeighted",
        "quantilesExactWeighted",
        "quantileTiming",
        "quantilesTiming",
        "quantileTimingWeighted",
        "quantilesTimingWeighted",
        "quantileDeterministic",
        "quantilesDeterministic",
        "quantileTDigest",
        "quantilesTDigest",
        "quantileTDigestWeighted",
        "quantilesTDigestWeighted",
        "quantileBFloat16",
        "quantilesBFloat16",
        "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted",
        "simpleLinearRegression",
        "stochasticLinearRegression",
        "stochasticLogisticRegression",
        "categoricalInformationValue",
        "contingency",
        "cramersV",
        "cramersVBiasCorrected",
        "theilsU",
        "maxIntersections",
        "maxIntersectionsPosition",
        "meanZTest",
        "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted",
        "quantileGK",
        "quantilesGK",
        "sparkBar",
        "sumCount",
        "largestTriangleThreeBuckets",
        "histogram",
        "sequenceMatch",
        "sequenceCount",
        "windowFunnel",
        "retention",
        "uniqUpTo",
        "sequenceNextNode",
        "exponentialTimeDecayedAvg",
    }

    # Aggregate combinator suffixes (ClickHouse "-If", "-Array", "-State", ...).
    AGG_FUNCTIONS_SUFFIXES = [
        "If",
        "Array",
        "ArrayIf",
        "Map",
        "SimpleState",
        "State",
        "Merge",
        "MergeState",
        "ForEach",
        "Distinct",
        "OrDefault",
        "OrNull",
        "Resample",
        "ArgMin",
        "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    # Maps "<func><suffix>" -> (func, suffix), including the bare names via
    # the "" suffix. Built with an immediately-invoked lambda because a
    # comprehension in a class body cannot reference other class attributes.
    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    # MATCH is parsed through FUNCTIONS (-> exp.RegexpLike) instead of the
    # base class's dedicated MATCH parser.
    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    # These keywords follow a table reference in ClickHouse (FINAL, FORMAT,
    # SETTINGS, ...) so they must not be mistaken for table aliases.
    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        # self._advance() returns None, so `or` falls through to the parse.
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT: standard `extract(part FROM expr)` stays an
        exp.Extract; ClickHouse's `extract(haystack, pattern)` becomes an
        Anonymous function."""
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
        )

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse an assignment, extended to support the ClickHouse ternary
        operator `cond ? true_expr : false_expr` (PLACEHOLDER is the `?`
        token)."""
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        """Parse IN, tagging the expression when it was written GLOBAL IN."""
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a table reference, additionally consuming a trailing FINAL
        modifier into an exp.Final wrapper."""
        # NOTE(review): parse_partition is accepted but not forwarded to
        # super()._parse_table — confirm this is intentional.
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        # ClickHouse's position() takes the haystack as its first argument.
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return (global, side, kind) join tokens. ClickHouse allows the
        join kind to precede the side (e.g. ANY LEFT JOIN), so both orders
        are accepted."""
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a join; the GLOBAL token lands in the "method" arg in the
        base parser, so move it into the "global" arg here."""
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, post-processing combinator-suffixed and/or
        parameterized aggregates (e.g. sumIf(...), quantile(0.5)(x)) into the
        dedicated ClickHouse expression classes."""
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        """Parse the trailing parameter list of a parameterized aggregate,
        i.e. the second parenthesized group in `f(params)(args)`."""
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        """Parse quantile(q)(x); a bare quantile(x) defaults to the median."""
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        # The wrapping parentheses are always optional in ClickHouse.
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        # PRIMARY KEY in table properties doesn't require parentheses.
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        """Parse ON CLUSTER <id>, retreating if no identifier follows."""
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... REPLACE PARTITION <p> FROM <table>."""
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        """Parse PROJECTION <name> (<statement>)."""
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        # Projections can appear wherever schema constraints do.
        return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """ClickHouse-specific SQL generator.

    Maps sqlglot expression nodes back to ClickHouse syntax: native type
    names (Int32, String, Nullable(...)), camelCase function names,
    combinator/parameterized aggregates, SAMPLE/FINAL/PREWHERE clauses,
    SETTINGS/FORMAT query modifiers, and ON CLUSTER DDL.
    """

    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False

    # All of these are aliases of String in ClickHouse; see datatype_sql.
    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # there's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        # ClickHouse JSON subscripts are 1-based, so shift integer indices.
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        # CREATE TABLE ... AS <other_table> (no LIKE keyword in ClickHouse).
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        """Rewrite `x = ANY(arr)` / `x <> ANY(arr)` as (NOT) has(arr, x);
        fall back to `default` when neither side is an ANY."""
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        # Scalar CTEs (WITH <expr> AS <name>) render without parentheses.
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        # SETTINGS and FORMAT trail the query, after LIMIT.
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        # Renders f(args)(params), the ClickHouse parameterized-aggregate form.
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        # Query parameter syntax: {name: Type}
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        # ON CLUSTER must appear right after the object name, before the schema.
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        # INDEX <name> <expr> TYPE <type> GRANULARITY <value>; each piece optional.
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def datatype_sql(self, expression: exp.DataType) -> str:
    """Render a data type, collapsing all textual type aliases to String."""
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        return "String"

    return super().datatype_sql(expression)
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    """Append SETTINGS and FORMAT clauses after the LIMIT clause."""
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    """Render the created object, placing POST_NAME properties (ON CLUSTER)
    between the object name and its schema."""
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(expression.this, "this")
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

    return super().createable_sql(expression, locations)
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    """Render INDEX <name> <expr> TYPE <type> GRANULARITY <value>; every
    piece after INDEX is optional and omitted when absent."""
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- length_sql
- rand_sql
- strtodate_sql
- strtotime_sql
- changes_sql
- pad_sql