sqlglot.dialects.clickhouse
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("timezone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If"))


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql
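
# Illustrative sketch (not part of the original source): _datetime_delta_sql above emits
# ClickHouse's argument order, dateAdd(unit, value, date). Assuming sqlglot's public
# transpile API, a MySQL-style DATE_ADD is expected to render roughly as follows:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile(
#     ...     "SELECT DATE_ADD(col, INTERVAL 1 DAY)", read="mysql", write="clickhouse"
#     ... )[0]
#     'SELECT DATE_ADD(DAY, 1, col)'
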
IDENTIFIERS = ['"', "`"] 122 STRING_ESCAPES = ["'", "\\"] 123 BIT_STRINGS = [("0b", "")] 124 HEX_STRINGS = [("0x", ""), ("0X", "")] 125 HEREDOC_STRINGS = ["$"] 126 127 KEYWORDS = { 128 **tokens.Tokenizer.KEYWORDS, 129 "ATTACH": TokenType.COMMAND, 130 "DATE32": TokenType.DATE32, 131 "DATETIME64": TokenType.DATETIME64, 132 "DICTIONARY": TokenType.DICTIONARY, 133 "ENUM8": TokenType.ENUM8, 134 "ENUM16": TokenType.ENUM16, 135 "FINAL": TokenType.FINAL, 136 "FIXEDSTRING": TokenType.FIXEDSTRING, 137 "FLOAT32": TokenType.FLOAT, 138 "FLOAT64": TokenType.DOUBLE, 139 "GLOBAL": TokenType.GLOBAL, 140 "INT256": TokenType.INT256, 141 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 142 "MAP": TokenType.MAP, 143 "NESTED": TokenType.NESTED, 144 "SAMPLE": TokenType.TABLE_SAMPLE, 145 "TUPLE": TokenType.STRUCT, 146 "UINT128": TokenType.UINT128, 147 "UINT16": TokenType.USMALLINT, 148 "UINT256": TokenType.UINT256, 149 "UINT32": TokenType.UINT, 150 "UINT64": TokenType.UBIGINT, 151 "UINT8": TokenType.UTINYINT, 152 "IPV4": TokenType.IPV4, 153 "IPV6": TokenType.IPV6, 154 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 155 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 156 "SYSTEM": TokenType.COMMAND, 157 "PREWHERE": TokenType.PREWHERE, 158 } 159 KEYWORDS.pop("/*+") 160 161 SINGLE_TOKENS = { 162 **tokens.Tokenizer.SINGLE_TOKENS, 163 "$": TokenType.HEREDOC_STRING, 164 } 165 166 class Parser(parser.Parser): 167 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 168 # * select x from t1 union all select x from t2 limit 1; 169 # * select x from t1 union all (select x from t2 limit 1); 170 MODIFIERS_ATTACHED_TO_SET_OP = False 171 INTERVAL_SPANS = False 172 173 FUNCTIONS = { 174 **parser.Parser.FUNCTIONS, 175 "ANY": exp.AnyValue.from_arg_list, 176 "ARRAYSUM": exp.ArraySum.from_arg_list, 177 "COUNTIF": _build_count_if, 178 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 179 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 180 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 181 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 182 "DATE_FORMAT": _build_date_format, 183 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 184 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 185 "FORMATDATETIME": _build_date_format, 186 "JSONEXTRACTSTRING": build_json_extract_path( 187 exp.JSONExtractScalar, zero_based_indexing=False 188 ), 189 "MAP": parser.build_var_map, 190 "MATCH": exp.RegexpLike.from_arg_list, 191 "RANDCANONICAL": exp.Rand.from_arg_list, 192 "STR_TO_DATE": _build_str_to_date, 193 "TUPLE": exp.Struct.from_arg_list, 194 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 195 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 196 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 197 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 198 "UNIQ": exp.ApproxDistinct.from_arg_list, 199 "XOR": lambda args: exp.Xor(expressions=args), 200 "MD5": exp.MD5Digest.from_arg_list, 201 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 202 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 203 } 204 205 AGG_FUNCTIONS = { 206 "count", 207 "min", 208 "max", 209 "sum", 210 "avg", 211 "any", 212 "stddevPop", 213 "stddevSamp", 214 "varPop", 215 "varSamp", 216 "corr", 217 "covarPop", 218 "covarSamp", 219 "entropy", 220 "exponentialMovingAverage", 221 
"intervalLengthSum", 222 "kolmogorovSmirnovTest", 223 "mannWhitneyUTest", 224 "median", 225 "rankCorr", 226 "sumKahan", 227 "studentTTest", 228 "welchTTest", 229 "anyHeavy", 230 "anyLast", 231 "boundingRatio", 232 "first_value", 233 "last_value", 234 "argMin", 235 "argMax", 236 "avgWeighted", 237 "topK", 238 "topKWeighted", 239 "deltaSum", 240 "deltaSumTimestamp", 241 "groupArray", 242 "groupArrayLast", 243 "groupUniqArray", 244 "groupArrayInsertAt", 245 "groupArrayMovingAvg", 246 "groupArrayMovingSum", 247 "groupArraySample", 248 "groupBitAnd", 249 "groupBitOr", 250 "groupBitXor", 251 "groupBitmap", 252 "groupBitmapAnd", 253 "groupBitmapOr", 254 "groupBitmapXor", 255 "sumWithOverflow", 256 "sumMap", 257 "minMap", 258 "maxMap", 259 "skewSamp", 260 "skewPop", 261 "kurtSamp", 262 "kurtPop", 263 "uniq", 264 "uniqExact", 265 "uniqCombined", 266 "uniqCombined64", 267 "uniqHLL12", 268 "uniqTheta", 269 "quantile", 270 "quantiles", 271 "quantileExact", 272 "quantilesExact", 273 "quantileExactLow", 274 "quantilesExactLow", 275 "quantileExactHigh", 276 "quantilesExactHigh", 277 "quantileExactWeighted", 278 "quantilesExactWeighted", 279 "quantileTiming", 280 "quantilesTiming", 281 "quantileTimingWeighted", 282 "quantilesTimingWeighted", 283 "quantileDeterministic", 284 "quantilesDeterministic", 285 "quantileTDigest", 286 "quantilesTDigest", 287 "quantileTDigestWeighted", 288 "quantilesTDigestWeighted", 289 "quantileBFloat16", 290 "quantilesBFloat16", 291 "quantileBFloat16Weighted", 292 "quantilesBFloat16Weighted", 293 "simpleLinearRegression", 294 "stochasticLinearRegression", 295 "stochasticLogisticRegression", 296 "categoricalInformationValue", 297 "contingency", 298 "cramersV", 299 "cramersVBiasCorrected", 300 "theilsU", 301 "maxIntersections", 302 "maxIntersectionsPosition", 303 "meanZTest", 304 "quantileInterpolatedWeighted", 305 "quantilesInterpolatedWeighted", 306 "quantileGK", 307 "quantilesGK", 308 "sparkBar", 309 "sumCount", 310 "largestTriangleThreeBuckets", 311 "histogram", 312 "sequenceMatch", 313 "sequenceCount", 314 "windowFunnel", 315 "retention", 316 "uniqUpTo", 317 "sequenceNextNode", 318 "exponentialTimeDecayedAvg", 319 } 320 321 AGG_FUNCTIONS_SUFFIXES = [ 322 "If", 323 "Array", 324 "ArrayIf", 325 "Map", 326 "SimpleState", 327 "State", 328 "Merge", 329 "MergeState", 330 "ForEach", 331 "Distinct", 332 "OrDefault", 333 "OrNull", 334 "Resample", 335 "ArgMin", 336 "ArgMax", 337 ] 338 339 FUNC_TOKENS = { 340 *parser.Parser.FUNC_TOKENS, 341 TokenType.SET, 342 } 343 344 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 345 346 ID_VAR_TOKENS = { 347 *parser.Parser.ID_VAR_TOKENS, 348 TokenType.LIKE, 349 } 350 351 AGG_FUNC_MAPPING = ( 352 lambda functions, suffixes: { 353 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 354 } 355 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 356 357 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 358 359 FUNCTION_PARSERS = { 360 **parser.Parser.FUNCTION_PARSERS, 361 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 362 "QUANTILE": lambda self: self._parse_quantile(), 363 } 364 365 FUNCTION_PARSERS.pop("MATCH") 366 367 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 368 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 369 370 RANGE_PARSERS = { 371 **parser.Parser.RANGE_PARSERS, 372 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 373 and self._parse_in(this, is_global=True), 374 } 375 376 # The PLACEHOLDER entry is popped 

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_placeholder(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            if not self._match(TokenType.L_BRACE):
                return None

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> exp.CTE:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

            return join
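
        # Illustrative sketch (not in the original source): _parse_assignment above handles
        # ClickHouse's ternary operator, and _parse_placeholder handles query parameters.
        # Assuming sqlglot's public API, both are expected to parse along these lines:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT x > 1 ? 'a' : 'b'", read="clickhouse")  # contains exp.If
        #     >>> sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")  # contains exp.Placeholder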

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                params = self._parse_func_params(func)

                kwargs = {
                    "this": func.this,
                    "expressions": func.expressions,
                }
                if parts[1]:
                    kwargs["parts"] = parts
                    exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )
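
        # Illustrative sketch (not in the original source): AGG_FUNC_MAPPING together with
        # _parse_function/_parse_func_params recognizes combinator-suffixed aggregates
        # (e.g. countIf, sumArray) and parameterized calls. Assuming sqlglot's public API,
        # a parameterized quantile is expected to round-trip unchanged:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT quantile(0.5)(x) FROM t", read="clickhouse")[0]
        #     'SELECT quantile(0.5)(x) FROM t'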

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        EXPLICIT_SET_OP = True
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        SUPPORTS_TABLE_ALIAS_COLUMNS = False

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.NULLABLE: "Nullable",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySize: rename_func("LENGTH"),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: self.func(
                "position", e.this, e.args.get("substr"), e.args.get("position")
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
            ),
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Variance: rename_func("varSamp"),
            exp.Stddev: rename_func("stddevSamp"),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
        }

        # there's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return f"CAST({strtodate_sql} AS DATE)"

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)
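
        # Illustrative sketch (not in the original source): _any_to_has above rewrites
        # equality against ANY(<array>) into ClickHouse's has() function. Assuming
        # sqlglot's public API, a Postgres-style predicate is expected to render
        # roughly as:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT x = ANY(arr)", read="postgres", write="clickhouse")[0]
        #     'SELECT has(arr, x)'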

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                return "String"

            return super().datatype_sql(expression)

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
            params = self.expressions(expression, key="params", flat=True)
            return self.func(expression.name, *expression.expressions) + f"({params})"

        def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
            return self.func(expression.name, *expression.expressions)

        def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
            return self.anonymousaggfunc_sql(expression)

        def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
            return self.parameterizedagg_sql(expression)

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(expression.this, "this")
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

            return super().createable_sql(expression, locations)

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
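

# Usage sketch (illustrative, not part of the original module): exercising a few of the
# behaviors defined above through sqlglot's public API. The exact rendered strings are
# assumptions for demonstration, not tested fixtures.
if __name__ == "__main__":
    import sqlglot

    # FINAL plus the SETTINGS/FORMAT query modifiers (see _parse_table and
    # QUERY_MODIFIER_PARSERS) should round-trip through the ClickHouse dialect.
    print(
        sqlglot.transpile(
            "SELECT * FROM t FINAL SETTINGS max_threads = 8 FORMAT JSONEachRow",
            read="clickhouse",
        )[0]
    )

    # GLOBAL IN (see RANGE_PARSERS and _parse_in) parses into an exp.In with is_global set.
    print(
        sqlglot.parse_one(
            "SELECT * FROM t WHERE a GLOBAL IN (1, 2)", read="clickhouse"
        ).sql("clickhouse")
    )

    # Every string type alias collapses to String on generation (see datatype_sql).
    print(sqlglot.transpile("CREATE TABLE x (a VARCHAR(10))", write="clickhouse")[0])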
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
    WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects "my_id" would refer to "data.my_id" (which is done in _qualify_columns()) across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Mapping of an escaped sequence (e.g. "\n") to its unescaped version (the literal newline character).
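For instance, the ClickHouse tokenizer unescapes \0 to the NUL character, so the parsed string literal should carry the raw byte. A small sketch (the expected value in the comment is an assumption based on the mapping above):

    import sqlglot

    # The "\0" sequence inside the string literal is unescaped while tokenizing.
    literal = sqlglot.parse_one(r"SELECT '\0'", read="clickhouse").expressions[0]
    print(repr(literal.this))  # expected: '\x00'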
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- NORMALIZATION_STRATEGY
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    COMMENTS = ["--", "#", "#!", ("/*", "*/")]
    IDENTIFIERS = ['"', "`"]
    STRING_ESCAPES = ["'", "\\"]
    BIT_STRINGS = [("0b", "")]
    HEX_STRINGS = [("0x", ""), ("0X", "")]
    HEREDOC_STRINGS = ["$"]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "ATTACH": TokenType.COMMAND,
        "DATE32": TokenType.DATE32,
        "DATETIME64": TokenType.DATETIME64,
        "DICTIONARY": TokenType.DICTIONARY,
        "ENUM8": TokenType.ENUM8,
        "ENUM16": TokenType.ENUM16,
        "FINAL": TokenType.FINAL,
        "FIXEDSTRING": TokenType.FIXEDSTRING,
        "FLOAT32": TokenType.FLOAT,
        "FLOAT64": TokenType.DOUBLE,
        "GLOBAL": TokenType.GLOBAL,
        "INT256": TokenType.INT256,
        "LOWCARDINALITY": TokenType.LOWCARDINALITY,
        "MAP": TokenType.MAP,
        "NESTED": TokenType.NESTED,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "TUPLE": TokenType.STRUCT,
        "UINT128": TokenType.UINT128,
        "UINT16": TokenType.USMALLINT,
        "UINT256": TokenType.UINT256,
        "UINT32": TokenType.UINT,
        "UINT64": TokenType.UBIGINT,
        "UINT8": TokenType.UTINYINT,
        "IPV4": TokenType.IPV4,
        "IPV6": TokenType.IPV6,
        "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
        "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
        "SYSTEM": TokenType.COMMAND,
        "PREWHERE": TokenType.PREWHERE,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.HEREDOC_STRING,
    }
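A short sketch of these settings in action: `#` opens a line comment and `0x...` is read as a hex string (token types are sqlglot's standard TokenType values; the exact tokens emitted may vary by version):

    from sqlglot.dialects.clickhouse import ClickHouse

    # "0xFF" should tokenize as a hex string and "# ..." as a comment,
    # per the HEX_STRINGS and COMMENTS settings above.
    for token in ClickHouse().tokenize("SELECT 0xFF # trailing comment"):
        print(token.token_type, repr(token.text))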
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
    # * select x from t1 union all select x from t2 limit 1;
    # * select x from t1 union all (select x from t2 limit 1);
    MODIFIERS_ATTACHED_TO_SET_OP = False
    INTERVAL_SPANS = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ANY": exp.AnyValue.from_arg_list,
        "ARRAYSUM": exp.ArraySum.from_arg_list,
        "COUNTIF": _build_count_if,
        "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
        "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
        "DATE_FORMAT": _build_date_format,
        "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
        "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
        "FORMATDATETIME": _build_date_format,
        "JSONEXTRACTSTRING": build_json_extract_path(
            exp.JSONExtractScalar, zero_based_indexing=False
        ),
        "MAP": parser.build_var_map,
        "MATCH": exp.RegexpLike.from_arg_list,
        "RANDCANONICAL": exp.Rand.from_arg_list,
        "STR_TO_DATE": _build_str_to_date,
        "TUPLE": exp.Struct.from_arg_list,
        "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
        "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
        "UNIQ": exp.ApproxDistinct.from_arg_list,
        "XOR": lambda args: exp.Xor(expressions=args),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    AGG_FUNCTIONS = {
        "count", "min", "max", "sum", "avg", "any",
        "stddevPop", "stddevSamp", "varPop", "varSamp", "corr",
        "covarPop", "covarSamp", "entropy", "exponentialMovingAverage",
        "intervalLengthSum", "kolmogorovSmirnovTest", "mannWhitneyUTest",
        "median", "rankCorr", "sumKahan", "studentTTest", "welchTTest",
        "anyHeavy", "anyLast", "boundingRatio", "first_value", "last_value",
        "argMin", "argMax", "avgWeighted", "topK", "topKWeighted",
        "deltaSum", "deltaSumTimestamp", "groupArray", "groupArrayLast",
        "groupUniqArray", "groupArrayInsertAt", "groupArrayMovingAvg",
        "groupArrayMovingSum", "groupArraySample", "groupBitAnd", "groupBitOr",
        "groupBitXor", "groupBitmap", "groupBitmapAnd", "groupBitmapOr",
        "groupBitmapXor", "sumWithOverflow", "sumMap", "minMap", "maxMap",
        "skewSamp", "skewPop", "kurtSamp", "kurtPop",
        "uniq", "uniqExact", "uniqCombined", "uniqCombined64", "uniqHLL12",
        "uniqTheta", "quantile", "quantiles", "quantileExact", "quantilesExact",
        "quantileExactLow", "quantilesExactLow", "quantileExactHigh",
        "quantilesExactHigh", "quantileExactWeighted", "quantilesExactWeighted",
        "quantileTiming", "quantilesTiming", "quantileTimingWeighted",
        "quantilesTimingWeighted", "quantileDeterministic",
        "quantilesDeterministic", "quantileTDigest", "quantilesTDigest",
        "quantileTDigestWeighted", "quantilesTDigestWeighted",
        "quantileBFloat16", "quantilesBFloat16", "quantileBFloat16Weighted",
        "quantilesBFloat16Weighted", "simpleLinearRegression",
        "stochasticLinearRegression", "stochasticLogisticRegression",
        "categoricalInformationValue", "contingency", "cramersV",
        "cramersVBiasCorrected", "theilsU", "maxIntersections",
        "maxIntersectionsPosition", "meanZTest", "quantileInterpolatedWeighted",
        "quantilesInterpolatedWeighted", "quantileGK", "quantilesGK",
        "sparkBar", "sumCount", "largestTriangleThreeBuckets", "histogram",
        "sequenceMatch", "sequenceCount", "windowFunnel", "retention",
        "uniqUpTo", "sequenceNextNode", "exponentialTimeDecayedAvg",
    }

    AGG_FUNCTIONS_SUFFIXES = [
        "If", "Array", "ArrayIf", "Map", "SimpleState", "State", "Merge",
        "MergeState", "ForEach", "Distinct", "OrDefault", "OrNull",
        "Resample", "ArgMin", "ArgMax",
    ]

    FUNC_TOKENS = {
        *parser.Parser.FUNC_TOKENS,
        TokenType.SET,
    }

    RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.LIKE,
    }

    AGG_FUNC_MAPPING = (
        lambda functions, suffixes: {
            f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
        }
    )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

    FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
        "QUANTILE": lambda self: self._parse_quantile(),
    }

    FUNCTION_PARSERS.pop("MATCH")

    NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
    NO_PAREN_FUNCTION_PARSERS.pop("ANY")

    RANGE_PARSERS = {
        **parser.Parser.RANGE_PARSERS,
        TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN)
        and self._parse_in(this, is_global=True),
    }

    # The PLACEHOLDER entry is popped because 1) it doesn't affect ClickHouse (it corresponds to
    # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
    COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
    COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

    JOIN_KINDS = {
        *parser.Parser.JOIN_KINDS,
        TokenType.ANY,
        TokenType.ASOF,
        TokenType.ARRAY,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
        TokenType.ANY,
        TokenType.ARRAY,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.SETTINGS,
    }

    ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
        TokenType.FORMAT,
    }

    LOG_DEFAULTS_TO_LN = True

    QUERY_MODIFIER_PARSERS = {
        **parser.Parser.QUERY_MODIFIER_PARSERS,
        TokenType.SETTINGS: lambda self: (
            "settings",
            self._advance() or self._parse_csv(self._parse_assignment),
        ),
        TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "INDEX": lambda self: self._parse_index_constraint(),
        "CODEC": lambda self: self._parse_compress(),
    }

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "REPLACE": lambda self: self._parse_alter_table_replace(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
        "INDEX",
    }

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        index = self._index
        this = self._parse_bitwise()
        if self._match(TokenType.FROM):
            self._retreat(index)
            return super()._parse_extract()

        # We return Anonymous here because extract and regexpExtract have different semantics,
        # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
        # `extract('foobar', 'b')` works, but CH crashes for `regexpExtract('foobar', 'b')`.
        #
        # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
        self._match(TokenType.COMMA)
        return self.expression(
            exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
        )

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = super()._parse_assignment()

        if self._match(TokenType.PLACEHOLDER):
            return self.expression(
                exp.If,
                this=this,
                true=self._parse_assignment(),
                false=self._match(TokenType.COLON) and self._parse_assignment(),
            )

        return this

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """
        Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
        https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
        """
        if not self._match(TokenType.L_BRACE):
            return None

        this = self._parse_id_var()
        self._match(TokenType.COLON)
        kind = self._parse_types(check_func=False, allow_identifiers=False) or (
            self._match_text_seq("IDENTIFIER") and "Identifier"
        )

        if not kind:
            self.raise_error("Expecting a placeholder type or 'Identifier' for tables")
        elif not self._match(TokenType.R_BRACE):
            self.raise_error("Expecting }")

        return self.expression(exp.Placeholder, this=this, kind=kind)

    def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
        this = super()._parse_in(this)
        this.set("is_global", is_global)
        return this

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
    ) -> t.Optional[exp.Expression]:
        this = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
        )

        if self._match(TokenType.FINAL):
            this = self.expression(exp.Final, this=this)

        return this

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        return super()._parse_position(haystack_first=True)

    # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
    def _parse_cte(self) -> exp.CTE:
        # WITH <identifier> AS <subquery expression>
        cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

        if not cte:
            # WITH <expression> AS <identifier>
            cte = self.expression(
                exp.CTE,
                this=self._parse_assignment(),
                alias=self._parse_table_alias(),
                scalar=True,
            )

        return cte

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        is_global = self._match(TokenType.GLOBAL) and self._prev
        kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

        if kind_pre:
            kind = self._match_set(self.JOIN_KINDS) and self._prev
            side = self._match_set(self.JOIN_SIDES) and self._prev
            return is_global, side, kind

        return (
            is_global,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
        if join:
            join.set("global", join.args.pop("method", None))

        return join

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        expr = super()._parse_function(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        func = expr.this if isinstance(expr, exp.Window) else expr

        # Aggregate functions can be split in 2 parts: <func_name><suffix>
        parts = (
            self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
        )

        if parts:
            params = self._parse_func_params(func)

            kwargs = {
                "this": func.this,
                "expressions": func.expressions,
            }
            if parts[1]:
                kwargs["parts"] = parts
                exp_class = exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
            else:
                exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

            kwargs["exp_class"] = exp_class
            if params:
                kwargs["params"] = params

            func = self.expression(**kwargs)

            if isinstance(expr, exp.Window):
                # The window's func was parsed as Anonymous in base parser, fix its
                # type to be CH style CombinedAnonymousAggFunc / AnonymousAggFunc
                expr.set("this", func)
            elif params:
                # Params have blocked super()._parse_function() from parsing the following window
                # (if that exists) as they're standing between the function call and the window spec
                expr = self._parse_window(func)
            else:
                expr = func

        return expr

    def _parse_func_params(
        self, this: t.Optional[exp.Func] = None
    ) -> t.Optional[t.List[exp.Expression]]:
        if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
            return self._parse_csv(self._parse_lambda)

        if self._match(TokenType.L_PAREN):
            params = self._parse_csv(self._parse_lambda)
            self._match_r_paren(this)
            return params

        return None

    def _parse_quantile(self) -> exp.Quantile:
        this = self._parse_lambda()
        params = self._parse_func_params()
        if params:
            return self.expression(exp.Quantile, this=params[0], quantile=this)
        return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return super()._parse_wrapped_id_vars(optional=True)

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        return super()._parse_primary_key(
            wrapped_optional=wrapped_optional or in_props, in_props=in_props
        )

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        index = self._index
        if self._match_text_seq("CLUSTER"):
            this = self._parse_id_var()
            if this:
                return self.expression(exp.OnCluster, this=this)
            else:
                self._retreat(index)
        return None

    def _parse_index_constraint(
        self, kind: t.Optional[str] = None
    ) -> exp.IndexColumnConstraint:
        # INDEX name1 expr TYPE type1(args) GRANULARITY value
        this = self._parse_id_var()
        expression = self._parse_assignment()

        index_type = self._match_text_seq("TYPE") and (
            self._parse_function() or self._parse_var()
        )

        granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

        return self.expression(
            exp.IndexColumnConstraint,
            this=this,
            expression=expression,
            index_type=index_type,
            granularity=granularity,
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
        if not self._match(TokenType.PARTITION):
            return None

        if self._match_text_seq("ID"):
            # Corresponds to the PARTITION ID <string_value> syntax
            expressions: t.List[exp.Expression] = [
                self.expression(exp.PartitionId, this=self._parse_string())
            ]
        else:
            expressions = self._parse_expressions()

        return self.expression(exp.Partition, expressions=expressions)

    def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
        partition = self._parse_partition()

        if not partition or not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
        )

    def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
        if not self._match_text_seq("PROJECTION"):
            return None

        return self.expression(
            exp.ProjectionDef,
            this=self._parse_id_var(),
            expression=self._parse_wrapped(self._parse_statement),
        )

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        return super()._parse_constraint() or self._parse_projection_def()
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
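As a hedged example of a few ClickHouse-specific parses implemented above (query parameters in braces, the SETTINGS/FORMAT query modifiers, and parametric aggregates); the expected outputs in the comments follow from the parser and generator code on this page, though exact formatting may vary by sqlglot version:

    import sqlglot

    # {name: Type} parameters round-trip via _parse_placeholder/placeholder_sql.
    query = sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
    print(query.sql("clickhouse"))  # expected: SELECT {abc: UInt32}

    # SETTINGS and FORMAT are captured by QUERY_MODIFIER_PARSERS and re-emitted
    # after LIMIT by the generator's after_limit_modifiers.
    modified = sqlglot.parse_one(
        "SELECT x FROM t SETTINGS max_threads = 8 FORMAT JSON", read="clickhouse"
    )
    print(modified.sql("clickhouse"))
    # expected: SELECT x FROM t SETTINGS max_threads = 8 FORMAT JSON

    # Parametric aggregates like quantile(0.5)(x) are handled by _parse_quantile.
    q = sqlglot.parse_one("SELECT quantile(0.5)(x) FROM t", read="clickhouse")
    print(q.sql("clickhouse"))  # expected: SELECT quantile(0.5)(x) FROM t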
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    EXPLICIT_SET_OP = True
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    SUPPORTS_TABLE_ALIAS_COLUMNS = False

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.NULLABLE: "Nullable",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySize: rename_func("LENGTH"),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self, e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self, e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: self.func(
            "position", e.this, e.args.get("substr"), e.args.get("position")
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "DATE_FORMAT", e.this, self.format_time(e), e.args.get("timezone")
        ),
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)),
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Variance: rename_func("varSamp"),
        exp.Stddev: rename_func("stddevSamp"),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
    }

    # There's no list in the docs, but it can be found in the ClickHouse code,
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (e.g. postgres), so
            # this branch aims to improve the transpilation to ClickHouse
            return f"CAST({strtodate_sql} AS DATE)"

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            return "String"

        return super().datatype_sql(expression)

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str:
        params = self.expressions(expression, key="params", flat=True)
        return self.func(expression.name, *expression.expressions) + f"({params})"

    def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str:
        return self.func(expression.name, *expression.expressions)

    def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str:
        return self.anonymousaggfunc_sql(expression)

    def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str:
        return self.parameterizedagg_sql(expression)

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(expression.this, "this")
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            return f"{this_name}{self.sep()}{this_properties}{self.sep()}{this_schema}"

        return super().createable_sql(expression, locations)

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
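A hedged sketch of the TRANSFORMS and TYPE_MAPPING tables in action when transpiling into ClickHouse (table and column names are illustrative; the expected outputs follow from the tables above but may vary by sqlglot version):

    import sqlglot

    # exp.ApproxDistinct is rendered as uniq() per the TRANSFORMS table.
    print(sqlglot.transpile(
        "SELECT APPROX_COUNT_DISTINCT(x) FROM t", read="duckdb", write="clickhouse"
    )[0])
    # expected: SELECT uniq(x) FROM t

    # TYPE_MAPPING renders standard types under their ClickHouse names.
    print(sqlglot.transpile("SELECT CAST(x AS BIGINT)", read="postgres", write="clickhouse")[0])
    # expected: SELECT CAST(x AS Int64)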
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql