sqlglot.dialects.clickhouse
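A minimal usage sketch before the source: the dialect registers under the name "clickhouse" and is exercised through sqlglot's top-level API (sqlglot.transpile, sqlglot.parse_one). The expected outputs noted in the comments follow from the rules implemented in this module and may vary across sqlglot versions.

    import sqlglot

    # TEXT maps to String, and because ClickHouse types are non-nullable by default,
    # the generator wraps the target type in Nullable(...) (see Generator.datatype_sql).
    print(sqlglot.transpile("SELECT CAST(x AS TEXT)", write="clickhouse")[0])
    # expected: SELECT CAST(x AS Nullable(String))

    # ClickHouse-specific query modifiers such as FINAL and SETTINGS parse natively.
    ast = sqlglot.parse_one("SELECT a FROM t FINAL SETTINGS max_threads = 4", read="clickhouse")
    print(ast.sql(dialect="clickhouse"))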
from __future__ import annotations
import typing as t
import datetime
from sqlglot import exp, generator, parser, tokens
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    build_date_delta,
    build_formatted_time,
    inline_array_sql,
    json_extract_segments,
    json_path_key_only_name,
    length_or_char_length_sql,
    no_pivot_sql,
    build_json_extract_path,
    rename_func,
    sha256_sql,
    strposition_sql,
    var_map_sql,
    timestamptrunc_sql,
    unit_to_var,
    trim_sql,
)
from sqlglot.generator import Generator
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import Token, TokenType
from sqlglot.generator import unsupported_args

DATETIME_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd]


def _build_date_format(args: t.List) -> exp.TimeToStr:
    expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args)

    timezone = seq_get(args, 2)
    if timezone:
        expr.set("zone", timezone)

    return expr


def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MILLIS:
        return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.MICROS:
        return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT))
    if scale == exp.UnixToTime.NANOS:
        return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT))

    return self.func(
        "fromUnixTimestamp",
        exp.cast(
            exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
        ),
    )


def _lower_func(sql: str) -> str:
    index = sql.index("(")
    return sql[:index].lower() + sql[index:]


def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str:
    quantile = expression.args["quantile"]
    args = f"({self.sql(expression, 'this')})"

    if isinstance(quantile, exp.Array):
        func = self.func("quantiles", *quantile)
    else:
        func = self.func("quantile", quantile)

    return func + args


def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc:
    if len(args) == 1:
        return exp.CountIf(this=seq_get(args, 0))

    return exp.CombinedAggFunc(this="countIf", expressions=args)


def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="STR_TO_DATE", expressions=args)

    strtodate = exp.StrToDate.from_arg_list(args)
    return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME))


def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATETIME_DELTA], str]:
    def _delta_sql(self: Generator, expression: DATETIME_DELTA) -> str:
        if not expression.unit:
            return rename_func(name)(self, expression)

        return self.func(
            name,
            unit_to_var(expression),
            expression.expression,
            expression.this,
        )

    return _delta_sql


def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime):
    ts = expression.this

    tz = expression.args.get("zone")
    if tz and isinstance(ts, exp.Literal):
        # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them.
        # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`.
        #
        # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit)
        # or microsecond (6 digit) precision. It will error if passed any other number of fractional
        # digits, so we extract the fractional seconds and pad to 6 digits before parsing.
        ts_string = ts.name.strip()

        # separate [date and time] from [fractional seconds and UTC offset]
        ts_parts = ts_string.split(".")
        if len(ts_parts) == 2:
            # separate fractional seconds and UTC offset
            offset_sep = "+" if "+" in ts_parts[1] else "-"
            ts_frac_parts = ts_parts[1].split(offset_sep)
            num_frac_parts = len(ts_frac_parts)

            # pad to 6 digits if fractional seconds present
            ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0")
            ts_string = "".join(
                [
                    ts_parts[0],  # date and time
                    ".",
                    ts_frac_parts[0],  # fractional seconds
                    offset_sep if num_frac_parts > 1 else "",
                    ts_frac_parts[1] if num_frac_parts > 1 else "",  # utc offset (if present)
                ]
            )

        # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14'
        # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if
        # it's part of the timestamp string
        ts_without_tz = (
            datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ")
        )
        ts = exp.Literal.string(ts_without_tz)

    # Non-nullable DateTime64 with microsecond precision
    expressions = [exp.DataTypeParam(this=tz)] if tz else []
    datatype = exp.DataType.build(
        exp.DataType.Type.DATETIME64,
        expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions],
        nullable=False,
    )

    return self.sql(exp.cast(ts, datatype, dialect=self.dialect))


def _map_sql(self: ClickHouse.Generator, expression: exp.Map | exp.VarMap) -> str:
    if not (expression.parent and expression.parent.arg_key == "settings"):
        return _lower_func(var_map_sql(self, expression))

    keys = expression.args.get("keys")
    values = expression.args.get("values")

    if not isinstance(keys, exp.Array) or not isinstance(values, exp.Array):
        self.unsupported("Cannot convert array columns into map.")
        return ""

    args = []
    for key, value in zip(keys.expressions, values.expressions):
        args.append(f"{self.sql(key)}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"{{{csv_args}}}"


class ClickHouse(Dialect):
    NORMALIZE_FUNCTIONS: bool | str = False
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE

    UNESCAPED_SEQUENCES = {
        "\\0": "\0",
    }

    CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"}

    SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = {
        exp.Except: False,
        exp.Intersect: False,
        exp.Union: None,
    }

    class Tokenizer(tokens.Tokenizer):
        COMMENTS = ["--", "#", "#!", ("/*", "*/")]
        IDENTIFIERS = ['"', "`"]
        IDENTIFIER_ESCAPES = ["\\"]
        STRING_ESCAPES = ["'", "\\"]
        BIT_STRINGS = [("0b", "")]
        HEX_STRINGS = [("0x", ""), ("0X", "")]
        HEREDOC_STRINGS = ["$"]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            ".:": TokenType.DOTCOLON,
            "ATTACH": TokenType.COMMAND,
            "DATE32": TokenType.DATE32,
            "DATETIME64": TokenType.DATETIME64,
            "DICTIONARY": TokenType.DICTIONARY,
            "DYNAMIC": TokenType.DYNAMIC,
            "ENUM8": TokenType.ENUM8,
            "ENUM16": TokenType.ENUM16,
            "FINAL": TokenType.FINAL,
            "FIXEDSTRING": TokenType.FIXEDSTRING,
            "FLOAT32": TokenType.FLOAT,
            "FLOAT64": TokenType.DOUBLE,
            "GLOBAL": TokenType.GLOBAL,
            "LOWCARDINALITY": TokenType.LOWCARDINALITY,
            "MAP": TokenType.MAP,
            "NESTED": TokenType.NESTED,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "TUPLE": TokenType.STRUCT,
            "UINT16": TokenType.USMALLINT,
            "UINT32": TokenType.UINT,
            "UINT64": TokenType.UBIGINT,
            "UINT8": TokenType.UTINYINT,
            "IPV4": TokenType.IPV4,
            "IPV6": TokenType.IPV6,
            "POINT": TokenType.POINT,
            "RING": TokenType.RING,
            "LINESTRING": TokenType.LINESTRING,
            "MULTILINESTRING": TokenType.MULTILINESTRING,
            "POLYGON": TokenType.POLYGON,
            "MULTIPOLYGON": TokenType.MULTIPOLYGON,
            "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION,
            "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION,
            "SYSTEM": TokenType.COMMAND,
            "PREWHERE": TokenType.PREWHERE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.HEREDOC_STRING,
        }

    class Parser(parser.Parser):
        # Tested in ClickHouse's playground, it seems that the following two queries do the same thing
        # * select x from t1 union all select x from t2 limit 1;
        # * select x from t1 union all (select x from t2 limit 1);
        MODIFIERS_ATTACHED_TO_SET_OP = False
        INTERVAL_SPANS = False
        OPTIONAL_ALIAS_TOKEN_CTE = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ANY": exp.AnyValue.from_arg_list,
            "ARRAYSUM": exp.ArraySum.from_arg_list,
            "COUNTIF": _build_count_if,
            "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATEADD": build_date_delta(exp.DateAdd, default_unit=None),
            "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None),
            "DATE_FORMAT": _build_date_format,
            "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None),
            "DATESUB": build_date_delta(exp.DateSub, default_unit=None),
            "FORMATDATETIME": _build_date_format,
            "JSONEXTRACTSTRING": build_json_extract_path(
                exp.JSONExtractScalar, zero_based_indexing=False
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MAP": parser.build_var_map,
            "MATCH": exp.RegexpLike.from_arg_list,
            "RANDCANONICAL": exp.Rand.from_arg_list,
            "STR_TO_DATE": _build_str_to_date,
            "TUPLE": exp.Struct.from_arg_list,
            "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None),
            "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None),
            "UNIQ": exp.ApproxDistinct.from_arg_list,
            "XOR": lambda args: exp.Xor(expressions=args),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "EDITDISTANCE": exp.Levenshtein.from_arg_list,
            "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list,
        }
        FUNCTIONS.pop("TRANSFORM")

        AGG_FUNCTIONS = {
            "count",
            "min",
            "max",
            "sum",
            "avg",
            "any",
            "stddevPop",
            "stddevSamp",
            "varPop",
            "varSamp",
            "corr",
            "covarPop",
            "covarSamp",
            "entropy",
            "exponentialMovingAverage",
            "intervalLengthSum",
            "kolmogorovSmirnovTest",
            "mannWhitneyUTest",
            "median",
            "rankCorr",
            "sumKahan",
            "studentTTest",
            "welchTTest",
            "anyHeavy",
            "anyLast",
            "boundingRatio",
            "first_value",
            "last_value",
            "argMin",
            "argMax",
            "avgWeighted",
            "topK",
            "topKWeighted",
            "deltaSum",
            "deltaSumTimestamp",
            "groupArray",
            "groupArrayLast",
            "groupUniqArray",
            "groupArrayInsertAt",
            "groupArrayMovingAvg",
            "groupArrayMovingSum",
            "groupArraySample",
            "groupBitAnd",
            "groupBitOr",
            "groupBitXor",
            "groupBitmap",
            "groupBitmapAnd",
            "groupBitmapOr",
            "groupBitmapXor",
            "sumWithOverflow",
            "sumMap",
            "minMap",
            "maxMap",
            "skewSamp",
            "skewPop",
            "kurtSamp",
            "kurtPop",
            "uniq",
            "uniqExact",
            "uniqCombined",
            "uniqCombined64",
            "uniqHLL12",
            "uniqTheta",
            "quantile",
            "quantiles",
            "quantileExact",
            "quantilesExact",
            "quantileExactLow",
            "quantilesExactLow",
            "quantileExactHigh",
            "quantilesExactHigh",
            "quantileExactWeighted",
            "quantilesExactWeighted",
            "quantileTiming",
            "quantilesTiming",
            "quantileTimingWeighted",
            "quantilesTimingWeighted",
            "quantileDeterministic",
            "quantilesDeterministic",
            "quantileTDigest",
            "quantilesTDigest",
            "quantileTDigestWeighted",
            "quantilesTDigestWeighted",
            "quantileBFloat16",
            "quantilesBFloat16",
            "quantileBFloat16Weighted",
            "quantilesBFloat16Weighted",
            "simpleLinearRegression",
            "stochasticLinearRegression",
            "stochasticLogisticRegression",
            "categoricalInformationValue",
            "contingency",
            "cramersV",
            "cramersVBiasCorrected",
            "theilsU",
            "maxIntersections",
            "maxIntersectionsPosition",
            "meanZTest",
            "quantileInterpolatedWeighted",
            "quantilesInterpolatedWeighted",
            "quantileGK",
            "quantilesGK",
            "sparkBar",
            "sumCount",
            "largestTriangleThreeBuckets",
            "histogram",
            "sequenceMatch",
            "sequenceCount",
            "windowFunnel",
            "retention",
            "uniqUpTo",
            "sequenceNextNode",
            "exponentialTimeDecayedAvg",
        }

        AGG_FUNCTIONS_SUFFIXES = [
            "If",
            "Array",
            "ArrayIf",
            "Map",
            "SimpleState",
            "State",
            "Merge",
            "MergeState",
            "ForEach",
            "Distinct",
            "OrDefault",
            "OrNull",
            "Resample",
            "ArgMin",
            "ArgMax",
        ]

        FUNC_TOKENS = {
            *parser.Parser.FUNC_TOKENS,
            TokenType.AND,
            TokenType.OR,
            TokenType.SET,
        }

        RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT}

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.LIKE,
        }

        AGG_FUNC_MAPPING = (
            lambda functions, suffixes: {
                f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions
            }
        )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES)

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"}

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()),
            "QUANTILE": lambda self: self._parse_quantile(),
            "MEDIAN": lambda self: self._parse_quantile(),
            "COLUMNS": lambda self: self._parse_columns(),
        }

        FUNCTION_PARSERS.pop("MATCH")

        PROPERTY_PARSERS = parser.Parser.PROPERTY_PARSERS.copy()
        PROPERTY_PARSERS.pop("DYNAMIC")

        NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy()
        NO_PAREN_FUNCTION_PARSERS.pop("ANY")

        NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy()
        NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.GLOBAL: lambda self, this: self._parse_global_in(this),
        }

        # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to
        # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler.
        COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy()
        COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER)

        JOIN_KINDS = {
            *parser.Parser.JOIN_KINDS,
            TokenType.ANY,
            TokenType.ASOF,
            TokenType.ARRAY,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.ANY,
            TokenType.ARRAY,
            TokenType.FINAL,
            TokenType.FORMAT,
            TokenType.SETTINGS,
        }

        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {
            TokenType.FORMAT,
        }

        LOG_DEFAULTS_TO_LN = True

        QUERY_MODIFIER_PARSERS = {
            **parser.Parser.QUERY_MODIFIER_PARSERS,
            TokenType.SETTINGS: lambda self: (
                "settings",
                self._advance() or self._parse_csv(self._parse_assignment),
            ),
            TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "INDEX": lambda self: self._parse_index_constraint(),
            "CODEC": lambda self: self._parse_compress(),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "REPLACE": lambda self: self._parse_alter_table_replace(),
        }

        SCHEMA_UNNAMED_CONSTRAINTS = {
            *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS,
            "INDEX",
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
        }

        # https://clickhouse.com/docs/en/sql-reference/statements/create/function
        def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
            return self._parse_lambda()

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            dtype = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True:
                # Mark every type as non-nullable which is ClickHouse's default, unless it's
                # already marked as nullable. This marker helps us transpile types from other
                # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))`
                # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would
                # fail in ClickHouse without the `Nullable` type constructor.
                dtype.set("nullable", False)

            return dtype

        def _parse_extract(self) -> exp.Extract | exp.Anonymous:
            index = self._index
            this = self._parse_bitwise()
            if self._match(TokenType.FROM):
                self._retreat(index)
                return super()._parse_extract()

            # We return Anonymous here because extract and regexpExtract have different semantics,
            # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g.,
            # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`.
            #
            # TODO: can we somehow convert the former into an equivalent `regexpExtract` call?
            self._match(TokenType.COMMA)
            return self.expression(
                exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()]
            )

        def _parse_assignment(self) -> t.Optional[exp.Expression]:
            this = super()._parse_assignment()

            if self._match(TokenType.PLACEHOLDER):
                return self.expression(
                    exp.If,
                    this=this,
                    true=self._parse_assignment(),
                    false=self._match(TokenType.COLON) and self._parse_assignment(),
                )

            return this

        def _parse_query_parameter(self) -> t.Optional[exp.Expression]:
            """
            Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier}
            https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters
            """
            index = self._index

            this = self._parse_id_var()
            self._match(TokenType.COLON)
            kind = self._parse_types(check_func=False, allow_identifiers=False) or (
                self._match_text_seq("IDENTIFIER") and "Identifier"
            )

            if not kind:
                self._retreat(index)
                return None
            elif not self._match(TokenType.R_BRACE):
                self.raise_error("Expecting }")

            return self.expression(exp.Placeholder, this=this, kind=kind)

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            l_brace = self._match(TokenType.L_BRACE, advance=False)
            bracket = super()._parse_bracket(this)

            if l_brace and isinstance(bracket, exp.Struct):
                varmap = exp.VarMap(keys=exp.Array(), values=exp.Array())
                for expression in bracket.expressions:
                    if not isinstance(expression, exp.PropertyEQ):
                        break

                    varmap.args["keys"].append("expressions", exp.Literal.string(expression.name))
                    varmap.args["values"].append("expressions", expression.expression)

                return varmap

            return bracket

        def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In:
            this = super()._parse_in(this)
            this.set("is_global", is_global)
            return this

        def _parse_global_in(self, this: t.Optional[exp.Expression]) -> exp.Not | exp.In:
            is_negated = self._match(TokenType.NOT)
            this = self._match(TokenType.IN) and self._parse_in(this, is_global=True)
            return self.expression(exp.Not, this=this) if is_negated else this

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
        ) -> t.Optional[exp.Expression]:
            this = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
            )

            if isinstance(this, exp.Table):
                inner = this.this
                alias = this.args.get("alias")

                if isinstance(inner, exp.GenerateSeries) and alias and not alias.columns:
                    alias.set("columns", [exp.to_identifier("generate_series")])

            if self._match(TokenType.FINAL):
                this = self.expression(exp.Final, this=this)

            return this

        def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
            return super()._parse_position(haystack_first=True)

        # https://clickhouse.com/docs/en/sql-reference/statements/select/with/
        def _parse_cte(self) -> t.Optional[exp.CTE]:
            # WITH <identifier> AS <subquery expression>
            cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte)

            if not cte:
                # WITH <expression> AS <identifier>
                cte = self.expression(
                    exp.CTE,
                    this=self._parse_assignment(),
                    alias=self._parse_table_alias(),
                    scalar=True,
                )

            return cte

        def _parse_join_parts(
            self,
        ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
            is_global = self._match(TokenType.GLOBAL) and self._prev
            kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev

            if kind_pre:
                kind = self._match_set(self.JOIN_KINDS) and self._prev
                side = self._match_set(self.JOIN_SIDES) and self._prev
                return is_global, side, kind

            return (
                is_global,
                self._match_set(self.JOIN_SIDES) and self._prev,
                self._match_set(self.JOIN_KINDS) and self._prev,
            )

        def _parse_join(
            self, skip_join_token: bool = False, parse_bracket: bool = False
        ) -> t.Optional[exp.Join]:
            join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True)
            if join:
                join.set("global", join.args.pop("method", None))

                # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table`
                # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join
                if join.kind == "ARRAY":
                    for table in join.find_all(exp.Table):
                        table.replace(table.to_column())

            return join

        def _parse_function(
            self,
            functions: t.Optional[t.Dict[str, t.Callable]] = None,
            anonymous: bool = False,
            optional_parens: bool = True,
            any_token: bool = False,
        ) -> t.Optional[exp.Expression]:
            expr = super()._parse_function(
                functions=functions,
                anonymous=anonymous,
                optional_parens=optional_parens,
                any_token=any_token,
            )

            func = expr.this if isinstance(expr, exp.Window) else expr

            # Aggregate functions can be split in 2 parts: <func_name><suffix>
            parts = (
                self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None
            )

            if parts:
                anon_func: exp.Anonymous = t.cast(exp.Anonymous, func)
                params = self._parse_func_params(anon_func)

                kwargs = {
                    "this": anon_func.this,
                    "expressions": anon_func.expressions,
                }
                if parts[1]:
                    exp_class: t.Type[exp.Expression] = (
                        exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc
                    )
                else:
                    exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc

                kwargs["exp_class"] = exp_class
                if params:
                    kwargs["params"] = params

                func = self.expression(**kwargs)

                if isinstance(expr, exp.Window):
                    # The window's func was parsed as Anonymous in base parser, fix its
                    # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc
                    expr.set("this", func)
                elif params:
                    # Params have blocked super()._parse_function() from parsing the following window
                    # (if that exists) as they're standing between the function call and the window spec
                    expr = self._parse_window(func)
                else:
                    expr = func

            return expr

        def _parse_func_params(
            self, this: t.Optional[exp.Func] = None
        ) -> t.Optional[t.List[exp.Expression]]:
            if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN):
                return self._parse_csv(self._parse_lambda)

            if self._match(TokenType.L_PAREN):
                params = self._parse_csv(self._parse_lambda)
                self._match_r_paren(this)
                return params

            return None

        def _parse_quantile(self) -> exp.Quantile:
            this = self._parse_lambda()
            params = self._parse_func_params()
            if params:
                return self.expression(exp.Quantile, this=params[0], quantile=this)
            return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5))

        def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
            return super()._parse_wrapped_id_vars(optional=True)

        def _parse_primary_key(
            self, wrapped_optional: bool = False, in_props: bool = False
        ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
            return super()._parse_primary_key(
                wrapped_optional=wrapped_optional or in_props, in_props=in_props
            )

        def _parse_on_property(self) -> t.Optional[exp.Expression]:
            index = self._index
            if self._match_text_seq("CLUSTER"):
                this = self._parse_id_var()
                if this:
                    return self.expression(exp.OnCluster, this=this)
                else:
                    self._retreat(index)
            return None

        def _parse_index_constraint(
            self, kind: t.Optional[str] = None
        ) -> exp.IndexColumnConstraint:
            # INDEX name1 expr TYPE type1(args) GRANULARITY value
            this = self._parse_id_var()
            expression = self._parse_assignment()

            index_type = self._match_text_seq("TYPE") and (
                self._parse_function() or self._parse_var()
            )

            granularity = self._match_text_seq("GRANULARITY") and self._parse_term()

            return self.expression(
                exp.IndexColumnConstraint,
                this=this,
                expression=expression,
                index_type=index_type,
                granularity=granularity,
            )

        def _parse_partition(self) -> t.Optional[exp.Partition]:
            # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression
            if not self._match(TokenType.PARTITION):
                return None

            if self._match_text_seq("ID"):
                # Corresponds to the PARTITION ID <string_value> syntax
                expressions: t.List[exp.Expression] = [
                    self.expression(exp.PartitionId, this=self._parse_string())
                ]
            else:
                expressions = self._parse_expressions()

            return self.expression(exp.Partition, expressions=expressions)

        def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]:
            partition = self._parse_partition()

            if not partition or not self._match(TokenType.FROM):
                return None

            return self.expression(
                exp.ReplacePartition, expression=partition, source=self._parse_table_parts()
            )

        def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]:
            if not self._match_text_seq("PROJECTION"):
                return None

            return self.expression(
                exp.ProjectionDef,
                this=self._parse_id_var(),
                expression=self._parse_wrapped(self._parse_statement),
            )

        def _parse_constraint(self) -> t.Optional[exp.Expression]:
            return super()._parse_constraint() or self._parse_projection_def()

        def _parse_alias(
            self, this: t.Optional[exp.Expression], explicit: bool = False
        ) -> t.Optional[exp.Expression]:
            # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier,
            # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias
            if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False):
                return this

            return super()._parse_alias(this=this, explicit=explicit)

        def _parse_expression(self) -> t.Optional[exp.Expression]:
            this = super()._parse_expression()

            # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier
            while self._match_pair(TokenType.APPLY, TokenType.L_PAREN):
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
                self._match(TokenType.R_PAREN)

            return this

        def _parse_columns(self) -> exp.Expression:
            this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda())

            while self._next and self._match_text_seq(")", "APPLY", "("):
                self._match(TokenType.R_PAREN)
                this = exp.Apply(this=this, expression=self._parse_var(any_token=True))
            return this

    class Generator(generator.Generator):
        QUERY_HINTS = False
        STRUCT_DELIMITER = ("(", ")")
        NVL2_SUPPORTED = False
        TABLESAMPLE_REQUIRES_PARENS = False
        TABLESAMPLE_SIZE_IS_ROWS = False
        TABLESAMPLE_KEYWORDS = "SAMPLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        GROUPINGS_SEP = ""
        SET_OP_MODIFIERS = False
        VALUES_AS_TABLE = False
        ARRAY_SIZE_NAME = "LENGTH"

        STRING_TYPE_MAPPING = {
            exp.DataType.Type.BLOB: "String",
            exp.DataType.Type.CHAR: "String",
            exp.DataType.Type.LONGBLOB: "String",
            exp.DataType.Type.LONGTEXT: "String",
            exp.DataType.Type.MEDIUMBLOB: "String",
            exp.DataType.Type.MEDIUMTEXT: "String",
            exp.DataType.Type.TINYBLOB: "String",
            exp.DataType.Type.TINYTEXT: "String",
            exp.DataType.Type.TEXT: "String",
            exp.DataType.Type.VARBINARY: "String",
            exp.DataType.Type.VARCHAR: "String",
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            **STRING_TYPE_MAPPING,
            exp.DataType.Type.ARRAY: "Array",
            exp.DataType.Type.BOOLEAN: "Bool",
            exp.DataType.Type.BIGINT: "Int64",
            exp.DataType.Type.DATE32: "Date32",
            exp.DataType.Type.DATETIME: "DateTime",
            exp.DataType.Type.DATETIME2: "DateTime",
            exp.DataType.Type.SMALLDATETIME: "DateTime",
            exp.DataType.Type.DATETIME64: "DateTime64",
            exp.DataType.Type.DECIMAL: "Decimal",
            exp.DataType.Type.DECIMAL32: "Decimal32",
            exp.DataType.Type.DECIMAL64: "Decimal64",
            exp.DataType.Type.DECIMAL128: "Decimal128",
            exp.DataType.Type.DECIMAL256: "Decimal256",
            exp.DataType.Type.TIMESTAMP: "DateTime",
            exp.DataType.Type.TIMESTAMPTZ: "DateTime",
            exp.DataType.Type.DOUBLE: "Float64",
            exp.DataType.Type.ENUM: "Enum",
            exp.DataType.Type.ENUM8: "Enum8",
            exp.DataType.Type.ENUM16: "Enum16",
            exp.DataType.Type.FIXEDSTRING: "FixedString",
            exp.DataType.Type.FLOAT: "Float32",
            exp.DataType.Type.INT: "Int32",
            exp.DataType.Type.MEDIUMINT: "Int32",
            exp.DataType.Type.INT128: "Int128",
            exp.DataType.Type.INT256: "Int256",
            exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
            exp.DataType.Type.MAP: "Map",
            exp.DataType.Type.NESTED: "Nested",
            exp.DataType.Type.SMALLINT: "Int16",
            exp.DataType.Type.STRUCT: "Tuple",
            exp.DataType.Type.TINYINT: "Int8",
            exp.DataType.Type.UBIGINT: "UInt64",
            exp.DataType.Type.UINT: "UInt32",
            exp.DataType.Type.UINT128: "UInt128",
            exp.DataType.Type.UINT256: "UInt256",
            exp.DataType.Type.USMALLINT: "UInt16",
            exp.DataType.Type.UTINYINT: "UInt8",
            exp.DataType.Type.IPV4: "IPv4",
            exp.DataType.Type.IPV6: "IPv6",
            exp.DataType.Type.POINT: "Point",
            exp.DataType.Type.RING: "Ring",
            exp.DataType.Type.LINESTRING: "LineString",
            exp.DataType.Type.MULTILINESTRING: "MultiLineString",
            exp.DataType.Type.POLYGON: "Polygon",
            exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
            exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
            exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
            exp.DataType.Type.DYNAMIC: "Dynamic",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("any"),
            exp.ApproxDistinct: rename_func("uniq"),
            exp.ArrayConcat: rename_func("arrayConcat"),
            exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
            exp.ArraySum: rename_func("arraySum"),
            exp.ArgMax: arg_max_or_min_no_count("argMax"),
            exp.ArgMin: arg_max_or_min_no_count("argMin"),
            exp.Array: inline_array_sql,
            exp.CastToStrType: rename_func("CAST"),
            exp.CountIf: rename_func("countIf"),
            exp.CompressColumnConstraint: lambda self,
            e: f"CODEC({self.expressions(e, key='this', flat=True)})",
            exp.ComputedColumnConstraint: lambda self,
            e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
            exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
            exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
            exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
            exp.DateStrToDate: rename_func("toDate"),
            exp.DateSub: _datetime_delta_sql("DATE_SUB"),
            exp.Explode: rename_func("arrayJoin"),
            exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
            exp.IsNan: rename_func("isNaN"),
            exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
            exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
            exp.JSONPathKey: json_path_key_only_name,
            exp.JSONPathRoot: lambda *_: "",
            exp.Length: length_or_char_length_sql,
            exp.Map: _map_sql,
            exp.Median: rename_func("median"),
            exp.Nullif: rename_func("nullIf"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
            exp.Rand: rename_func("randCanonical"),
            exp.StartsWith: rename_func("startsWith"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self,
                e,
                func_name="POSITION",
                supports_position=True,
                use_ansi_position=False,
            ),
            exp.TimeToStr: lambda self, e: self.func(
                "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
            ),
            exp.TimeStrToTime: _timestrtotime_sql,
            exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
            exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
            exp.VarMap: _map_sql,
            exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
            exp.MD5Digest: rename_func("MD5"),
            exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.UnixToTime: _unix_to_time_sql,
            exp.TimestampTrunc: timestamptrunc_sql(zone=True),
            exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
            exp.Variance: rename_func("varSamp"),
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Stddev: rename_func("stddevSamp"),
            exp.Chr: rename_func("CHAR"),
            exp.Lag: lambda self, e: self.func(
                "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Lead: lambda self, e: self.func(
                "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
            ),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("editDistance")
            ),
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.OnCluster: exp.Properties.Location.POST_NAME,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.ToTableProperty: exp.Properties.Location.POST_NAME,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # There's no list in docs, but it can be found in Clickhouse code
        # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
        ON_CLUSTER_TARGETS = {
            "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
            "DATABASE",
            "TABLE",
            "VIEW",
            "DICTIONARY",
            "INDEX",
            "FUNCTION",
            "NAMED COLLECTION",
        }

        # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
        NON_NULLABLE_TYPES = {
            exp.DataType.Type.ARRAY,
            exp.DataType.Type.MAP,
            exp.DataType.Type.STRUCT,
            exp.DataType.Type.POINT,
            exp.DataType.Type.RING,
            exp.DataType.Type.LINESTRING,
            exp.DataType.Type.MULTILINESTRING,
            exp.DataType.Type.POLYGON,
            exp.DataType.Type.MULTIPOLYGON,
        }

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            strtodate_sql = self.function_fallback_sql(expression)

            if not isinstance(expression.parent, exp.Cast):
                # StrToDate returns DATEs in other dialects (eg. postgres), so
                # this branch aims to improve the transpilation to clickhouse
                return self.cast_sql(exp.cast(expression, "DATE"))

            return strtodate_sql

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
                return self.sql(this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            dtype = expression.to
            if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
                # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
                dtype.set("nullable", True)

            return super().cast_sql(expression)

        def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
            this = self.json_path_part(expression.this)
            return str(int(this) + 1) if is_int(this) else this

        def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
            return f"AS {self.sql(expression, 'this')}"

        def _any_to_has(
            self,
            expression: exp.EQ | exp.NEQ,
            default: t.Callable[[t.Any], str],
            prefix: str = "",
        ) -> str:
            if isinstance(expression.left, exp.Any):
                arr = expression.left
                this = expression.right
            elif isinstance(expression.right, exp.Any):
                arr = expression.right
                this = expression.left
            else:
                return default(expression)

            return prefix + self.func("has", arr.this.unnest(), this)

        def eq_sql(self, expression: exp.EQ) -> str:
            return self._any_to_has(expression, super().eq_sql)

        def neq_sql(self, expression: exp.NEQ) -> str:
            return self._any_to_has(expression, super().neq_sql, "NOT ")

        def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
            # Manually add a flag to make the search case-insensitive
            regex = self.func("CONCAT", "'(?i)'", expression.expression)
            return self.func("match", expression.this, regex)

        def datatype_sql(self, expression: exp.DataType) -> str:
            # String is the standard ClickHouse type, every other variant is just an alias.
            # Additionally, any supplied length parameter will be ignored.
            #
            # https://clickhouse.com/docs/en/sql-reference/data-types/string
            if expression.this in self.STRING_TYPE_MAPPING:
                dtype = "String"
            else:
                dtype = super().datatype_sql(expression)

            # This section changes the type to `Nullable(...)` if the following conditions hold:
            # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
            #   and change their semantics
            # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
            #   constraint: "Type of Map key must be a type, that can be represented by integer or
            #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
            # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
            parent = expression.parent
            nullable = expression.args.get("nullable")
            if nullable is True or (
                nullable is None
                and not (
                    isinstance(parent, exp.DataType)
                    and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                    and expression.index in (None, 0)
                )
                and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
            ):
                dtype = f"Nullable({dtype})"

            return dtype

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.args.get("scalar"):
                this = self.sql(expression, "this")
                alias = self.sql(expression, "alias")
                return f"{this} AS {alias}"

            return super().cte_sql(expression)

        def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
            return super().after_limit_modifiers(expression) + [
                (
                    self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                    if expression.args.get("settings")
                    else ""
                ),
                (
                    self.seg("FORMAT ") + self.sql(expression, "format")
                    if expression.args.get("format")
                    else ""
                ),
            ]

        def placeholder_sql(self, expression: exp.Placeholder) -> str:
            return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

        def oncluster_sql(self, expression: exp.OnCluster) -> str:
            return f"ON CLUSTER {self.sql(expression, 'this')}"

        def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
            if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
                exp.Properties.Location.POST_NAME
            ):
                this_name = self.sql(
                    expression.this if isinstance(expression.this, exp.Schema) else expression,
                    "this",
                )
                this_properties = " ".join(
                    [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
                )
                this_schema = self.schema_columns_sql(expression.this)
                this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

                return f"{this_name}{self.sep()}{this_properties}{this_schema}"

            return super().createable_sql(expression, locations)

        def create_sql(self, expression: exp.Create) -> str:
            # The comment property comes last in CTAS statements, i.e. after the query
            query = expression.expression
            if isinstance(query, exp.Query):
                comment_prop = expression.find(exp.SchemaCommentProperty)
                if comment_prop:
                    comment_prop.pop()
                    query.replace(exp.paren(query))
            else:
                comment_prop = None

            create_sql = super().create_sql(expression)

            comment_sql = self.sql(comment_prop)
            comment_sql = f" {comment_sql}" if comment_sql else ""

            return f"{create_sql}{comment_sql}"

        def prewhere_sql(self, expression: exp.PreWhere) -> str:
            this = self.indent(self.sql(expression, "this"))
            return f"{self.seg('PREWHERE')}{self.sep()}{this}"

        def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
            this = self.sql(expression, "this")
            this = f" {this}" if this else ""
            expr = self.sql(expression, "expression")
            expr = f" {expr}" if expr else ""
            index_type = self.sql(expression, "index_type")
            index_type = f" TYPE {index_type}" if index_type else ""
            granularity = self.sql(expression, "granularity")
            granularity = f" GRANULARITY {granularity}" if granularity else ""

            return f"INDEX{this}{expr}{index_type}{granularity}"

        def partition_sql(self, expression: exp.Partition) -> str:
            return f"PARTITION {self.expressions(expression, flat=True)}"

        def partitionid_sql(self, expression: exp.PartitionId) -> str:
            return f"ID {self.sql(expression.this)}"

        def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
            return (
                f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
            )

        def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
            return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

        def is_sql(self, expression: exp.Is) -> str:
            is_sql = super().is_sql(expression)

            if isinstance(expression.parent, exp.Not):
                # value IS NOT NULL -> NOT (value IS NULL)
                is_sql = self.wrap(is_sql)

            return is_sql

        def in_sql(self, expression: exp.In) -> str:
            in_sql = super().in_sql(expression)

            if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
                in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

            return in_sql

        def not_sql(self, expression: exp.Not) -> str:
            if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
                # let `GLOBAL IN` child interpose `NOT`
                return self.sql(expression, "this")

            return super().not_sql(expression)
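Two more hedged examples of ClickHouse-specific behaviors implemented above; the round-trip outputs follow from this module's parser and generator hooks and may differ across sqlglot versions:

    import sqlglot
    from sqlglot import exp

    # Query parameters ({name: Type}) are parsed by _parse_query_parameter into
    # exp.Placeholder nodes carrying a `kind`, and printed back by placeholder_sql.
    q = sqlglot.parse_one("SELECT {abc: UInt32}", read="clickhouse")
    assert q.find(exp.Placeholder) is not None
    print(q.sql(dialect="clickhouse"))  # expected: SELECT {abc: UInt32}

    # Parameterized aggregates such as quantile(0.5)(x) go through _parse_quantile
    # and _quantile_sql, so they should round-trip unchanged.
    print(sqlglot.transpile("SELECT quantile(0.5)(x) FROM t", read="clickhouse", write="clickhouse")[0])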
AS <identifier> 683 cte = self.expression( 684 exp.CTE, 685 this=self._parse_assignment(), 686 alias=self._parse_table_alias(), 687 scalar=True, 688 ) 689 690 return cte 691 692 def _parse_join_parts( 693 self, 694 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 695 is_global = self._match(TokenType.GLOBAL) and self._prev 696 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 697 698 if kind_pre: 699 kind = self._match_set(self.JOIN_KINDS) and self._prev 700 side = self._match_set(self.JOIN_SIDES) and self._prev 701 return is_global, side, kind 702 703 return ( 704 is_global, 705 self._match_set(self.JOIN_SIDES) and self._prev, 706 self._match_set(self.JOIN_KINDS) and self._prev, 707 ) 708 709 def _parse_join( 710 self, skip_join_token: bool = False, parse_bracket: bool = False 711 ) -> t.Optional[exp.Join]: 712 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 713 if join: 714 join.set("global", join.args.pop("method", None)) 715 716 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 717 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 718 if join.kind == "ARRAY": 719 for table in join.find_all(exp.Table): 720 table.replace(table.to_column()) 721 722 return join 723 724 def _parse_function( 725 self, 726 functions: t.Optional[t.Dict[str, t.Callable]] = None, 727 anonymous: bool = False, 728 optional_parens: bool = True, 729 any_token: bool = False, 730 ) -> t.Optional[exp.Expression]: 731 expr = super()._parse_function( 732 functions=functions, 733 anonymous=anonymous, 734 optional_parens=optional_parens, 735 any_token=any_token, 736 ) 737 738 func = expr.this if isinstance(expr, exp.Window) else expr 739 740 # Aggregate functions can be split in 2 parts: <func_name><suffix> 741 parts = ( 742 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 743 ) 744 745 if parts: 746 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 747 params = self._parse_func_params(anon_func) 748 749 kwargs = { 750 "this": anon_func.this, 751 "expressions": anon_func.expressions, 752 } 753 if parts[1]: 754 exp_class: t.Type[exp.Expression] = ( 755 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 756 ) 757 else: 758 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 759 760 kwargs["exp_class"] = exp_class 761 if params: 762 kwargs["params"] = params 763 764 func = self.expression(**kwargs) 765 766 if isinstance(expr, exp.Window): 767 # The window's func was parsed as Anonymous in base parser, fix its 768 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 769 expr.set("this", func) 770 elif params: 771 # Params have blocked super()._parse_function() from parsing the following window 772 # (if that exists) as they're standing between the function call and the window spec 773 expr = self._parse_window(func) 774 else: 775 expr = func 776 777 return expr 778 779 def _parse_func_params( 780 self, this: t.Optional[exp.Func] = None 781 ) -> t.Optional[t.List[exp.Expression]]: 782 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 783 return self._parse_csv(self._parse_lambda) 784 785 if self._match(TokenType.L_PAREN): 786 params = self._parse_csv(self._parse_lambda) 787 self._match_r_paren(this) 788 return params 789 790 return None 791 792 def _parse_quantile(self) -> exp.Quantile: 793 this = self._parse_lambda() 794 params = self._parse_func_params() 795 if params: 796 return 
self.expression(exp.Quantile, this=params[0], quantile=this) 797 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 798 799 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 800 return super()._parse_wrapped_id_vars(optional=True) 801 802 def _parse_primary_key( 803 self, wrapped_optional: bool = False, in_props: bool = False 804 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 805 return super()._parse_primary_key( 806 wrapped_optional=wrapped_optional or in_props, in_props=in_props 807 ) 808 809 def _parse_on_property(self) -> t.Optional[exp.Expression]: 810 index = self._index 811 if self._match_text_seq("CLUSTER"): 812 this = self._parse_id_var() 813 if this: 814 return self.expression(exp.OnCluster, this=this) 815 else: 816 self._retreat(index) 817 return None 818 819 def _parse_index_constraint( 820 self, kind: t.Optional[str] = None 821 ) -> exp.IndexColumnConstraint: 822 # INDEX name1 expr TYPE type1(args) GRANULARITY value 823 this = self._parse_id_var() 824 expression = self._parse_assignment() 825 826 index_type = self._match_text_seq("TYPE") and ( 827 self._parse_function() or self._parse_var() 828 ) 829 830 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 831 832 return self.expression( 833 exp.IndexColumnConstraint, 834 this=this, 835 expression=expression, 836 index_type=index_type, 837 granularity=granularity, 838 ) 839 840 def _parse_partition(self) -> t.Optional[exp.Partition]: 841 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 842 if not self._match(TokenType.PARTITION): 843 return None 844 845 if self._match_text_seq("ID"): 846 # Corresponds to the PARTITION ID <string_value> syntax 847 expressions: t.List[exp.Expression] = [ 848 self.expression(exp.PartitionId, this=self._parse_string()) 849 ] 850 else: 851 expressions = self._parse_expressions() 852 853 return self.expression(exp.Partition, expressions=expressions) 854 855 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 856 partition = self._parse_partition() 857 858 if not partition or not self._match(TokenType.FROM): 859 return None 860 861 return self.expression( 862 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 863 ) 864 865 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 866 if not self._match_text_seq("PROJECTION"): 867 return None 868 869 return self.expression( 870 exp.ProjectionDef, 871 this=self._parse_id_var(), 872 expression=self._parse_wrapped(self._parse_statement), 873 ) 874 875 def _parse_constraint(self) -> t.Optional[exp.Expression]: 876 return super()._parse_constraint() or self._parse_projection_def() 877 878 def _parse_alias( 879 self, this: t.Optional[exp.Expression], explicit: bool = False 880 ) -> t.Optional[exp.Expression]: 881 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 882 # so "APPLY" shouldn't be parsed as <expr>'s alias. 
However, "SELECT <expr> apply" is a valid alias 883 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 884 return this 885 886 return super()._parse_alias(this=this, explicit=explicit) 887 888 def _parse_expression(self) -> t.Optional[exp.Expression]: 889 this = super()._parse_expression() 890 891 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 892 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 893 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 894 self._match(TokenType.R_PAREN) 895 896 return this 897 898 def _parse_columns(self) -> exp.Expression: 899 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 900 901 while self._next and self._match_text_seq(")", "APPLY", "("): 902 self._match(TokenType.R_PAREN) 903 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 904 return this 905 906 class Generator(generator.Generator): 907 QUERY_HINTS = False 908 STRUCT_DELIMITER = ("(", ")") 909 NVL2_SUPPORTED = False 910 TABLESAMPLE_REQUIRES_PARENS = False 911 TABLESAMPLE_SIZE_IS_ROWS = False 912 TABLESAMPLE_KEYWORDS = "SAMPLE" 913 LAST_DAY_SUPPORTS_DATE_PART = False 914 CAN_IMPLEMENT_ARRAY_ANY = True 915 SUPPORTS_TO_NUMBER = False 916 JOIN_HINTS = False 917 TABLE_HINTS = False 918 GROUPINGS_SEP = "" 919 SET_OP_MODIFIERS = False 920 VALUES_AS_TABLE = False 921 ARRAY_SIZE_NAME = "LENGTH" 922 923 STRING_TYPE_MAPPING = { 924 exp.DataType.Type.BLOB: "String", 925 exp.DataType.Type.CHAR: "String", 926 exp.DataType.Type.LONGBLOB: "String", 927 exp.DataType.Type.LONGTEXT: "String", 928 exp.DataType.Type.MEDIUMBLOB: "String", 929 exp.DataType.Type.MEDIUMTEXT: "String", 930 exp.DataType.Type.TINYBLOB: "String", 931 exp.DataType.Type.TINYTEXT: "String", 932 exp.DataType.Type.TEXT: "String", 933 exp.DataType.Type.VARBINARY: "String", 934 exp.DataType.Type.VARCHAR: "String", 935 } 936 937 SUPPORTED_JSON_PATH_PARTS = { 938 exp.JSONPathKey, 939 exp.JSONPathRoot, 940 exp.JSONPathSubscript, 941 } 942 943 TYPE_MAPPING = { 944 **generator.Generator.TYPE_MAPPING, 945 **STRING_TYPE_MAPPING, 946 exp.DataType.Type.ARRAY: "Array", 947 exp.DataType.Type.BOOLEAN: "Bool", 948 exp.DataType.Type.BIGINT: "Int64", 949 exp.DataType.Type.DATE32: "Date32", 950 exp.DataType.Type.DATETIME: "DateTime", 951 exp.DataType.Type.DATETIME2: "DateTime", 952 exp.DataType.Type.SMALLDATETIME: "DateTime", 953 exp.DataType.Type.DATETIME64: "DateTime64", 954 exp.DataType.Type.DECIMAL: "Decimal", 955 exp.DataType.Type.DECIMAL32: "Decimal32", 956 exp.DataType.Type.DECIMAL64: "Decimal64", 957 exp.DataType.Type.DECIMAL128: "Decimal128", 958 exp.DataType.Type.DECIMAL256: "Decimal256", 959 exp.DataType.Type.TIMESTAMP: "DateTime", 960 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 961 exp.DataType.Type.DOUBLE: "Float64", 962 exp.DataType.Type.ENUM: "Enum", 963 exp.DataType.Type.ENUM8: "Enum8", 964 exp.DataType.Type.ENUM16: "Enum16", 965 exp.DataType.Type.FIXEDSTRING: "FixedString", 966 exp.DataType.Type.FLOAT: "Float32", 967 exp.DataType.Type.INT: "Int32", 968 exp.DataType.Type.MEDIUMINT: "Int32", 969 exp.DataType.Type.INT128: "Int128", 970 exp.DataType.Type.INT256: "Int256", 971 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 972 exp.DataType.Type.MAP: "Map", 973 exp.DataType.Type.NESTED: "Nested", 974 exp.DataType.Type.SMALLINT: "Int16", 975 exp.DataType.Type.STRUCT: "Tuple", 976 exp.DataType.Type.TINYINT: "Int8", 977 exp.DataType.Type.UBIGINT: "UInt64", 978 exp.DataType.Type.UINT: "UInt32", 979 exp.DataType.Type.UINT128: 
"UInt128", 980 exp.DataType.Type.UINT256: "UInt256", 981 exp.DataType.Type.USMALLINT: "UInt16", 982 exp.DataType.Type.UTINYINT: "UInt8", 983 exp.DataType.Type.IPV4: "IPv4", 984 exp.DataType.Type.IPV6: "IPv6", 985 exp.DataType.Type.POINT: "Point", 986 exp.DataType.Type.RING: "Ring", 987 exp.DataType.Type.LINESTRING: "LineString", 988 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 989 exp.DataType.Type.POLYGON: "Polygon", 990 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 991 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 992 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 993 exp.DataType.Type.DYNAMIC: "Dynamic", 994 } 995 996 TRANSFORMS = { 997 **generator.Generator.TRANSFORMS, 998 exp.AnyValue: rename_func("any"), 999 exp.ApproxDistinct: rename_func("uniq"), 1000 exp.ArrayConcat: rename_func("arrayConcat"), 1001 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 1002 exp.ArraySum: rename_func("arraySum"), 1003 exp.ArgMax: arg_max_or_min_no_count("argMax"), 1004 exp.ArgMin: arg_max_or_min_no_count("argMin"), 1005 exp.Array: inline_array_sql, 1006 exp.CastToStrType: rename_func("CAST"), 1007 exp.CountIf: rename_func("countIf"), 1008 exp.CompressColumnConstraint: lambda self, 1009 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 1010 exp.ComputedColumnConstraint: lambda self, 1011 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 1012 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 1013 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 1014 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 1015 exp.DateStrToDate: rename_func("toDate"), 1016 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 1017 exp.Explode: rename_func("arrayJoin"), 1018 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 1019 exp.IsNan: rename_func("isNaN"), 1020 exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}", 1021 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 1022 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 1023 exp.JSONPathKey: json_path_key_only_name, 1024 exp.JSONPathRoot: lambda *_: "", 1025 exp.Length: length_or_char_length_sql, 1026 exp.Map: _map_sql, 1027 exp.Median: rename_func("median"), 1028 exp.Nullif: rename_func("nullIf"), 1029 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1030 exp.Pivot: no_pivot_sql, 1031 exp.Quantile: _quantile_sql, 1032 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 1033 exp.Rand: rename_func("randCanonical"), 1034 exp.StartsWith: rename_func("startsWith"), 1035 exp.StrPosition: lambda self, e: strposition_sql( 1036 self, 1037 e, 1038 func_name="POSITION", 1039 supports_position=True, 1040 use_ansi_position=False, 1041 ), 1042 exp.TimeToStr: lambda self, e: self.func( 1043 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 1044 ), 1045 exp.TimeStrToTime: _timestrtotime_sql, 1046 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 1047 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 1048 exp.VarMap: _map_sql, 1049 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1050 exp.MD5Digest: rename_func("MD5"), 1051 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1052 exp.SHA: rename_func("SHA1"), 1053 exp.SHA2: sha256_sql, 1054 exp.UnixToTime: _unix_to_time_sql, 1055 exp.TimestampTrunc: 
timestamptrunc_sql(zone=True), 1056 exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"), 1057 exp.Variance: rename_func("varSamp"), 1058 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1059 exp.Stddev: rename_func("stddevSamp"), 1060 exp.Chr: rename_func("CHAR"), 1061 exp.Lag: lambda self, e: self.func( 1062 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1063 ), 1064 exp.Lead: lambda self, e: self.func( 1065 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1066 ), 1067 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1068 rename_func("editDistance") 1069 ), 1070 } 1071 1072 PROPERTIES_LOCATION = { 1073 **generator.Generator.PROPERTIES_LOCATION, 1074 exp.OnCluster: exp.Properties.Location.POST_NAME, 1075 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1076 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1077 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1078 } 1079 1080 # There's no list in docs, but it can be found in Clickhouse code 1081 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1082 ON_CLUSTER_TARGETS = { 1083 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1084 "DATABASE", 1085 "TABLE", 1086 "VIEW", 1087 "DICTIONARY", 1088 "INDEX", 1089 "FUNCTION", 1090 "NAMED COLLECTION", 1091 } 1092 1093 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1094 NON_NULLABLE_TYPES = { 1095 exp.DataType.Type.ARRAY, 1096 exp.DataType.Type.MAP, 1097 exp.DataType.Type.STRUCT, 1098 exp.DataType.Type.POINT, 1099 exp.DataType.Type.RING, 1100 exp.DataType.Type.LINESTRING, 1101 exp.DataType.Type.MULTILINESTRING, 1102 exp.DataType.Type.POLYGON, 1103 exp.DataType.Type.MULTIPOLYGON, 1104 } 1105 1106 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1107 strtodate_sql = self.function_fallback_sql(expression) 1108 1109 if not isinstance(expression.parent, exp.Cast): 1110 # StrToDate returns DATEs in other dialects (eg. 
postgres), so 1111 # this branch aims to improve the transpilation to clickhouse 1112 return self.cast_sql(exp.cast(expression, "DATE")) 1113 1114 return strtodate_sql 1115 1116 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1117 this = expression.this 1118 1119 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1120 return self.sql(this) 1121 1122 return super().cast_sql(expression, safe_prefix=safe_prefix) 1123 1124 def trycast_sql(self, expression: exp.TryCast) -> str: 1125 dtype = expression.to 1126 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1127 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1128 dtype.set("nullable", True) 1129 1130 return super().cast_sql(expression) 1131 1132 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1133 this = self.json_path_part(expression.this) 1134 return str(int(this) + 1) if is_int(this) else this 1135 1136 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1137 return f"AS {self.sql(expression, 'this')}" 1138 1139 def _any_to_has( 1140 self, 1141 expression: exp.EQ | exp.NEQ, 1142 default: t.Callable[[t.Any], str], 1143 prefix: str = "", 1144 ) -> str: 1145 if isinstance(expression.left, exp.Any): 1146 arr = expression.left 1147 this = expression.right 1148 elif isinstance(expression.right, exp.Any): 1149 arr = expression.right 1150 this = expression.left 1151 else: 1152 return default(expression) 1153 1154 return prefix + self.func("has", arr.this.unnest(), this) 1155 1156 def eq_sql(self, expression: exp.EQ) -> str: 1157 return self._any_to_has(expression, super().eq_sql) 1158 1159 def neq_sql(self, expression: exp.NEQ) -> str: 1160 return self._any_to_has(expression, super().neq_sql, "NOT ") 1161 1162 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1163 # Manually add a flag to make the search case-insensitive 1164 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1165 return self.func("match", expression.this, regex) 1166 1167 def datatype_sql(self, expression: exp.DataType) -> str: 1168 # String is the standard ClickHouse type, every other variant is just an alias. 1169 # Additionally, any supplied length parameter will be ignored. 1170 # 1171 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1172 if expression.this in self.STRING_TYPE_MAPPING: 1173 dtype = "String" 1174 else: 1175 dtype = super().datatype_sql(expression) 1176 1177 # This section changes the type to `Nullable(...)` if the following conditions hold: 1178 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1179 # and change their semantics 1180 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1181 # constraint: "Type of Map key must be a type, that can be represented by integer or 1182 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1183 # - It's not a composite type, e.g. 
`Nullable(Array(...))` is not a valid type 1184 parent = expression.parent 1185 nullable = expression.args.get("nullable") 1186 if nullable is True or ( 1187 nullable is None 1188 and not ( 1189 isinstance(parent, exp.DataType) 1190 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1191 and expression.index in (None, 0) 1192 ) 1193 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1194 ): 1195 dtype = f"Nullable({dtype})" 1196 1197 return dtype 1198 1199 def cte_sql(self, expression: exp.CTE) -> str: 1200 if expression.args.get("scalar"): 1201 this = self.sql(expression, "this") 1202 alias = self.sql(expression, "alias") 1203 return f"{this} AS {alias}" 1204 1205 return super().cte_sql(expression) 1206 1207 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1208 return super().after_limit_modifiers(expression) + [ 1209 ( 1210 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1211 if expression.args.get("settings") 1212 else "" 1213 ), 1214 ( 1215 self.seg("FORMAT ") + self.sql(expression, "format") 1216 if expression.args.get("format") 1217 else "" 1218 ), 1219 ] 1220 1221 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1222 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1223 1224 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1225 return f"ON CLUSTER {self.sql(expression, 'this')}" 1226 1227 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1228 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1229 exp.Properties.Location.POST_NAME 1230 ): 1231 this_name = self.sql( 1232 expression.this if isinstance(expression.this, exp.Schema) else expression, 1233 "this", 1234 ) 1235 this_properties = " ".join( 1236 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1237 ) 1238 this_schema = self.schema_columns_sql(expression.this) 1239 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1240 1241 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1242 1243 return super().createable_sql(expression, locations) 1244 1245 def create_sql(self, expression: exp.Create) -> str: 1246 # The comment property comes last in CTAS statements, i.e. 
after the query 1247 query = expression.expression 1248 if isinstance(query, exp.Query): 1249 comment_prop = expression.find(exp.SchemaCommentProperty) 1250 if comment_prop: 1251 comment_prop.pop() 1252 query.replace(exp.paren(query)) 1253 else: 1254 comment_prop = None 1255 1256 create_sql = super().create_sql(expression) 1257 1258 comment_sql = self.sql(comment_prop) 1259 comment_sql = f" {comment_sql}" if comment_sql else "" 1260 1261 return f"{create_sql}{comment_sql}" 1262 1263 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1264 this = self.indent(self.sql(expression, "this")) 1265 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1266 1267 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1268 this = self.sql(expression, "this") 1269 this = f" {this}" if this else "" 1270 expr = self.sql(expression, "expression") 1271 expr = f" {expr}" if expr else "" 1272 index_type = self.sql(expression, "index_type") 1273 index_type = f" TYPE {index_type}" if index_type else "" 1274 granularity = self.sql(expression, "granularity") 1275 granularity = f" GRANULARITY {granularity}" if granularity else "" 1276 1277 return f"INDEX{this}{expr}{index_type}{granularity}" 1278 1279 def partition_sql(self, expression: exp.Partition) -> str: 1280 return f"PARTITION {self.expressions(expression, flat=True)}" 1281 1282 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1283 return f"ID {self.sql(expression.this)}" 1284 1285 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1286 return ( 1287 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1288 ) 1289 1290 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1291 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1292 1293 def is_sql(self, expression: exp.Is) -> str: 1294 is_sql = super().is_sql(expression) 1295 1296 if isinstance(expression.parent, exp.Not): 1297 # value IS NOT NULL -> NOT (value IS NULL) 1298 is_sql = self.wrap(is_sql) 1299 1300 return is_sql 1301 1302 def in_sql(self, expression: exp.In) -> str: 1303 in_sql = super().in_sql(expression) 1304 1305 if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"): 1306 in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1) 1307 1308 return in_sql 1309 1310 def not_sql(self, expression: exp.Not) -> str: 1311 if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"): 1312 # let `GLOBAL IN` child interpose `NOT` 1313 return self.sql(expression, "this") 1314 1315 return super().not_sql(expression)
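A few round-trips through the dialect make the machinery above concrete: the <func_name><suffix> aggregate split driven by AGG_FUNC_MAPPING, parameterized aggregates, and the GLOBAL NOT IN handling in in_sql/not_sql. This is a minimal sketch; the expected outputs follow from the source above but may vary across sqlglot versions.

import sqlglot

queries = [
    # Combinator suffixes: sumIf splits into ("sum", "If") via AGG_FUNC_MAPPING.
    "SELECT sumIf(x, x > 0) FROM t",
    # Parameterized aggregates keep their parameter list: quantiles(0.5, 0.9)(x).
    "SELECT quantiles(0.5, 0.9)(x) FROM t",
    # NOT + GLOBAL IN round-trips as GLOBAL NOT IN (see in_sql/not_sql above).
    "SELECT * FROM t WHERE a GLOBAL NOT IN (SELECT a FROM t2)",
]

for sql in queries:
    # Each query should survive a ClickHouse -> ClickHouse round-trip unchanged.
    print(sqlglot.transpile(sql, read="clickhouse", write="clickhouse")[0])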
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

WITH data AS (
  SELECT
    1 AS id,
    2 AS my_id
)
SELECT
  id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
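A minimal sketch of exercising this with sqlglot's qualifier (assuming the sqlglot.optimizer.qualify entry point; the resolution shape follows the ClickHouse bullet above):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

# Under ClickHouse semantics the projection alias is forwarded across the
# query, so every `my_id` reference should resolve to `id` after qualification.
qualified = qualify(sqlglot.parse_one(sql, read="clickhouse"), dialect="clickhouse")
print(qualified.sql(dialect="clickhouse"))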
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether number literals can include underscores for better readability
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
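A sketch of that mapping in action (the exact output string is an assumption based on the description above):

import sqlglot

# CREATE SCHEMA has no direct ClickHouse counterpart, so the creatable
# kind is renamed when generating ClickHouse SQL.
print(sqlglot.transpile("CREATE SCHEMA foo", write="clickhouse")[0])
# Expected (assumption): CREATE DATABASE foo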
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
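ClickHouse is one such dialect, so a bare UNION read from a dialect where it defaults to DISTINCT should be spelled out on the way in (a sketch; the quantifier in the expected output is an assumption):

import sqlglot

# ClickHouse requires UNION DISTINCT / UNION ALL, so the implicit
# DISTINCT of the source dialect must be made explicit.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="clickhouse")[0])
# Expected (assumption): SELECT 1 UNION DISTINCT SELECT 2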
211 class Tokenizer(tokens.Tokenizer): 212 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 213 IDENTIFIERS = ['"', "`"] 214 IDENTIFIER_ESCAPES = ["\\"] 215 STRING_ESCAPES = ["'", "\\"] 216 BIT_STRINGS = [("0b", "")] 217 HEX_STRINGS = [("0x", ""), ("0X", "")] 218 HEREDOC_STRINGS = ["$"] 219 220 KEYWORDS = { 221 **tokens.Tokenizer.KEYWORDS, 222 ".:": TokenType.DOTCOLON, 223 "ATTACH": TokenType.COMMAND, 224 "DATE32": TokenType.DATE32, 225 "DATETIME64": TokenType.DATETIME64, 226 "DICTIONARY": TokenType.DICTIONARY, 227 "DYNAMIC": TokenType.DYNAMIC, 228 "ENUM8": TokenType.ENUM8, 229 "ENUM16": TokenType.ENUM16, 230 "FINAL": TokenType.FINAL, 231 "FIXEDSTRING": TokenType.FIXEDSTRING, 232 "FLOAT32": TokenType.FLOAT, 233 "FLOAT64": TokenType.DOUBLE, 234 "GLOBAL": TokenType.GLOBAL, 235 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 236 "MAP": TokenType.MAP, 237 "NESTED": TokenType.NESTED, 238 "SAMPLE": TokenType.TABLE_SAMPLE, 239 "TUPLE": TokenType.STRUCT, 240 "UINT16": TokenType.USMALLINT, 241 "UINT32": TokenType.UINT, 242 "UINT64": TokenType.UBIGINT, 243 "UINT8": TokenType.UTINYINT, 244 "IPV4": TokenType.IPV4, 245 "IPV6": TokenType.IPV6, 246 "POINT": TokenType.POINT, 247 "RING": TokenType.RING, 248 "LINESTRING": TokenType.LINESTRING, 249 "MULTILINESTRING": TokenType.MULTILINESTRING, 250 "POLYGON": TokenType.POLYGON, 251 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 252 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 253 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 254 "SYSTEM": TokenType.COMMAND, 255 "PREWHERE": TokenType.PREWHERE, 256 } 257 KEYWORDS.pop("/*+") 258 259 SINGLE_TOKENS = { 260 **tokens.Tokenizer.SINGLE_TOKENS, 261 "$": TokenType.HEREDOC_STRING, 262 }
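A quick way to see these tokenizer rules in action (a sketch using the top-level sqlglot.tokenize helper):

import sqlglot

# "#" starts a comment, backticks quote identifiers, and SAMPLE is a
# table-sample keyword in ClickHouse, so all three should be handled by
# the dialect-specific tokenizer.
tokens = sqlglot.tokenize("SELECT `col` FROM t SAMPLE 0.1 # trailing comment", read="clickhouse")
for token in tokens:
    print(token.token_type, token.text)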
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
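As a ClickHouse-specific example, the {name: Type} query-parameter syntax handled by _parse_query_parameter in the listing above becomes an exp.Placeholder (a minimal sketch):

from sqlglot import exp, parse_one

# ClickHouse query parameters like {abc: UInt32} parse into Placeholder
# nodes that keep both the parameter name and its declared type.
expression = parse_one("SELECT {abc: UInt32}", read="clickhouse")
placeholder = expression.find(exp.Placeholder)
print(placeholder.name, placeholder.args.get("kind"))

# placeholder_sql reassembles the same syntax on the way out.
print(expression.sql(dialect="clickhouse"))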
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
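Tying the parser and generator together, the most visible transpilation effect is the Nullable(...) wrapping described in _parse_types and datatype_sql in the listing above (a sketch; the expected outputs mirror the comments in the source):

import sqlglot

# TEXT carries no non-nullability marker in the source dialect, so
# ClickHouse gets the Nullable(...) wrapper to preserve NULL semantics.
print(sqlglot.transpile("CAST(x AS TEXT)", read="postgres", write="clickhouse")[0])
# Expected (assumption): CAST(x AS Nullable(String))

# A ClickHouse-native String round-trips without the wrapper, since the
# parser marked it as non-nullable (ClickHouse's default).
print(sqlglot.transpile("CAST(x AS String)", read="clickhouse", write="clickhouse")[0])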
class Generator(generator.Generator):
    QUERY_HINTS = False
    STRUCT_DELIMITER = ("(", ")")
    NVL2_SUPPORTED = False
    TABLESAMPLE_REQUIRES_PARENS = False
    TABLESAMPLE_SIZE_IS_ROWS = False
    TABLESAMPLE_KEYWORDS = "SAMPLE"
    LAST_DAY_SUPPORTS_DATE_PART = False
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    GROUPINGS_SEP = ""
    SET_OP_MODIFIERS = False
    VALUES_AS_TABLE = False
    ARRAY_SIZE_NAME = "LENGTH"

    STRING_TYPE_MAPPING = {
        exp.DataType.Type.BLOB: "String",
        exp.DataType.Type.CHAR: "String",
        exp.DataType.Type.LONGBLOB: "String",
        exp.DataType.Type.LONGTEXT: "String",
        exp.DataType.Type.MEDIUMBLOB: "String",
        exp.DataType.Type.MEDIUMTEXT: "String",
        exp.DataType.Type.TINYBLOB: "String",
        exp.DataType.Type.TINYTEXT: "String",
        exp.DataType.Type.TEXT: "String",
        exp.DataType.Type.VARBINARY: "String",
        exp.DataType.Type.VARCHAR: "String",
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        **STRING_TYPE_MAPPING,
        exp.DataType.Type.ARRAY: "Array",
        exp.DataType.Type.BOOLEAN: "Bool",
        exp.DataType.Type.BIGINT: "Int64",
        exp.DataType.Type.DATE32: "Date32",
        exp.DataType.Type.DATETIME: "DateTime",
        exp.DataType.Type.DATETIME2: "DateTime",
        exp.DataType.Type.SMALLDATETIME: "DateTime",
        exp.DataType.Type.DATETIME64: "DateTime64",
        exp.DataType.Type.DECIMAL: "Decimal",
        exp.DataType.Type.DECIMAL32: "Decimal32",
        exp.DataType.Type.DECIMAL64: "Decimal64",
        exp.DataType.Type.DECIMAL128: "Decimal128",
        exp.DataType.Type.DECIMAL256: "Decimal256",
        exp.DataType.Type.TIMESTAMP: "DateTime",
        exp.DataType.Type.TIMESTAMPTZ: "DateTime",
        exp.DataType.Type.DOUBLE: "Float64",
        exp.DataType.Type.ENUM: "Enum",
        exp.DataType.Type.ENUM8: "Enum8",
        exp.DataType.Type.ENUM16: "Enum16",
        exp.DataType.Type.FIXEDSTRING: "FixedString",
        exp.DataType.Type.FLOAT: "Float32",
        exp.DataType.Type.INT: "Int32",
        exp.DataType.Type.MEDIUMINT: "Int32",
        exp.DataType.Type.INT128: "Int128",
        exp.DataType.Type.INT256: "Int256",
        exp.DataType.Type.LOWCARDINALITY: "LowCardinality",
        exp.DataType.Type.MAP: "Map",
        exp.DataType.Type.NESTED: "Nested",
        exp.DataType.Type.SMALLINT: "Int16",
        exp.DataType.Type.STRUCT: "Tuple",
        exp.DataType.Type.TINYINT: "Int8",
        exp.DataType.Type.UBIGINT: "UInt64",
        exp.DataType.Type.UINT: "UInt32",
        exp.DataType.Type.UINT128: "UInt128",
        exp.DataType.Type.UINT256: "UInt256",
        exp.DataType.Type.USMALLINT: "UInt16",
        exp.DataType.Type.UTINYINT: "UInt8",
        exp.DataType.Type.IPV4: "IPv4",
        exp.DataType.Type.IPV6: "IPv6",
        exp.DataType.Type.POINT: "Point",
        exp.DataType.Type.RING: "Ring",
        exp.DataType.Type.LINESTRING: "LineString",
        exp.DataType.Type.MULTILINESTRING: "MultiLineString",
        exp.DataType.Type.POLYGON: "Polygon",
        exp.DataType.Type.MULTIPOLYGON: "MultiPolygon",
        exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction",
        exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction",
        exp.DataType.Type.DYNAMIC: "Dynamic",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("any"),
        exp.ApproxDistinct: rename_func("uniq"),
        exp.ArrayConcat: rename_func("arrayConcat"),
        exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this),
        exp.ArraySum: rename_func("arraySum"),
        exp.ArgMax: arg_max_or_min_no_count("argMax"),
        exp.ArgMin: arg_max_or_min_no_count("argMin"),
        exp.Array: inline_array_sql,
        exp.CastToStrType: rename_func("CAST"),
        exp.CountIf: rename_func("countIf"),
        exp.CompressColumnConstraint: lambda self,
        e: f"CODEC({self.expressions(e, key='this', flat=True)})",
        exp.ComputedColumnConstraint: lambda self,
        e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}",
        exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"),
        exp.DateAdd: _datetime_delta_sql("DATE_ADD"),
        exp.DateDiff: _datetime_delta_sql("DATE_DIFF"),
        exp.DateStrToDate: rename_func("toDate"),
        exp.DateSub: _datetime_delta_sql("DATE_SUB"),
        exp.Explode: rename_func("arrayJoin"),
        exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL",
        exp.IsNan: rename_func("isNaN"),
        exp.JSONCast: lambda self, e: f"{self.sql(e, 'this')}.:{self.sql(e, 'to')}",
        exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False),
        exp.JSONPathKey: json_path_key_only_name,
        exp.JSONPathRoot: lambda *_: "",
        exp.Length: length_or_char_length_sql,
        exp.Map: _map_sql,
        exp.Median: rename_func("median"),
        exp.Nullif: rename_func("nullIf"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression),
        exp.Rand: rename_func("randCanonical"),
        exp.StartsWith: rename_func("startsWith"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self,
            e,
            func_name="POSITION",
            supports_position=True,
            use_ansi_position=False,
        ),
        exp.TimeToStr: lambda self, e: self.func(
            "formatDateTime", e.this, self.format_time(e), e.args.get("zone")
        ),
        exp.TimeStrToTime: _timestrtotime_sql,
        exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"),
        exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"),
        exp.VarMap: _map_sql,
        exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions),
        exp.MD5Digest: rename_func("MD5"),
        exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.UnixToTime: _unix_to_time_sql,
        exp.TimestampTrunc: timestamptrunc_sql(zone=True),
        exp.Trim: lambda self, e: trim_sql(self, e, default_trim_type="BOTH"),
        exp.Variance: rename_func("varSamp"),
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Stddev: rename_func("stddevSamp"),
        exp.Chr: rename_func("CHAR"),
        exp.Lag: lambda self, e: self.func(
            "lagInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Lead: lambda self, e: self.func(
            "leadInFrame", e.this, e.args.get("offset"), e.args.get("default")
        ),
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("editDistance")
        ),
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.OnCluster: exp.Properties.Location.POST_NAME,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.ToTableProperty: exp.Properties.Location.POST_NAME,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # There's no list in docs, but it can be found in Clickhouse code
    # see `ClickHouse/src/Parsers/ParserCreate*.cpp`
    ON_CLUSTER_TARGETS = {
        "SCHEMA",  # Transpiled CREATE SCHEMA may have OnCluster property set
        "DATABASE",
        "TABLE",
        "VIEW",
        "DICTIONARY",
        "INDEX",
        "FUNCTION",
        "NAMED COLLECTION",
    }

    # https://clickhouse.com/docs/en/sql-reference/data-types/nullable
    NON_NULLABLE_TYPES = {
        exp.DataType.Type.ARRAY,
        exp.DataType.Type.MAP,
        exp.DataType.Type.STRUCT,
        exp.DataType.Type.POINT,
        exp.DataType.Type.RING,
        exp.DataType.Type.LINESTRING,
        exp.DataType.Type.MULTILINESTRING,
        exp.DataType.Type.POLYGON,
        exp.DataType.Type.MULTIPOLYGON,
    }

    def strtodate_sql(self, expression: exp.StrToDate) -> str:
        strtodate_sql = self.function_fallback_sql(expression)

        if not isinstance(expression.parent, exp.Cast):
            # StrToDate returns DATEs in other dialects (eg. postgres), so
            # this branch aims to improve the transpilation to clickhouse
            return self.cast_sql(exp.cast(expression, "DATE"))

        return strtodate_sql

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
            return self.sql(this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        dtype = expression.to
        if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
            # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
            dtype.set("nullable", True)

        return super().cast_sql(expression)

    def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
        this = self.json_path_part(expression.this)
        return str(int(this) + 1) if is_int(this) else this

    def likeproperty_sql(self, expression: exp.LikeProperty) -> str:
        return f"AS {self.sql(expression, 'this')}"

    def _any_to_has(
        self,
        expression: exp.EQ | exp.NEQ,
        default: t.Callable[[t.Any], str],
        prefix: str = "",
    ) -> str:
        if isinstance(expression.left, exp.Any):
            arr = expression.left
            this = expression.right
        elif isinstance(expression.right, exp.Any):
            arr = expression.right
            this = expression.left
        else:
            return default(expression)

        return prefix + self.func("has", arr.this.unnest(), this)

    def eq_sql(self, expression: exp.EQ) -> str:
        return self._any_to_has(expression, super().eq_sql)

    def neq_sql(self, expression: exp.NEQ) -> str:
        return self._any_to_has(expression, super().neq_sql, "NOT ")

    def regexpilike_sql(self, expression: exp.RegexpILike) -> str:
        # Manually add a flag to make the search case-insensitive
        regex = self.func("CONCAT", "'(?i)'", expression.expression)
        return self.func("match", expression.this, regex)

    def datatype_sql(self, expression: exp.DataType) -> str:
        # String is the standard ClickHouse type, every other variant is just an alias.
        # Additionally, any supplied length parameter will be ignored.
        #
        # https://clickhouse.com/docs/en/sql-reference/data-types/string
        if expression.this in self.STRING_TYPE_MAPPING:
            dtype = "String"
        else:
            dtype = super().datatype_sql(expression)

        # This section changes the type to `Nullable(...)` if the following conditions hold:
        # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
        #   and change their semantics
        # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
        #   constraint: "Type of Map key must be a type, that can be represented by integer or
        #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
        # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
        parent = expression.parent
        nullable = expression.args.get("nullable")
        if nullable is True or (
            nullable is None
            and not (
                isinstance(parent, exp.DataType)
                and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
                and expression.index in (None, 0)
            )
            and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
        ):
            dtype = f"Nullable({dtype})"

        return dtype

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.args.get("scalar"):
            this = self.sql(expression, "this")
            alias = self.sql(expression, "alias")
            return f"{this} AS {alias}"

        return super().cte_sql(expression)

    def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
        return super().after_limit_modifiers(expression) + [
            (
                self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
                if expression.args.get("settings")
                else ""
            ),
            (
                self.seg("FORMAT ") + self.sql(expression, "format")
                if expression.args.get("format")
                else ""
            ),
        ]

    def placeholder_sql(self, expression: exp.Placeholder) -> str:
        return f"{{{expression.name}: {self.sql(expression, 'kind')}}}"

    def oncluster_sql(self, expression: exp.OnCluster) -> str:
        return f"ON CLUSTER {self.sql(expression, 'this')}"

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
            exp.Properties.Location.POST_NAME
        ):
            this_name = self.sql(
                expression.this if isinstance(expression.this, exp.Schema) else expression,
                "this",
            )
            this_properties = " ".join(
                [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
            )
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{this_properties}{this_schema}"

        return super().createable_sql(expression, locations)

    def create_sql(self, expression: exp.Create) -> str:
        # The comment property comes last in CTAS statements, i.e. after the query
        query = expression.expression
        if isinstance(query, exp.Query):
            comment_prop = expression.find(exp.SchemaCommentProperty)
            if comment_prop:
                comment_prop.pop()

            query.replace(exp.paren(query))
        else:
            comment_prop = None

        create_sql = super().create_sql(expression)

        comment_sql = self.sql(comment_prop)
        comment_sql = f" {comment_sql}" if comment_sql else ""

        return f"{create_sql}{comment_sql}"

    def prewhere_sql(self, expression: exp.PreWhere) -> str:
        this = self.indent(self.sql(expression, "this"))
        return f"{self.seg('PREWHERE')}{self.sep()}{this}"

    def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
        this = self.sql(expression, "this")
        this = f" {this}" if this else ""
        expr = self.sql(expression, "expression")
        expr = f" {expr}" if expr else ""
        index_type = self.sql(expression, "index_type")
        index_type = f" TYPE {index_type}" if index_type else ""
        granularity = self.sql(expression, "granularity")
        granularity = f" GRANULARITY {granularity}" if granularity else ""

        return f"INDEX{this}{expr}{index_type}{granularity}"

    def partition_sql(self, expression: exp.Partition) -> str:
        return f"PARTITION {self.expressions(expression, flat=True)}"

    def partitionid_sql(self, expression: exp.PartitionId) -> str:
        return f"ID {self.sql(expression.this)}"

    def replacepartition_sql(self, expression: exp.ReplacePartition) -> str:
        return (
            f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}"
        )

    def projectiondef_sql(self, expression: exp.ProjectionDef) -> str:
        return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"

    def is_sql(self, expression: exp.Is) -> str:
        is_sql = super().is_sql(expression)

        if isinstance(expression.parent, exp.Not):
            # value IS NOT NULL -> NOT (value IS NULL)
            is_sql = self.wrap(is_sql)

        return is_sql

    def in_sql(self, expression: exp.In) -> str:
        in_sql = super().in_sql(expression)

        if isinstance(expression.parent, exp.Not) and expression.args.get("is_global"):
            in_sql = in_sql.replace("GLOBAL IN", "GLOBAL NOT IN", 1)

        return in_sql

    def not_sql(self, expression: exp.Not) -> str:
        if isinstance(expression.this, exp.In) and expression.this.args.get("is_global"):
            # let `GLOBAL IN` child interpose `NOT`
            return self.sql(expression, "this")

        return super().not_sql(expression)
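To see how TYPE_MAPPING and TRANSFORMS surface through the public API, here is a small illustrative sketch. The queries and column names are invented, and the outputs in the comments are approximate; exact rendering can vary across sqlglot versions.

    import sqlglot

    # TRANSFORMS: exp.ApproxDistinct is renamed to ClickHouse's uniq()
    print(sqlglot.transpile("SELECT APPROX_DISTINCT(user_id) FROM events", write="clickhouse")[0])
    # roughly: SELECT uniq(user_id) FROM events

    # STRING_TYPE_MAPPING collapses all text/blob variants to String (length ignored),
    # and datatype_sql wraps it in Nullable(...) when nullability is unspecified
    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(100))", write="clickhouse")[0])
    # roughly: SELECT CAST(x AS Nullable(String))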
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
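These options are normally passed through the top-level API (e.g. sqlglot.transpile), which forwards them to the generator. An illustrative sketch; the query is invented and the exact layout depends on pad/indent and the sqlglot version.

    import sqlglot

    sql = "select approx_distinct(a) as n from t where b > 1"
    # pretty-print and force-quote identifiers when generating ClickHouse SQL
    print(sqlglot.transpile(sql, write="clickhouse", pretty=True, identify=True)[0])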
def strtodate_sql(self, expression: exp.StrToDate) -> str:
    strtodate_sql = self.function_fallback_sql(expression)

    if not isinstance(expression.parent, exp.Cast):
        # StrToDate returns DATEs in other dialects (eg. postgres), so
        # this branch aims to improve the transpilation to clickhouse
        return self.cast_sql(exp.cast(expression, "DATE"))

    return strtodate_sql
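In effect, a STR_TO_DATE that is not already under a cast gets an explicit CAST(... AS DATE), matching the DATE return type it has in dialects such as Postgres. An illustrative sketch; the input is invented and the output is approximate:

    import sqlglot

    sql = "SELECT STR_TO_DATE('05 12 2000', '%d %m %Y')"
    print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])
    # roughly: SELECT CAST(STR_TO_DATE('05 12 2000', '%d %m %Y') AS DATE)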
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"):
        return self.sql(this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
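This is the generation-side counterpart of `_build_str_to_date`, which wraps a two-argument STR_TO_DATE in CAST(... AS DATETIME) at parse time; unwrapping it here keeps ClickHouse-to-ClickHouse round trips stable. A sketch (output approximate):

    import sqlglot

    ast = sqlglot.parse_one("SELECT STR_TO_DATE(s, '%Y-%m-%d')", read="clickhouse")
    # the parser produced CAST(STR_TO_DATE(...) AS DATETIME); generating for
    # ClickHouse drops the synthetic cast again
    print(ast.sql("clickhouse"))
    # roughly: SELECT STR_TO_DATE(s, '%Y-%m-%d')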
def trycast_sql(self, expression: exp.TryCast) -> str:
    dtype = expression.to
    if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True):
        # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T)
        dtype.set("nullable", True)

    return super().cast_sql(expression)
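So TRY_CAST is emulated by widening the target type to Nullable(...), per the comment in the source. An illustrative sketch (output approximate):

    import sqlglot

    print(sqlglot.transpile("SELECT TRY_CAST(a AS INT)", write="clickhouse")[0])
    # roughly: SELECT CAST(a AS Nullable(Int32))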
def datatype_sql(self, expression: exp.DataType) -> str:
    # String is the standard ClickHouse type, every other variant is just an alias.
    # Additionally, any supplied length parameter will be ignored.
    #
    # https://clickhouse.com/docs/en/sql-reference/data-types/string
    if expression.this in self.STRING_TYPE_MAPPING:
        dtype = "String"
    else:
        dtype = super().datatype_sql(expression)

    # This section changes the type to `Nullable(...)` if the following conditions hold:
    # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable`
    #   and change their semantics
    # - It's not the key type of a `Map`. This is because ClickHouse enforces the following
    #   constraint: "Type of Map key must be a type, that can be represented by integer or
    #   String or FixedString (possibly LowCardinality) or UUID or IPv6"
    # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type
    parent = expression.parent
    nullable = expression.args.get("nullable")
    if nullable is True or (
        nullable is None
        and not (
            isinstance(parent, exp.DataType)
            and parent.is_type(exp.DataType.Type.MAP, check_nullable=True)
            and expression.index in (None, 0)
        )
        and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True)
    ):
        dtype = f"Nullable({dtype})"

    return dtype
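The Map-key exception is easiest to see with a nested type: the value type may become Nullable, while the key type and the Map itself may not. An illustrative sketch (output approximate):

    import sqlglot

    print(sqlglot.transpile("SELECT CAST(m AS MAP(VARCHAR, VARCHAR))", write="clickhouse")[0])
    # roughly: SELECT CAST(m AS Map(String, Nullable(String)))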
def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]:
    return super().after_limit_modifiers(expression) + [
        (
            self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True)
            if expression.args.get("settings")
            else ""
        ),
        (
            self.seg("FORMAT ") + self.sql(expression, "format")
            if expression.args.get("format")
            else ""
        ),
    ]
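This is what keeps ClickHouse's trailing SETTINGS and FORMAT clauses in place after LIMIT. An illustrative round trip; the query is invented and the output should come back roughly unchanged:

    import sqlglot

    sql = "SELECT * FROM t LIMIT 10 SETTINGS max_threads = 4 FORMAT JSONEachRow"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # roughly the same statement, with SETTINGS and FORMAT emitted after LIMIT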
def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
    if expression.kind in self.ON_CLUSTER_TARGETS and locations.get(
        exp.Properties.Location.POST_NAME
    ):
        this_name = self.sql(
            expression.this if isinstance(expression.this, exp.Schema) else expression,
            "this",
        )
        this_properties = " ".join(
            [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]]
        )
        this_schema = self.schema_columns_sql(expression.this)
        this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

        return f"{this_name}{self.sep()}{this_properties}{this_schema}"

    return super().createable_sql(expression, locations)
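For the creatables listed in ON_CLUSTER_TARGETS, this places the ON CLUSTER property (Location.POST_NAME) between the object name and its column schema. An illustrative round trip; the names are invented and the output is approximate modulo whitespace:

    import sqlglot

    sql = "CREATE TABLE db.t ON CLUSTER main (x Int32) ENGINE=MergeTree ORDER BY x"
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # roughly: CREATE TABLE db.t ON CLUSTER main (x Int32) ENGINE=MergeTree ORDER BY x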
def create_sql(self, expression: exp.Create) -> str:
    # The comment property comes last in CTAS statements, i.e. after the query
    query = expression.expression
    if isinstance(query, exp.Query):
        comment_prop = expression.find(exp.SchemaCommentProperty)
        if comment_prop:
            comment_prop.pop()

        query.replace(exp.paren(query))
    else:
        comment_prop = None

    create_sql = super().create_sql(expression)

    comment_sql = self.sql(comment_prop)
    comment_sql = f" {comment_sql}" if comment_sql else ""

    return f"{create_sql}{comment_sql}"
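Concretely, when transpiling a CTAS whose comment property sits before the query in the source dialect, the comment is popped and re-emitted after the parenthesized SELECT. An illustrative sketch reading MySQL syntax; the statement is invented and the output approximate:

    import sqlglot

    sql = "CREATE TABLE t COMMENT='docs' AS SELECT 1 AS x"
    print(sqlglot.transpile(sql, read="mysql", write="clickhouse")[0])
    # roughly: CREATE TABLE t AS (SELECT 1 AS x) COMMENT 'docs'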
def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str:
    this = self.sql(expression, "this")
    this = f" {this}" if this else ""
    expr = self.sql(expression, "expression")
    expr = f" {expr}" if expr else ""
    index_type = self.sql(expression, "index_type")
    index_type = f" TYPE {index_type}" if index_type else ""
    granularity = self.sql(expression, "granularity")
    granularity = f" GRANULARITY {granularity}" if granularity else ""

    return f"INDEX{this}{expr}{index_type}{granularity}"
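This renders ClickHouse data-skipping index definitions inside CREATE TABLE. An illustrative round trip; the table definition is invented:

    import sqlglot

    sql = (
        "CREATE TABLE t (s String, INDEX idx s TYPE minmax GRANULARITY 4) "
        "ENGINE=MergeTree ORDER BY tuple()"
    )
    print(sqlglot.parse_one(sql, read="clickhouse").sql("clickhouse"))
    # the INDEX idx s TYPE minmax GRANULARITY 4 clause round-trips via this method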
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql