sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: rename_func("EDIT_DISTANCE"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
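For orientation, here is a minimal usage sketch of this dialect through sqlglot's public API. The table and column names are invented for illustration, and the rendered output may vary across sqlglot versions:

```python
import sqlglot

# Hyphenated project names are valid BigQuery table paths; _parse_table_part
# above stitches the DASH-separated tokens back into a single identifier.
ast = sqlglot.parse_one("SELECT col FROM my-project.dataset.tbl", read="bigquery")
print(ast.sql(dialect="bigquery"))

# TO_HEX(MD5(col)) is parsed into the canonical exp.MD5 node (see _build_to_hex),
# letting other dialects render it with their own hash/hex primitives.
print(sqlglot.transpile("SELECT TO_HEX(MD5(col)) FROM tbl", read="bigquery", write="duckdb")[0])
```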
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
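A minimal sketch of how this setting surfaces, assuming the top-level sqlglot.transpile API (the printed SQL is indicative and may vary by sqlglot version):

    import sqlglot

    # DATE_TRUNC on WEEK is parsed with the dialect's week offset in mind,
    # so the unit survives transpilation to engines with different week rules.
    sql = "SELECT DATE_TRUNC(d, WEEK) FROM t"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])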
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
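A hedged sketch of what this controls: when transpiling LOG between dialects, sqlglot uses each dialect's setting to decide which argument is the base, so arguments may be reordered on generation (output indicative; verify against your version):

    import sqlglot

    # In BigQuery, LOG(x, base) takes the base second; engines such as Hive
    # put the base first.
    print(sqlglot.transpile("SELECT LOG(1024, 2)", read="bigquery", write="hive")[0])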
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data
    WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
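A sketch of the BigQuery behavior using the optimizer's qualify pass (the exact qualified output depends on the sqlglot version):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data
    WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """
    # Under BigQuery semantics, the alias is forwarded into GROUP BY/HAVING.
    print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))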
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
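For instance, BigQuery's %-style format tokens are translated through TIME_MAPPING when transpiling to an engine with different format syntax (output indicative):

    import sqlglot

    sql = "SELECT PARSE_DATE('%Y%m%d', '20240101')"
    # e.g. rewritten to STRPTIME-based SQL for DuckDB
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])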
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
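A hedged sketch, assuming _PARTITIONTIME is registered as one of BigQuery's pseudocolumns and that the optimizer skips pseudocolumns during star expansion:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    # _PARTITIONTIME is engine-generated; star expansion should leave it out.
    schema = {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}
    expr = qualify(
        sqlglot.parse_one("SELECT * FROM t", read="bigquery"),
        schema=schema,
        dialect="bigquery",
    )
    print(expr.sql("bigquery"))  # expected to select only t.x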
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
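For example (a sketch; BigQuery spells the quantifier out, so the output below is indicative):

    import sqlglot

    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="bigquery")[0])
    # e.g. SELECT 1 UNION DISTINCT SELECT 2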
345 def normalize_identifier(self, expression: E) -> E: 346 if ( 347 isinstance(expression, exp.Identifier) 348 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 349 ): 350 parent = expression.parent 351 while isinstance(parent, exp.Dot): 352 parent = parent.parent 353 354 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 355 # by default. The following check uses a heuristic to detect tables based on whether 356 # they are qualified. This should generally be correct, because tables in BigQuery 357 # must be qualified with at least a dataset, unless @@dataset_id is set. 358 case_sensitive = ( 359 isinstance(parent, exp.UserDefinedFunction) 360 or ( 361 isinstance(parent, exp.Table) 362 and parent.db 363 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 364 ) 365 or expression.meta.get("is_table") 366 ) 367 if not case_sensitive: 368 expression.set("this", expression.this.lower()) 369 370 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
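A small sketch of the differences described above, using Dialect.get_or_raise and normalize_identifier (results indicative):

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    ident = exp.to_identifier("FoO")  # unquoted identifier
    print(Dialect.get_or_raise("postgres").normalize_identifier(ident.copy()).name)   # foo
    print(Dialect.get_or_raise("snowflake").normalize_identifier(ident.copy()).name)  # FOO
    # BigQuery treats unqualified names (e.g. CTEs) as case-insensitive:
    print(Dialect.get_or_raise("bigquery").normalize_identifier(ident.copy()).name)   # foo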
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
372 class Tokenizer(tokens.Tokenizer): 373 QUOTES = ["'", '"', '"""', "'''"] 374 COMMENTS = ["--", "#", ("/*", "*/")] 375 IDENTIFIERS = ["`"] 376 STRING_ESCAPES = ["\\"] 377 378 HEX_STRINGS = [("0x", ""), ("0X", "")] 379 380 BYTE_STRINGS = [ 381 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 382 ] 383 384 RAW_STRINGS = [ 385 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 386 ] 387 388 KEYWORDS = { 389 **tokens.Tokenizer.KEYWORDS, 390 "ANY TYPE": TokenType.VARIANT, 391 "BEGIN": TokenType.COMMAND, 392 "BEGIN TRANSACTION": TokenType.BEGIN, 393 "BYTEINT": TokenType.INT, 394 "BYTES": TokenType.BINARY, 395 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 396 "DATETIME": TokenType.TIMESTAMP, 397 "DECLARE": TokenType.COMMAND, 398 "ELSEIF": TokenType.COMMAND, 399 "EXCEPTION": TokenType.COMMAND, 400 "FLOAT64": TokenType.DOUBLE, 401 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 402 "MODEL": TokenType.MODEL, 403 "NOT DETERMINISTIC": TokenType.VOLATILE, 404 "RECORD": TokenType.STRUCT, 405 "TIMESTAMP": TokenType.TIMESTAMPTZ, 406 } 407 KEYWORDS.pop("DIV") 408 KEYWORDS.pop("VALUES") 409 KEYWORDS.pop("/*+")
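A sketch of the BigQuery-specific literals this tokenizer recognizes, using the dialect's tokenize helper (token details vary by version):

    from sqlglot.dialects.bigquery import BigQuery

    # Raw strings (r'...'), byte strings (b'...'), and 0x hex literals.
    for token in BigQuery().tokenize("SELECT r'raw', b'bytes', 0xFF"):
        print(token.token_type, token.text)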
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
411 class Parser(parser.Parser): 412 PREFIXED_PIVOT_COLUMNS = True 413 LOG_DEFAULTS_TO_LN = True 414 SUPPORTS_IMPLICIT_UNNEST = True 415 416 FUNCTIONS = { 417 **parser.Parser.FUNCTIONS, 418 "DATE": _build_date, 419 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 420 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 421 "DATE_TRUNC": lambda args: exp.DateTrunc( 422 unit=exp.Literal.string(str(seq_get(args, 1))), 423 this=seq_get(args, 0), 424 ), 425 "DATETIME": _build_datetime, 426 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 427 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 428 "DIV": binary_from_function(exp.IntDiv), 429 "EDIT_DISTANCE": lambda args: exp.Levenshtein( 430 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 431 ), 432 "FORMAT_DATE": lambda args: exp.TimeToStr( 433 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 434 ), 435 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 436 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 437 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 438 ), 439 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 440 "MD5": exp.MD5Digest.from_arg_list, 441 "TO_HEX": _build_to_hex, 442 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 443 [seq_get(args, 1), seq_get(args, 0)] 444 ), 445 "PARSE_TIMESTAMP": _build_parse_timestamp, 446 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 447 "REGEXP_EXTRACT": _build_regexp_extract, 448 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 449 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 450 "SPLIT": lambda args: exp.Split( 451 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 452 this=seq_get(args, 0), 453 expression=seq_get(args, 1) or exp.Literal.string(","), 454 ), 455 "TIME": _build_time, 456 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 457 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 458 "TIMESTAMP": _build_timestamp, 459 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 460 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 461 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 462 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 463 ), 464 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 465 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 466 ), 467 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 468 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 469 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 470 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 471 ), 472 } 473 474 FUNCTION_PARSERS = { 475 **parser.Parser.FUNCTION_PARSERS, 476 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 477 } 478 FUNCTION_PARSERS.pop("TRIM") 479 480 NO_PAREN_FUNCTIONS = { 481 **parser.Parser.NO_PAREN_FUNCTIONS, 482 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 483 } 484 485 NESTED_TYPE_TOKENS = { 486 *parser.Parser.NESTED_TYPE_TOKENS, 487 TokenType.TABLE, 488 } 489 490 PROPERTY_PARSERS = { 491 **parser.Parser.PROPERTY_PARSERS, 492 "NOT DETERMINISTIC": lambda self: self.expression( 493 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 494 ), 495 "OPTIONS": lambda self: self._parse_with_property(), 496 } 497 498 CONSTRAINT_PARSERS = { 499 
**parser.Parser.CONSTRAINT_PARSERS, 500 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 501 } 502 503 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 504 RANGE_PARSERS.pop(TokenType.OVERLAPS) 505 506 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 507 508 STATEMENT_PARSERS = { 509 **parser.Parser.STATEMENT_PARSERS, 510 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 511 TokenType.END: lambda self: self._parse_as_command(self._prev), 512 TokenType.FOR: lambda self: self._parse_for_in(), 513 } 514 515 BRACKET_OFFSETS = { 516 "OFFSET": (0, False), 517 "ORDINAL": (1, False), 518 "SAFE_OFFSET": (0, True), 519 "SAFE_ORDINAL": (1, True), 520 } 521 522 def _parse_for_in(self) -> exp.ForIn: 523 this = self._parse_range() 524 self._match_text_seq("DO") 525 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 526 527 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 528 this = super()._parse_table_part(schema=schema) or self._parse_number() 529 530 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 531 if isinstance(this, exp.Identifier): 532 table_name = this.name 533 while self._match(TokenType.DASH, advance=False) and self._next: 534 text = "" 535 while self._is_connected() and self._curr.token_type != TokenType.DOT: 536 self._advance() 537 text += self._prev.text 538 table_name += text 539 540 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 541 elif isinstance(this, exp.Literal): 542 table_name = this.name 543 544 if self._is_connected() and self._parse_var(any_token=True): 545 table_name += self._prev.text 546 547 this = exp.Identifier(this=table_name, quoted=True) 548 549 return this 550 551 def _parse_table_parts( 552 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 553 ) -> exp.Table: 554 table = super()._parse_table_parts( 555 schema=schema, is_db_reference=is_db_reference, wildcard=True 556 ) 557 558 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 559 if not table.catalog: 560 if table.db: 561 parts = table.db.split(".") 562 if len(parts) == 2 and not table.args["db"].quoted: 563 table.set("catalog", exp.Identifier(this=parts[0])) 564 table.set("db", exp.Identifier(this=parts[1])) 565 else: 566 parts = table.name.split(".") 567 if len(parts) == 2 and not table.this.quoted: 568 table.set("db", exp.Identifier(this=parts[0])) 569 table.set("this", exp.Identifier(this=parts[1])) 570 571 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 572 catalog, db, this, *rest = ( 573 exp.to_identifier(p, quoted=True) 574 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 575 ) 576 577 if rest and this: 578 this = exp.Dot.build([this, *rest]) # type: ignore 579 580 table = exp.Table( 581 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 582 ) 583 table.meta["quoted_table"] = True 584 585 return table 586 587 def _parse_column(self) -> t.Optional[exp.Expression]: 588 column = super()._parse_column() 589 if isinstance(column, exp.Column): 590 parts = column.parts 591 if any("." 
in p.name for p in parts): 592 catalog, db, table, this, *rest = ( 593 exp.to_identifier(p, quoted=True) 594 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 595 ) 596 597 if rest and this: 598 this = exp.Dot.build([this, *rest]) # type: ignore 599 600 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 601 column.meta["quoted_column"] = True 602 603 return column 604 605 @t.overload 606 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 607 608 @t.overload 609 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 610 611 def _parse_json_object(self, agg=False): 612 json_object = super()._parse_json_object() 613 array_kv_pair = seq_get(json_object.expressions, 0) 614 615 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 616 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 617 if ( 618 array_kv_pair 619 and isinstance(array_kv_pair.this, exp.Array) 620 and isinstance(array_kv_pair.expression, exp.Array) 621 ): 622 keys = array_kv_pair.this.expressions 623 values = array_kv_pair.expression.expressions 624 625 json_object.set( 626 "expressions", 627 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 628 ) 629 630 return json_object 631 632 def _parse_bracket( 633 self, this: t.Optional[exp.Expression] = None 634 ) -> t.Optional[exp.Expression]: 635 bracket = super()._parse_bracket(this) 636 637 if this is bracket: 638 return bracket 639 640 if isinstance(bracket, exp.Bracket): 641 for expression in bracket.expressions: 642 name = expression.name.upper() 643 644 if name not in self.BRACKET_OFFSETS: 645 break 646 647 offset, safe = self.BRACKET_OFFSETS[name] 648 bracket.set("offset", offset) 649 bracket.set("safe", safe) 650 expression.replace(expression.expressions[0]) 651 652 return bracket 653 654 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 655 unnest = super()._parse_unnest(with_alias=with_alias) 656 657 if not unnest: 658 return None 659 660 unnest_expr = seq_get(unnest.expressions, 0) 661 if unnest_expr: 662 from sqlglot.optimizer.annotate_types import annotate_types 663 664 unnest_expr = annotate_types(unnest_expr) 665 666 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 667 # in contrast to other dialects such as DuckDB which flattens only the array by default 668 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 669 array_elem.is_type(exp.DataType.Type.STRUCT) 670 for array_elem in unnest_expr._type.expressions 671 ): 672 unnest.set("explode_array", True) 673 674 return unnest
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
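A sketch of two BigQuery-specific parses handled above, dash-separated table names and the OFFSET/SAFE_OFFSET bracket accessors (roundtrip output indicative):

    import sqlglot

    e = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM my-project.ds.tbl", read="bigquery")
    print(e.sql("bigquery"))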
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
676 class Generator(generator.Generator): 677 INTERVAL_ALLOWS_PLURAL_FORM = False 678 JOIN_HINTS = False 679 QUERY_HINTS = False 680 TABLE_HINTS = False 681 LIMIT_FETCH = "LIMIT" 682 RENAME_TABLE_WITH_DB = False 683 NVL2_SUPPORTED = False 684 UNNEST_WITH_ORDINALITY = False 685 COLLATE_IS_FUNC = True 686 LIMIT_ONLY_LITERALS = True 687 SUPPORTS_TABLE_ALIAS_COLUMNS = False 688 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 689 JSON_KEY_VALUE_PAIR_SEP = "," 690 NULL_ORDERING_SUPPORTED = False 691 IGNORE_NULLS_IN_FUNC = True 692 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 693 CAN_IMPLEMENT_ARRAY_ANY = True 694 SUPPORTS_TO_NUMBER = False 695 NAMED_PLACEHOLDER_TOKEN = "@" 696 HEX_FUNC = "TO_HEX" 697 WITH_PROPERTIES_PREFIX = "OPTIONS" 698 SUPPORTS_EXPLODING_PROJECTIONS = False 699 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 700 701 TRANSFORMS = { 702 **generator.Generator.TRANSFORMS, 703 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 704 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 705 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 706 exp.Array: inline_array_unless_query, 707 exp.ArrayContains: _array_contains_sql, 708 exp.ArrayFilter: filter_array_using_unnest, 709 exp.ArraySize: rename_func("ARRAY_LENGTH"), 710 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 711 exp.CollateProperty: lambda self, e: ( 712 f"DEFAULT COLLATE {self.sql(e, 'this')}" 713 if e.args.get("default") 714 else f"COLLATE {self.sql(e, 'this')}" 715 ), 716 exp.Commit: lambda *_: "COMMIT TRANSACTION", 717 exp.CountIf: rename_func("COUNTIF"), 718 exp.Create: _create_sql, 719 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 720 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 721 exp.DateDiff: lambda self, e: self.func( 722 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 723 ), 724 exp.DateFromParts: rename_func("DATE"), 725 exp.DateStrToDate: datestrtodate_sql, 726 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 727 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 728 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 729 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 730 exp.FromTimeZone: lambda self, e: self.func( 731 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 732 ), 733 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 734 exp.GroupConcat: rename_func("STRING_AGG"), 735 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 736 exp.If: if_sql(false_value="NULL"), 737 exp.ILike: no_ilike_sql, 738 exp.IntDiv: rename_func("DIV"), 739 exp.JSONFormat: rename_func("TO_JSON_STRING"), 740 exp.Levenshtein: rename_func("EDIT_DISTANCE"), 741 exp.Max: max_or_greatest, 742 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 743 exp.MD5Digest: rename_func("MD5"), 744 exp.Min: min_or_least, 745 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 746 exp.RegexpExtract: lambda self, e: self.func( 747 "REGEXP_EXTRACT", 748 e.this, 749 e.expression, 750 e.args.get("position"), 751 e.args.get("occurrence"), 752 ), 753 exp.RegexpReplace: regexp_replace_sql, 754 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 755 exp.ReturnsProperty: _returnsproperty_sql, 756 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 757 exp.Select: transforms.preprocess( 758 [ 759 transforms.explode_to_unnest(), 760 transforms.unqualify_unnest, 761 transforms.eliminate_distinct_on, 762 _alias_ordered_group, 763 transforms.eliminate_semi_and_anti_joins, 764 ] 
765 ), 766 exp.SHA: rename_func("SHA1"), 767 exp.SHA2: sha256_sql, 768 exp.StabilityProperty: lambda self, e: ( 769 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 770 ), 771 exp.String: rename_func("STRING"), 772 exp.StrToDate: _str_to_datetime_sql, 773 exp.StrToTime: _str_to_datetime_sql, 774 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 775 exp.TimeFromParts: rename_func("TIME"), 776 exp.TimestampFromParts: rename_func("DATETIME"), 777 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 778 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 779 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 780 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 781 exp.TimeStrToTime: timestrtotime_sql, 782 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 783 exp.TsOrDsAdd: _ts_or_ds_add_sql, 784 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 785 exp.TsOrDsToTime: rename_func("TIME"), 786 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 787 exp.Unhex: rename_func("FROM_HEX"), 788 exp.UnixDate: rename_func("UNIX_DATE"), 789 exp.UnixToTime: _unix_to_time_sql, 790 exp.Uuid: lambda *_: "GENERATE_UUID()", 791 exp.Values: _derived_table_values_to_unnest, 792 exp.VariancePop: rename_func("VAR_POP"), 793 } 794 795 SUPPORTED_JSON_PATH_PARTS = { 796 exp.JSONPathKey, 797 exp.JSONPathRoot, 798 exp.JSONPathSubscript, 799 } 800 801 TYPE_MAPPING = { 802 **generator.Generator.TYPE_MAPPING, 803 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 804 exp.DataType.Type.BIGINT: "INT64", 805 exp.DataType.Type.BINARY: "BYTES", 806 exp.DataType.Type.BOOLEAN: "BOOL", 807 exp.DataType.Type.CHAR: "STRING", 808 exp.DataType.Type.DECIMAL: "NUMERIC", 809 exp.DataType.Type.DOUBLE: "FLOAT64", 810 exp.DataType.Type.FLOAT: "FLOAT64", 811 exp.DataType.Type.INT: "INT64", 812 exp.DataType.Type.NCHAR: "STRING", 813 exp.DataType.Type.NVARCHAR: "STRING", 814 exp.DataType.Type.SMALLINT: "INT64", 815 exp.DataType.Type.TEXT: "STRING", 816 exp.DataType.Type.TIMESTAMP: "DATETIME", 817 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 818 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 819 exp.DataType.Type.TINYINT: "INT64", 820 exp.DataType.Type.ROWVERSION: "BYTES", 821 exp.DataType.Type.UUID: "STRING", 822 exp.DataType.Type.VARBINARY: "BYTES", 823 exp.DataType.Type.VARCHAR: "STRING", 824 exp.DataType.Type.VARIANT: "ANY TYPE", 825 } 826 827 PROPERTIES_LOCATION = { 828 **generator.Generator.PROPERTIES_LOCATION, 829 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 830 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 831 } 832 833 # WINDOW comes after QUALIFY 834 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 835 AFTER_HAVING_MODIFIER_TRANSFORMS = { 836 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 837 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 838 } 839 840 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 841 RESERVED_KEYWORDS = { 842 "all", 843 "and", 844 "any", 845 "array", 846 "as", 847 "asc", 848 "assert_rows_modified", 849 "at", 850 "between", 851 "by", 852 "case", 853 "cast", 854 "collate", 855 "contains", 856 "create", 857 "cross", 858 "cube", 859 "current", 860 "default", 861 "define", 862 "desc", 863 "distinct", 864 "else", 865 "end", 866 "enum", 867 "escape", 868 "except", 869 "exclude", 870 "exists", 871 "extract", 872 "false", 873 "fetch", 874 "following", 875 "for", 876 "from", 877 "full", 878 "group", 879 "grouping", 880 "groups", 881 "hash", 
882 "having", 883 "if", 884 "ignore", 885 "in", 886 "inner", 887 "intersect", 888 "interval", 889 "into", 890 "is", 891 "join", 892 "lateral", 893 "left", 894 "like", 895 "limit", 896 "lookup", 897 "merge", 898 "natural", 899 "new", 900 "no", 901 "not", 902 "null", 903 "nulls", 904 "of", 905 "on", 906 "or", 907 "order", 908 "outer", 909 "over", 910 "partition", 911 "preceding", 912 "proto", 913 "qualify", 914 "range", 915 "recursive", 916 "respect", 917 "right", 918 "rollup", 919 "rows", 920 "select", 921 "set", 922 "some", 923 "struct", 924 "tablesample", 925 "then", 926 "to", 927 "treat", 928 "true", 929 "unbounded", 930 "union", 931 "unnest", 932 "using", 933 "when", 934 "where", 935 "window", 936 "with", 937 "within", 938 } 939 940 def mod_sql(self, expression: exp.Mod) -> str: 941 this = expression.this 942 expr = expression.expression 943 return self.func( 944 "MOD", 945 this.unnest() if isinstance(this, exp.Paren) else this, 946 expr.unnest() if isinstance(expr, exp.Paren) else expr, 947 ) 948 949 def column_parts(self, expression: exp.Column) -> str: 950 if expression.meta.get("quoted_column"): 951 # If a column reference is of the form `dataset.table`.name, we need 952 # to preserve the quoted table path, otherwise the reference breaks 953 table_parts = ".".join(p.name for p in expression.parts[:-1]) 954 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 955 return f"{table_path}.{self.sql(expression, 'this')}" 956 957 return super().column_parts(expression) 958 959 def table_parts(self, expression: exp.Table) -> str: 960 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 961 # we need to make sure the correct quoting is used in each case. 962 # 963 # For example, if there is a CTE x that clashes with a schema name, then the former will 964 # return the table y in that schema, whereas the latter will return the CTE's y column: 965 # 966 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 967 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 968 if expression.meta.get("quoted_table"): 969 table_parts = ".".join(p.name for p in expression.parts) 970 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 971 972 return super().table_parts(expression) 973 974 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 975 if isinstance(expression.this, exp.TsOrDsToTimestamp): 976 func_name = "FORMAT_DATETIME" 977 else: 978 func_name = "FORMAT_DATE" 979 this = ( 980 expression.this 981 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 982 else expression 983 ) 984 return self.func(func_name, self.format_time(expression), this.this) 985 986 def eq_sql(self, expression: exp.EQ) -> str: 987 # Operands of = cannot be NULL in BigQuery 988 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 989 if not isinstance(expression.parent, exp.Update): 990 return "NULL" 991 992 return self.binary(expression, "=") 993 994 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 995 parent = expression.parent 996 997 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 998 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
999 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1000 return self.func( 1001 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1002 ) 1003 1004 return super().attimezone_sql(expression) 1005 1006 def trycast_sql(self, expression: exp.TryCast) -> str: 1007 return self.cast_sql(expression, safe_prefix="SAFE_") 1008 1009 def bracket_sql(self, expression: exp.Bracket) -> str: 1010 this = expression.this 1011 expressions = expression.expressions 1012 1013 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1014 arg = expressions[0] 1015 if arg.type is None: 1016 from sqlglot.optimizer.annotate_types import annotate_types 1017 1018 arg = annotate_types(arg) 1019 1020 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1021 # BQ doesn't support bracket syntax with string values for structs 1022 return f"{self.sql(this)}.{arg.name}" 1023 1024 expressions_sql = self.expressions(expression, flat=True) 1025 offset = expression.args.get("offset") 1026 1027 if offset == 0: 1028 expressions_sql = f"OFFSET({expressions_sql})" 1029 elif offset == 1: 1030 expressions_sql = f"ORDINAL({expressions_sql})" 1031 elif offset is not None: 1032 self.unsupported(f"Unsupported array offset: {offset}") 1033 1034 if expression.args.get("safe"): 1035 expressions_sql = f"SAFE_{expressions_sql}" 1036 1037 return f"{self.sql(this)}[{expressions_sql}]" 1038 1039 def in_unnest_op(self, expression: exp.Unnest) -> str: 1040 return self.sql(expression) 1041 1042 def version_sql(self, expression: exp.Version) -> str: 1043 if expression.name == "TIMESTAMP": 1044 expression.set("this", "SYSTEM_TIME") 1045 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
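A hedged sketch of the generator in action, covering a couple of the TRANSFORMS above plus constructor arguments forwarded through transpile (outputs indicative):

    import sqlglot

    # exp.Mod renders via mod_sql; ILIKE is rewritten since BigQuery lacks it.
    print(sqlglot.transpile("SELECT 5 % 2", write="bigquery")[0])         # SELECT MOD(5, 2)
    print(sqlglot.transpile("SELECT x ILIKE 'a%'", write="bigquery")[0])  # LOWER(x) LIKE LOWER('a%')

    # pretty/identify flow through to the generator:
    print(sqlglot.transpile("SELECT a FROM tbl WHERE a > 1", write="bigquery", pretty=True, identify=True)[0])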
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql