sqlglot.dialects.bigquery
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot.dialects.dialect import ( 9 Dialect, 10 NormalizationStrategy, 11 arg_max_or_min_no_count, 12 binary_from_function, 13 date_add_interval_sql, 14 datestrtodate_sql, 15 build_formatted_time, 16 filter_array_using_unnest, 17 if_sql, 18 inline_array_unless_query, 19 max_or_greatest, 20 min_or_least, 21 no_ilike_sql, 22 build_date_delta_with_interval, 23 regexp_replace_sql, 24 rename_func, 25 sha256_sql, 26 timestrtotime_sql, 27 ts_or_ds_add_cast, 28 unit_to_var, 29 strposition_sql, 30 groupconcat_sql, 31) 32from sqlglot.helper import seq_get, split_num_words 33from sqlglot.tokens import TokenType 34from sqlglot.generator import unsupported_args 35 36if t.TYPE_CHECKING: 37 from sqlglot._typing import E, Lit 38 39 from sqlglot.optimizer.annotate_types import TypeAnnotator 40 41logger = logging.getLogger("sqlglot") 42 43 44JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 45 46DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 47 48 49def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 50 if not expression.find_ancestor(exp.From, exp.Join): 51 return self.values_sql(expression) 52 53 structs = [] 54 alias = expression.args.get("alias") 55 for tup in expression.find_all(exp.Tuple): 56 field_aliases = ( 57 alias.columns 58 if alias and alias.columns 59 else (f"_c{i}" for i in range(len(tup.expressions))) 60 ) 61 expressions = [ 62 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 63 for name, fld in zip(field_aliases, tup.expressions) 64 ] 65 structs.append(exp.Struct(expressions=expressions)) 66 67 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 68 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 69 return self.unnest_sql( 70 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 71 ) 72 73 74def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 75 this = expression.this 76 if isinstance(this, exp.Schema): 77 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 78 else: 79 this = self.sql(this) 80 return f"RETURNS {this}" 81 82 83def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 84 returns = expression.find(exp.ReturnsProperty) 85 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 86 expression.set("kind", "TABLE FUNCTION") 87 88 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 89 expression.set("expression", expression.expression.this) 90 91 return self.create_sql(expression) 92 93 94# https://issuetracker.google.com/issues/162294746 95# workaround for bigquery bug when grouping by an expression and then ordering 96# WITH x AS (SELECT 1 y) 97# SELECT y + 1 z 98# FROM x 99# GROUP BY x + 1 100# ORDER by z 101def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 102 if isinstance(expression, exp.Select): 103 group = expression.args.get("group") 104 order = expression.args.get("order") 105 106 if group and order: 107 aliases = { 108 select.this: select.args["alias"] 109 for select in expression.selects 110 if isinstance(select, exp.Alias) 111 } 112 113 for grouped in group.expressions: 114 if grouped.is_int: 115 continue 116 alias = aliases.get(grouped) 117 if alias: 118 
grouped.replace(exp.column(alias)) 119 120 return expression 121 122 123def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 124 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 125 if isinstance(expression, exp.CTE) and expression.alias_column_names: 126 cte_query = expression.this 127 128 if cte_query.is_star: 129 logger.warning( 130 "Can't push down CTE column names for star queries. Run the query through" 131 " the optimizer or use 'qualify' to expand the star projections first." 132 ) 133 return expression 134 135 column_names = expression.alias_column_names 136 expression.args["alias"].set("columns", None) 137 138 for name, select in zip(column_names, cte_query.selects): 139 to_replace = select 140 141 if isinstance(select, exp.Alias): 142 select = select.this 143 144 # Inner aliases are shadowed by the CTE column names 145 to_replace.replace(exp.alias_(select, name)) 146 147 return expression 148 149 150def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 151 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 152 this.set("zone", seq_get(args, 2)) 153 return this 154 155 156def _build_timestamp(args: t.List) -> exp.Timestamp: 157 timestamp = exp.Timestamp.from_arg_list(args) 158 timestamp.set("with_tz", True) 159 return timestamp 160 161 162def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 163 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 164 return expr_type.from_arg_list(args) 165 166 167def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 168 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 169 arg = seq_get(args, 0) 170 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 171 172 173def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 174 return self.sql( 175 exp.Exists( 176 this=exp.select("1") 177 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 178 .where(exp.column("_col").eq(expression.right)) 179 ) 180 ) 181 182 183def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 184 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 185 186 187def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 188 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 189 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 190 unit = unit_to_var(expression) 191 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 192 193 194def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 195 scale = expression.args.get("scale") 196 timestamp = expression.this 197 198 if scale in (None, exp.UnixToTime.SECONDS): 199 return self.func("TIMESTAMP_SECONDS", timestamp) 200 if scale == exp.UnixToTime.MILLIS: 201 return self.func("TIMESTAMP_MILLIS", timestamp) 202 if scale == exp.UnixToTime.MICROS: 203 return self.func("TIMESTAMP_MICROS", timestamp) 204 205 unix_seconds = exp.cast( 206 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 207 ) 208 return self.func("TIMESTAMP_SECONDS", unix_seconds) 209 210 211def _build_time(args: t.List) -> exp.Func: 212 if len(args) == 1: 213 return exp.TsOrDsToTime(this=args[0]) 214 if len(args) == 2: 215 return exp.Time.from_arg_list(args) 216 
return exp.TimeFromParts.from_arg_list(args) 217 218 219def _build_datetime(args: t.List) -> exp.Func: 220 if len(args) == 1: 221 return exp.TsOrDsToDatetime.from_arg_list(args) 222 if len(args) == 2: 223 return exp.Datetime.from_arg_list(args) 224 return exp.TimestampFromParts.from_arg_list(args) 225 226 227def _build_regexp_extract( 228 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 229) -> t.Callable[[t.List], E]: 230 def _builder(args: t.List) -> E: 231 try: 232 group = re.compile(args[1].name).groups == 1 233 except re.error: 234 group = False 235 236 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 237 return expr_type( 238 this=seq_get(args, 0), 239 expression=seq_get(args, 1), 240 position=seq_get(args, 2), 241 occurrence=seq_get(args, 3), 242 group=exp.Literal.number(1) if group else default_group, 243 ) 244 245 return _builder 246 247 248def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 249 def _builder(args: t.List, dialect: Dialect) -> E: 250 if len(args) == 1: 251 # The default value for the JSONPath is '$' i.e all of the data 252 args.append(exp.Literal.string("$")) 253 return parser.build_extract_json_with_path(expr_type)(args, dialect) 254 255 return _builder 256 257 258def _str_to_datetime_sql( 259 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 260) -> str: 261 this = self.sql(expression, "this") 262 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 263 264 if expression.args.get("safe"): 265 fmt = self.format_time( 266 expression, 267 self.dialect.INVERSE_FORMAT_MAPPING, 268 self.dialect.INVERSE_FORMAT_TRIE, 269 ) 270 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 271 272 fmt = self.format_time(expression) 273 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 274 275 276def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 277 """ 278 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 279 +---------+---------+---------+------------+---------+ 280 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 281 +---------+---------+---------+------------+---------+ 282 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 283 +---------+---------+---------+------------+---------+ 284 """ 285 self._annotate_args(expression) 286 287 this: exp.Expression = expression.this 288 289 self._set_type( 290 expression, 291 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 292 ) 293 return expression 294 295 296@unsupported_args("ins_cost", "del_cost", "sub_cost") 297def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 298 max_dist = expression.args.get("max_dist") 299 if max_dist: 300 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 301 302 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 303 304 305def _build_levenshtein(args: t.List) -> exp.Levenshtein: 306 max_dist = seq_get(args, 2) 307 return exp.Levenshtein( 308 this=seq_get(args, 0), 309 expression=seq_get(args, 1), 310 max_dist=max_dist.expression if max_dist else None, 311 ) 312 313 314def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 315 def _builder(args: t.List) -> exp.TimeToStr: 316 return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0)) 317 318 return _builder 319 320 321def _build_contains_substring(args: 
t.List) -> exp.Contains | exp.Anonymous: 322 if len(args) == 3: 323 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 324 325 # Lowercase the operands in case of transpilation, as exp.Contains 326 # is case-sensitive on other dialects 327 this = exp.Lower(this=seq_get(args, 0)) 328 expr = exp.Lower(this=seq_get(args, 1)) 329 330 return exp.Contains(this=this, expression=expr) 331 332 333def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 334 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 335 upper = name.upper() 336 337 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 338 339 if dquote_escaping: 340 self._quote_json_path_key_using_brackets = False 341 342 sql = rename_func(upper)(self, expression) 343 344 if dquote_escaping: 345 self._quote_json_path_key_using_brackets = True 346 347 return sql 348 349 350class BigQuery(Dialect): 351 WEEK_OFFSET = -1 352 UNNEST_COLUMN_ONLY = True 353 SUPPORTS_USER_DEFINED_TYPES = False 354 SUPPORTS_SEMI_ANTI_JOIN = False 355 LOG_BASE_FIRST = False 356 HEX_LOWERCASE = True 357 FORCE_EARLY_ALIAS_REF_EXPANSION = True 358 EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True 359 PRESERVE_ORIGINAL_NAMES = True 360 HEX_STRING_IS_INTEGER_TYPE = True 361 362 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 363 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 364 365 # bigquery udfs are case sensitive 366 NORMALIZE_FUNCTIONS = False 367 368 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 369 TIME_MAPPING = { 370 "%D": "%m/%d/%y", 371 "%E6S": "%S.%f", 372 "%e": "%-d", 373 } 374 375 FORMAT_MAPPING = { 376 "DD": "%d", 377 "MM": "%m", 378 "MON": "%b", 379 "MONTH": "%B", 380 "YYYY": "%Y", 381 "YY": "%y", 382 "HH": "%I", 383 "HH12": "%I", 384 "HH24": "%H", 385 "MI": "%M", 386 "SS": "%S", 387 "SSSSS": "%f", 388 "TZH": "%z", 389 } 390 391 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 392 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 393 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 394 395 # All set operations require either a DISTINCT or ALL specifier 396 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 397 398 ANNOTATORS = { 399 **Dialect.ANNOTATORS, 400 **{ 401 expr_type: lambda self, e: _annotate_math_functions(self, e) 402 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 403 }, 404 **{ 405 expr_type: lambda self, e: self._annotate_by_args(e, "this") 406 for expr_type in ( 407 exp.Left, 408 exp.Right, 409 exp.Lower, 410 exp.Upper, 411 exp.Pad, 412 exp.Trim, 413 exp.RegexpExtract, 414 exp.RegexpReplace, 415 exp.Repeat, 416 exp.Substring, 417 ) 418 }, 419 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 420 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 421 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 422 } 423 424 def normalize_identifier(self, expression: E) -> E: 425 if ( 426 isinstance(expression, exp.Identifier) 427 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 428 ): 429 parent = expression.parent 430 while isinstance(parent, exp.Dot): 431 parent = parent.parent 432 433 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 434 # by default. 
The following check uses a heuristic to detect tables based on whether 435 # they are qualified. This should generally be correct, because tables in BigQuery 436 # must be qualified with at least a dataset, unless @@dataset_id is set. 437 case_sensitive = ( 438 isinstance(parent, exp.UserDefinedFunction) 439 or ( 440 isinstance(parent, exp.Table) 441 and parent.db 442 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 443 ) 444 or expression.meta.get("is_table") 445 ) 446 if not case_sensitive: 447 expression.set("this", expression.this.lower()) 448 449 return expression 450 451 class Tokenizer(tokens.Tokenizer): 452 QUOTES = ["'", '"', '"""', "'''"] 453 COMMENTS = ["--", "#", ("/*", "*/")] 454 IDENTIFIERS = ["`"] 455 STRING_ESCAPES = ["\\"] 456 457 HEX_STRINGS = [("0x", ""), ("0X", "")] 458 459 BYTE_STRINGS = [ 460 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 461 ] 462 463 RAW_STRINGS = [ 464 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 465 ] 466 467 KEYWORDS = { 468 **tokens.Tokenizer.KEYWORDS, 469 "ANY TYPE": TokenType.VARIANT, 470 "BEGIN": TokenType.COMMAND, 471 "BEGIN TRANSACTION": TokenType.BEGIN, 472 "BYTEINT": TokenType.INT, 473 "BYTES": TokenType.BINARY, 474 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 475 "DATETIME": TokenType.TIMESTAMP, 476 "DECLARE": TokenType.COMMAND, 477 "ELSEIF": TokenType.COMMAND, 478 "EXCEPTION": TokenType.COMMAND, 479 "EXPORT": TokenType.EXPORT, 480 "FLOAT64": TokenType.DOUBLE, 481 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 482 "MODEL": TokenType.MODEL, 483 "NOT DETERMINISTIC": TokenType.VOLATILE, 484 "RECORD": TokenType.STRUCT, 485 "TIMESTAMP": TokenType.TIMESTAMPTZ, 486 } 487 KEYWORDS.pop("DIV") 488 KEYWORDS.pop("VALUES") 489 KEYWORDS.pop("/*+") 490 491 class Parser(parser.Parser): 492 PREFIXED_PIVOT_COLUMNS = True 493 LOG_DEFAULTS_TO_LN = True 494 SUPPORTS_IMPLICIT_UNNEST = True 495 496 FUNCTIONS = { 497 **parser.Parser.FUNCTIONS, 498 "CONTAINS_SUBSTR": _build_contains_substring, 499 "DATE": _build_date, 500 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 501 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 502 "DATE_TRUNC": lambda args: exp.DateTrunc( 503 unit=exp.Literal.string(str(seq_get(args, 1))), 504 this=seq_get(args, 0), 505 zone=seq_get(args, 2), 506 ), 507 "DATETIME": _build_datetime, 508 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 509 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 510 "DIV": binary_from_function(exp.IntDiv), 511 "EDIT_DISTANCE": _build_levenshtein, 512 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 513 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 514 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 515 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 516 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 517 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 518 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 519 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 520 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 521 "MD5": exp.MD5Digest.from_arg_list, 522 "TO_HEX": _build_to_hex, 523 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 524 [seq_get(args, 1), seq_get(args, 0)] 525 ), 526 "PARSE_TIMESTAMP": _build_parse_timestamp, 527 
"REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 528 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 529 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 530 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 531 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 532 ), 533 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 534 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 535 "SPLIT": lambda args: exp.Split( 536 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 537 this=seq_get(args, 0), 538 expression=seq_get(args, 1) or exp.Literal.string(","), 539 ), 540 "STRPOS": exp.StrPosition.from_arg_list, 541 "TIME": _build_time, 542 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 543 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 544 "TIMESTAMP": _build_timestamp, 545 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 546 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 547 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 548 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 549 ), 550 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 551 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 552 ), 553 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 554 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 555 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 556 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 557 } 558 559 FUNCTION_PARSERS = { 560 **parser.Parser.FUNCTION_PARSERS, 561 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 562 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 563 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 564 } 565 FUNCTION_PARSERS.pop("TRIM") 566 567 NO_PAREN_FUNCTIONS = { 568 **parser.Parser.NO_PAREN_FUNCTIONS, 569 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 570 } 571 572 NESTED_TYPE_TOKENS = { 573 *parser.Parser.NESTED_TYPE_TOKENS, 574 TokenType.TABLE, 575 } 576 577 PROPERTY_PARSERS = { 578 **parser.Parser.PROPERTY_PARSERS, 579 "NOT DETERMINISTIC": lambda self: self.expression( 580 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 581 ), 582 "OPTIONS": lambda self: self._parse_with_property(), 583 } 584 585 CONSTRAINT_PARSERS = { 586 **parser.Parser.CONSTRAINT_PARSERS, 587 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 588 } 589 590 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 591 RANGE_PARSERS.pop(TokenType.OVERLAPS) 592 593 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 594 595 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 596 597 STATEMENT_PARSERS = { 598 **parser.Parser.STATEMENT_PARSERS, 599 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 600 TokenType.END: lambda self: self._parse_as_command(self._prev), 601 TokenType.FOR: lambda self: self._parse_for_in(), 602 TokenType.EXPORT: lambda self: self._parse_export_data(), 603 } 604 605 BRACKET_OFFSETS = { 606 "OFFSET": (0, False), 607 "ORDINAL": (1, False), 608 "SAFE_OFFSET": (0, True), 609 "SAFE_ORDINAL": (1, True), 610 } 611 612 def _parse_for_in(self) -> exp.ForIn: 613 this = self._parse_range() 614 self._match_text_seq("DO") 615 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 616 617 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 
618 this = super()._parse_table_part(schema=schema) or self._parse_number() 619 620 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 621 if isinstance(this, exp.Identifier): 622 table_name = this.name 623 while self._match(TokenType.DASH, advance=False) and self._next: 624 start = self._curr 625 while self._is_connected() and not self._match_set( 626 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 627 ): 628 self._advance() 629 630 table_name += self._find_sql(start, self._prev) 631 632 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 633 elif isinstance(this, exp.Literal): 634 table_name = this.name 635 636 if self._is_connected() and self._parse_var(any_token=True): 637 table_name += self._prev.text 638 639 this = exp.Identifier(this=table_name, quoted=True) 640 641 return this 642 643 def _parse_table_parts( 644 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 645 ) -> exp.Table: 646 table = super()._parse_table_parts( 647 schema=schema, is_db_reference=is_db_reference, wildcard=True 648 ) 649 650 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 651 if not table.catalog: 652 if table.db: 653 parts = table.db.split(".") 654 if len(parts) == 2 and not table.args["db"].quoted: 655 table.set("catalog", exp.Identifier(this=parts[0])) 656 table.set("db", exp.Identifier(this=parts[1])) 657 else: 658 parts = table.name.split(".") 659 if len(parts) == 2 and not table.this.quoted: 660 table.set("db", exp.Identifier(this=parts[0])) 661 table.set("this", exp.Identifier(this=parts[1])) 662 663 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 664 alias = table.this 665 catalog, db, this, *rest = ( 666 exp.to_identifier(p, quoted=True) 667 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 668 ) 669 670 if rest and this: 671 this = exp.Dot.build([this, *rest]) # type: ignore 672 673 table = exp.Table( 674 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 675 ) 676 table.meta["quoted_table"] = True 677 else: 678 alias = None 679 680 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 681 # dataset, so if the project identifier is omitted we need to fix the ast so that 682 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 683 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 684 # views, because it would seem like the "catalog" part is set, when it'd actually 685 # be the region/dataset. Merging the two identifiers into a single one is done to 686 # avoid producing a 4-part Table reference, which would cause issues in the schema 687 # module, when there are 3-part table names mixed with information schema views. 688 # 689 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 690 table_parts = table.parts 691 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 692 # We need to alias the table here to avoid breaking existing qualified columns. 693 # This is expected to be safe, because if there's an actual alias coming up in 694 # the token stream, it will overwrite this one. If there isn't one, we are only 695 # exposing the name that can be used to reference the view explicitly (a no-op). 
696 exp.alias_( 697 table, 698 t.cast(exp.Identifier, alias or table_parts[-1]), 699 table=True, 700 copy=False, 701 ) 702 703 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 704 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 705 table.set("db", seq_get(table_parts, -3)) 706 table.set("catalog", seq_get(table_parts, -4)) 707 708 return table 709 710 def _parse_column(self) -> t.Optional[exp.Expression]: 711 column = super()._parse_column() 712 if isinstance(column, exp.Column): 713 parts = column.parts 714 if any("." in p.name for p in parts): 715 catalog, db, table, this, *rest = ( 716 exp.to_identifier(p, quoted=True) 717 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 718 ) 719 720 if rest and this: 721 this = exp.Dot.build([this, *rest]) # type: ignore 722 723 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 724 column.meta["quoted_column"] = True 725 726 return column 727 728 @t.overload 729 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 730 731 @t.overload 732 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 733 734 def _parse_json_object(self, agg=False): 735 json_object = super()._parse_json_object() 736 array_kv_pair = seq_get(json_object.expressions, 0) 737 738 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 739 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 740 if ( 741 array_kv_pair 742 and isinstance(array_kv_pair.this, exp.Array) 743 and isinstance(array_kv_pair.expression, exp.Array) 744 ): 745 keys = array_kv_pair.this.expressions 746 values = array_kv_pair.expression.expressions 747 748 json_object.set( 749 "expressions", 750 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 751 ) 752 753 return json_object 754 755 def _parse_bracket( 756 self, this: t.Optional[exp.Expression] = None 757 ) -> t.Optional[exp.Expression]: 758 bracket = super()._parse_bracket(this) 759 760 if this is bracket: 761 return bracket 762 763 if isinstance(bracket, exp.Bracket): 764 for expression in bracket.expressions: 765 name = expression.name.upper() 766 767 if name not in self.BRACKET_OFFSETS: 768 break 769 770 offset, safe = self.BRACKET_OFFSETS[name] 771 bracket.set("offset", offset) 772 bracket.set("safe", safe) 773 expression.replace(expression.expressions[0]) 774 775 return bracket 776 777 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 778 unnest = super()._parse_unnest(with_alias=with_alias) 779 780 if not unnest: 781 return None 782 783 unnest_expr = seq_get(unnest.expressions, 0) 784 if unnest_expr: 785 from sqlglot.optimizer.annotate_types import annotate_types 786 787 unnest_expr = annotate_types(unnest_expr) 788 789 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 790 # in contrast to other dialects such as DuckDB which flattens only the array by default 791 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 792 array_elem.is_type(exp.DataType.Type.STRUCT) 793 for array_elem in unnest_expr._type.expressions 794 ): 795 unnest.set("explode_array", True) 796 797 return unnest 798 799 def _parse_make_interval(self) -> exp.MakeInterval: 800 expr = exp.MakeInterval() 801 802 for arg_key in expr.arg_types: 803 value = self._parse_lambda() 804 805 if not value: 806 break 807 808 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 809 # that can 
appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 810 if isinstance(value, exp.Kwarg): 811 arg_key = value.this.name 812 813 expr.set(arg_key, value) 814 815 self._match(TokenType.COMMA) 816 817 return expr 818 819 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 820 expr = self.expression( 821 exp.FeaturesAtTime, 822 this=(self._match(TokenType.TABLE) and self._parse_table()) 823 or self._parse_select(nested=True), 824 ) 825 826 while self._match(TokenType.COMMA): 827 arg = self._parse_lambda() 828 829 # Get the LHS of the Kwarg and set the arg to that value, e.g 830 # "num_rows => 1" sets the expr's `num_rows` arg 831 if arg: 832 expr.set(arg.this.name, arg) 833 834 return expr 835 836 def _parse_export_data(self) -> exp.Export: 837 self._match_text_seq("DATA") 838 839 return self.expression( 840 exp.Export, 841 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 842 options=self._parse_properties(), 843 this=self._match_text_seq("AS") and self._parse_select(), 844 ) 845 846 class Generator(generator.Generator): 847 INTERVAL_ALLOWS_PLURAL_FORM = False 848 JOIN_HINTS = False 849 QUERY_HINTS = False 850 TABLE_HINTS = False 851 LIMIT_FETCH = "LIMIT" 852 RENAME_TABLE_WITH_DB = False 853 NVL2_SUPPORTED = False 854 UNNEST_WITH_ORDINALITY = False 855 COLLATE_IS_FUNC = True 856 LIMIT_ONLY_LITERALS = True 857 SUPPORTS_TABLE_ALIAS_COLUMNS = False 858 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 859 JSON_KEY_VALUE_PAIR_SEP = "," 860 NULL_ORDERING_SUPPORTED = False 861 IGNORE_NULLS_IN_FUNC = True 862 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 863 CAN_IMPLEMENT_ARRAY_ANY = True 864 SUPPORTS_TO_NUMBER = False 865 NAMED_PLACEHOLDER_TOKEN = "@" 866 HEX_FUNC = "TO_HEX" 867 WITH_PROPERTIES_PREFIX = "OPTIONS" 868 SUPPORTS_EXPLODING_PROJECTIONS = False 869 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 870 SUPPORTS_UNIX_SECONDS = True 871 872 TRANSFORMS = { 873 **generator.Generator.TRANSFORMS, 874 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 875 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 876 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 877 exp.Array: inline_array_unless_query, 878 exp.ArrayContains: _array_contains_sql, 879 exp.ArrayFilter: filter_array_using_unnest, 880 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 881 exp.CollateProperty: lambda self, e: ( 882 f"DEFAULT COLLATE {self.sql(e, 'this')}" 883 if e.args.get("default") 884 else f"COLLATE {self.sql(e, 'this')}" 885 ), 886 exp.Commit: lambda *_: "COMMIT TRANSACTION", 887 exp.CountIf: rename_func("COUNTIF"), 888 exp.Create: _create_sql, 889 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 890 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 891 exp.DateDiff: lambda self, e: self.func( 892 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 893 ), 894 exp.DateFromParts: rename_func("DATE"), 895 exp.DateStrToDate: datestrtodate_sql, 896 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 897 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 898 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 899 exp.DateTrunc: lambda self, e: self.func( 900 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 901 ), 902 exp.FromTimeZone: lambda self, e: self.func( 903 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 904 ), 905 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 906 exp.GroupConcat: lambda self, e: groupconcat_sql( 907 self, e, func_name="STRING_AGG", within_group=False 908 ), 909 exp.Hex: lambda 
self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 910 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 911 exp.If: if_sql(false_value="NULL"), 912 exp.ILike: no_ilike_sql, 913 exp.IntDiv: rename_func("DIV"), 914 exp.Int64: rename_func("INT64"), 915 exp.JSONExtract: _json_extract_sql, 916 exp.JSONExtractArray: _json_extract_sql, 917 exp.JSONExtractScalar: _json_extract_sql, 918 exp.JSONFormat: rename_func("TO_JSON_STRING"), 919 exp.Levenshtein: _levenshtein_sql, 920 exp.Max: max_or_greatest, 921 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 922 exp.MD5Digest: rename_func("MD5"), 923 exp.Min: min_or_least, 924 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 925 exp.RegexpExtract: lambda self, e: self.func( 926 "REGEXP_EXTRACT", 927 e.this, 928 e.expression, 929 e.args.get("position"), 930 e.args.get("occurrence"), 931 ), 932 exp.RegexpExtractAll: lambda self, e: self.func( 933 "REGEXP_EXTRACT_ALL", e.this, e.expression 934 ), 935 exp.RegexpReplace: regexp_replace_sql, 936 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 937 exp.ReturnsProperty: _returnsproperty_sql, 938 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 939 exp.Select: transforms.preprocess( 940 [ 941 transforms.explode_to_unnest(), 942 transforms.unqualify_unnest, 943 transforms.eliminate_distinct_on, 944 _alias_ordered_group, 945 transforms.eliminate_semi_and_anti_joins, 946 ] 947 ), 948 exp.SHA: rename_func("SHA1"), 949 exp.SHA2: sha256_sql, 950 exp.StabilityProperty: lambda self, e: ( 951 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 952 ), 953 exp.String: rename_func("STRING"), 954 exp.StrPosition: lambda self, e: ( 955 strposition_sql( 956 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 957 ) 958 ), 959 exp.StrToDate: _str_to_datetime_sql, 960 exp.StrToTime: _str_to_datetime_sql, 961 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 962 exp.TimeFromParts: rename_func("TIME"), 963 exp.TimestampFromParts: rename_func("DATETIME"), 964 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 965 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 966 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 967 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 968 exp.TimeStrToTime: timestrtotime_sql, 969 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 970 exp.TsOrDsAdd: _ts_or_ds_add_sql, 971 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 972 exp.TsOrDsToTime: rename_func("TIME"), 973 exp.TsOrDsToDatetime: rename_func("DATETIME"), 974 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 975 exp.Unhex: rename_func("FROM_HEX"), 976 exp.UnixDate: rename_func("UNIX_DATE"), 977 exp.UnixToTime: _unix_to_time_sql, 978 exp.Uuid: lambda *_: "GENERATE_UUID()", 979 exp.Values: _derived_table_values_to_unnest, 980 exp.VariancePop: rename_func("VAR_POP"), 981 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 982 } 983 984 SUPPORTED_JSON_PATH_PARTS = { 985 exp.JSONPathKey, 986 exp.JSONPathRoot, 987 exp.JSONPathSubscript, 988 } 989 990 TYPE_MAPPING = { 991 **generator.Generator.TYPE_MAPPING, 992 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 993 exp.DataType.Type.BIGINT: "INT64", 994 exp.DataType.Type.BINARY: "BYTES", 995 exp.DataType.Type.BLOB: "BYTES", 996 exp.DataType.Type.BOOLEAN: "BOOL", 997 exp.DataType.Type.CHAR: "STRING", 998 exp.DataType.Type.DECIMAL: "NUMERIC", 999 exp.DataType.Type.DOUBLE: "FLOAT64", 1000 exp.DataType.Type.FLOAT: "FLOAT64", 1001 exp.DataType.Type.INT: "INT64", 
1002 exp.DataType.Type.NCHAR: "STRING", 1003 exp.DataType.Type.NVARCHAR: "STRING", 1004 exp.DataType.Type.SMALLINT: "INT64", 1005 exp.DataType.Type.TEXT: "STRING", 1006 exp.DataType.Type.TIMESTAMP: "DATETIME", 1007 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1008 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1009 exp.DataType.Type.TINYINT: "INT64", 1010 exp.DataType.Type.ROWVERSION: "BYTES", 1011 exp.DataType.Type.UUID: "STRING", 1012 exp.DataType.Type.VARBINARY: "BYTES", 1013 exp.DataType.Type.VARCHAR: "STRING", 1014 exp.DataType.Type.VARIANT: "ANY TYPE", 1015 } 1016 1017 PROPERTIES_LOCATION = { 1018 **generator.Generator.PROPERTIES_LOCATION, 1019 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1020 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1021 } 1022 1023 # WINDOW comes after QUALIFY 1024 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1025 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1026 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1027 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1028 } 1029 1030 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1031 RESERVED_KEYWORDS = { 1032 "all", 1033 "and", 1034 "any", 1035 "array", 1036 "as", 1037 "asc", 1038 "assert_rows_modified", 1039 "at", 1040 "between", 1041 "by", 1042 "case", 1043 "cast", 1044 "collate", 1045 "contains", 1046 "create", 1047 "cross", 1048 "cube", 1049 "current", 1050 "default", 1051 "define", 1052 "desc", 1053 "distinct", 1054 "else", 1055 "end", 1056 "enum", 1057 "escape", 1058 "except", 1059 "exclude", 1060 "exists", 1061 "extract", 1062 "false", 1063 "fetch", 1064 "following", 1065 "for", 1066 "from", 1067 "full", 1068 "group", 1069 "grouping", 1070 "groups", 1071 "hash", 1072 "having", 1073 "if", 1074 "ignore", 1075 "in", 1076 "inner", 1077 "intersect", 1078 "interval", 1079 "into", 1080 "is", 1081 "join", 1082 "lateral", 1083 "left", 1084 "like", 1085 "limit", 1086 "lookup", 1087 "merge", 1088 "natural", 1089 "new", 1090 "no", 1091 "not", 1092 "null", 1093 "nulls", 1094 "of", 1095 "on", 1096 "or", 1097 "order", 1098 "outer", 1099 "over", 1100 "partition", 1101 "preceding", 1102 "proto", 1103 "qualify", 1104 "range", 1105 "recursive", 1106 "respect", 1107 "right", 1108 "rollup", 1109 "rows", 1110 "select", 1111 "set", 1112 "some", 1113 "struct", 1114 "tablesample", 1115 "then", 1116 "to", 1117 "treat", 1118 "true", 1119 "unbounded", 1120 "union", 1121 "unnest", 1122 "using", 1123 "when", 1124 "where", 1125 "window", 1126 "with", 1127 "within", 1128 } 1129 1130 def mod_sql(self, expression: exp.Mod) -> str: 1131 this = expression.this 1132 expr = expression.expression 1133 return self.func( 1134 "MOD", 1135 this.unnest() if isinstance(this, exp.Paren) else this, 1136 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1137 ) 1138 1139 def column_parts(self, expression: exp.Column) -> str: 1140 if expression.meta.get("quoted_column"): 1141 # If a column reference is of the form `dataset.table`.name, we need 1142 # to preserve the quoted table path, otherwise the reference breaks 1143 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1144 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1145 return f"{table_path}.{self.sql(expression, 'this')}" 1146 1147 return super().column_parts(expression) 1148 1149 def table_parts(self, expression: exp.Table) -> str: 1150 # Depending on the context, `x.y` may not resolve to 
the same data source as `x`.`y`, so 1151 # we need to make sure the correct quoting is used in each case. 1152 # 1153 # For example, if there is a CTE x that clashes with a schema name, then the former will 1154 # return the table y in that schema, whereas the latter will return the CTE's y column: 1155 # 1156 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1158 if expression.meta.get("quoted_table"): 1159 table_parts = ".".join(p.name for p in expression.parts) 1160 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1161 1162 return super().table_parts(expression) 1163 1164 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1165 this = expression.this 1166 if isinstance(this, exp.TsOrDsToDatetime): 1167 func_name = "FORMAT_DATETIME" 1168 elif isinstance(this, exp.TsOrDsToTimestamp): 1169 func_name = "FORMAT_TIMESTAMP" 1170 else: 1171 func_name = "FORMAT_DATE" 1172 1173 time_expr = ( 1174 this 1175 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1176 else expression 1177 ) 1178 return self.func(func_name, self.format_time(expression), time_expr.this) 1179 1180 def eq_sql(self, expression: exp.EQ) -> str: 1181 # Operands of = cannot be NULL in BigQuery 1182 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1183 if not isinstance(expression.parent, exp.Update): 1184 return "NULL" 1185 1186 return self.binary(expression, "=") 1187 1188 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1189 parent = expression.parent 1190 1191 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1192 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1193 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1194 return self.func( 1195 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1196 ) 1197 1198 return super().attimezone_sql(expression) 1199 1200 def trycast_sql(self, expression: exp.TryCast) -> str: 1201 return self.cast_sql(expression, safe_prefix="SAFE_") 1202 1203 def bracket_sql(self, expression: exp.Bracket) -> str: 1204 this = expression.this 1205 expressions = expression.expressions 1206 1207 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1208 arg = expressions[0] 1209 if arg.type is None: 1210 from sqlglot.optimizer.annotate_types import annotate_types 1211 1212 arg = annotate_types(arg) 1213 1214 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1215 # BQ doesn't support bracket syntax with string values for structs 1216 return f"{self.sql(this)}.{arg.name}" 1217 1218 expressions_sql = self.expressions(expression, flat=True) 1219 offset = expression.args.get("offset") 1220 1221 if offset == 0: 1222 expressions_sql = f"OFFSET({expressions_sql})" 1223 elif offset == 1: 1224 expressions_sql = f"ORDINAL({expressions_sql})" 1225 elif offset is not None: 1226 self.unsupported(f"Unsupported array offset: {offset}") 1227 1228 if expression.args.get("safe"): 1229 expressions_sql = f"SAFE_{expressions_sql}" 1230 1231 return f"{self.sql(this)}[{expressions_sql}]" 1232 1233 def in_unnest_op(self, expression: exp.Unnest) -> str: 1234 return self.sql(expression) 1235 1236 def version_sql(self, expression: exp.Version) -> str: 1237 if expression.name == "TIMESTAMP": 1238 expression.set("this", "SYSTEM_TIME") 1239 return super().version_sql(expression) 1240 1241 def contains_sql(self, expression: exp.Contains) -> str: 1242 this = expression.this 1243 expr = expression.expression 1244 1245 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1246 this = this.this 1247 expr = expr.this 1248 1249 return self.func("CONTAINS_SUBSTR", this, expr) 1250 1251 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1252 this = expression.this 1253 1254 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1255 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1256 # because they aren't literals and so the above syntax is invalid BigQuery. 1257 if isinstance(this, exp.Array): 1258 elem = seq_get(this.expressions, 0) 1259 if not (elem and elem.find(exp.Query)): 1260 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1261 1262 return super().cast_sql(expression, safe_prefix=safe_prefix)
351class BigQuery(Dialect): 352 WEEK_OFFSET = -1 353 UNNEST_COLUMN_ONLY = True 354 SUPPORTS_USER_DEFINED_TYPES = False 355 SUPPORTS_SEMI_ANTI_JOIN = False 356 LOG_BASE_FIRST = False 357 HEX_LOWERCASE = True 358 FORCE_EARLY_ALIAS_REF_EXPANSION = True 359 EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True 360 PRESERVE_ORIGINAL_NAMES = True 361 HEX_STRING_IS_INTEGER_TYPE = True 362 363 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 364 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 365 366 # bigquery udfs are case sensitive 367 NORMALIZE_FUNCTIONS = False 368 369 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 370 TIME_MAPPING = { 371 "%D": "%m/%d/%y", 372 "%E6S": "%S.%f", 373 "%e": "%-d", 374 } 375 376 FORMAT_MAPPING = { 377 "DD": "%d", 378 "MM": "%m", 379 "MON": "%b", 380 "MONTH": "%B", 381 "YYYY": "%Y", 382 "YY": "%y", 383 "HH": "%I", 384 "HH12": "%I", 385 "HH24": "%H", 386 "MI": "%M", 387 "SS": "%S", 388 "SSSSS": "%f", 389 "TZH": "%z", 390 } 391 392 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 393 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 394 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 395 396 # All set operations require either a DISTINCT or ALL specifier 397 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 398 399 ANNOTATORS = { 400 **Dialect.ANNOTATORS, 401 **{ 402 expr_type: lambda self, e: _annotate_math_functions(self, e) 403 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 404 }, 405 **{ 406 expr_type: lambda self, e: self._annotate_by_args(e, "this") 407 for expr_type in ( 408 exp.Left, 409 exp.Right, 410 exp.Lower, 411 exp.Upper, 412 exp.Pad, 413 exp.Trim, 414 exp.RegexpExtract, 415 exp.RegexpReplace, 416 exp.Repeat, 417 exp.Substring, 418 ) 419 }, 420 exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"), 421 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 422 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 423 } 424 425 def normalize_identifier(self, expression: E) -> E: 426 if ( 427 isinstance(expression, exp.Identifier) 428 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 429 ): 430 parent = expression.parent 431 while isinstance(parent, exp.Dot): 432 parent = parent.parent 433 434 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 435 # by default. The following check uses a heuristic to detect tables based on whether 436 # they are qualified. This should generally be correct, because tables in BigQuery 437 # must be qualified with at least a dataset, unless @@dataset_id is set. 
438 case_sensitive = ( 439 isinstance(parent, exp.UserDefinedFunction) 440 or ( 441 isinstance(parent, exp.Table) 442 and parent.db 443 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 444 ) 445 or expression.meta.get("is_table") 446 ) 447 if not case_sensitive: 448 expression.set("this", expression.this.lower()) 449 450 return expression 451 452 class Tokenizer(tokens.Tokenizer): 453 QUOTES = ["'", '"', '"""', "'''"] 454 COMMENTS = ["--", "#", ("/*", "*/")] 455 IDENTIFIERS = ["`"] 456 STRING_ESCAPES = ["\\"] 457 458 HEX_STRINGS = [("0x", ""), ("0X", "")] 459 460 BYTE_STRINGS = [ 461 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 462 ] 463 464 RAW_STRINGS = [ 465 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 466 ] 467 468 KEYWORDS = { 469 **tokens.Tokenizer.KEYWORDS, 470 "ANY TYPE": TokenType.VARIANT, 471 "BEGIN": TokenType.COMMAND, 472 "BEGIN TRANSACTION": TokenType.BEGIN, 473 "BYTEINT": TokenType.INT, 474 "BYTES": TokenType.BINARY, 475 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 476 "DATETIME": TokenType.TIMESTAMP, 477 "DECLARE": TokenType.COMMAND, 478 "ELSEIF": TokenType.COMMAND, 479 "EXCEPTION": TokenType.COMMAND, 480 "EXPORT": TokenType.EXPORT, 481 "FLOAT64": TokenType.DOUBLE, 482 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 483 "MODEL": TokenType.MODEL, 484 "NOT DETERMINISTIC": TokenType.VOLATILE, 485 "RECORD": TokenType.STRUCT, 486 "TIMESTAMP": TokenType.TIMESTAMPTZ, 487 } 488 KEYWORDS.pop("DIV") 489 KEYWORDS.pop("VALUES") 490 KEYWORDS.pop("/*+") 491 492 class Parser(parser.Parser): 493 PREFIXED_PIVOT_COLUMNS = True 494 LOG_DEFAULTS_TO_LN = True 495 SUPPORTS_IMPLICIT_UNNEST = True 496 497 FUNCTIONS = { 498 **parser.Parser.FUNCTIONS, 499 "CONTAINS_SUBSTR": _build_contains_substring, 500 "DATE": _build_date, 501 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 502 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 503 "DATE_TRUNC": lambda args: exp.DateTrunc( 504 unit=exp.Literal.string(str(seq_get(args, 1))), 505 this=seq_get(args, 0), 506 zone=seq_get(args, 2), 507 ), 508 "DATETIME": _build_datetime, 509 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 510 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 511 "DIV": binary_from_function(exp.IntDiv), 512 "EDIT_DISTANCE": _build_levenshtein, 513 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 514 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 515 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 516 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 517 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 518 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 519 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 520 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 521 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 522 "MD5": exp.MD5Digest.from_arg_list, 523 "TO_HEX": _build_to_hex, 524 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 525 [seq_get(args, 1), seq_get(args, 0)] 526 ), 527 "PARSE_TIMESTAMP": _build_parse_timestamp, 528 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 529 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 530 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 531 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 532 
exp.RegexpExtractAll, default_group=exp.Literal.number(0) 533 ), 534 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 535 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 536 "SPLIT": lambda args: exp.Split( 537 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 538 this=seq_get(args, 0), 539 expression=seq_get(args, 1) or exp.Literal.string(","), 540 ), 541 "STRPOS": exp.StrPosition.from_arg_list, 542 "TIME": _build_time, 543 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 544 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 545 "TIMESTAMP": _build_timestamp, 546 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 547 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 548 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 549 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 550 ), 551 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 552 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 553 ), 554 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 555 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 556 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 557 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 558 } 559 560 FUNCTION_PARSERS = { 561 **parser.Parser.FUNCTION_PARSERS, 562 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 563 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 564 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 565 } 566 FUNCTION_PARSERS.pop("TRIM") 567 568 NO_PAREN_FUNCTIONS = { 569 **parser.Parser.NO_PAREN_FUNCTIONS, 570 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 571 } 572 573 NESTED_TYPE_TOKENS = { 574 *parser.Parser.NESTED_TYPE_TOKENS, 575 TokenType.TABLE, 576 } 577 578 PROPERTY_PARSERS = { 579 **parser.Parser.PROPERTY_PARSERS, 580 "NOT DETERMINISTIC": lambda self: self.expression( 581 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 582 ), 583 "OPTIONS": lambda self: self._parse_with_property(), 584 } 585 586 CONSTRAINT_PARSERS = { 587 **parser.Parser.CONSTRAINT_PARSERS, 588 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 589 } 590 591 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 592 RANGE_PARSERS.pop(TokenType.OVERLAPS) 593 594 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 595 596 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 597 598 STATEMENT_PARSERS = { 599 **parser.Parser.STATEMENT_PARSERS, 600 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 601 TokenType.END: lambda self: self._parse_as_command(self._prev), 602 TokenType.FOR: lambda self: self._parse_for_in(), 603 TokenType.EXPORT: lambda self: self._parse_export_data(), 604 } 605 606 BRACKET_OFFSETS = { 607 "OFFSET": (0, False), 608 "ORDINAL": (1, False), 609 "SAFE_OFFSET": (0, True), 610 "SAFE_ORDINAL": (1, True), 611 } 612 613 def _parse_for_in(self) -> exp.ForIn: 614 this = self._parse_range() 615 self._match_text_seq("DO") 616 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 617 618 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 619 this = super()._parse_table_part(schema=schema) or self._parse_number() 620 621 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 622 if isinstance(this, exp.Identifier): 623 table_name = 
this.name 624 while self._match(TokenType.DASH, advance=False) and self._next: 625 start = self._curr 626 while self._is_connected() and not self._match_set( 627 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 628 ): 629 self._advance() 630 631 table_name += self._find_sql(start, self._prev) 632 633 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 634 elif isinstance(this, exp.Literal): 635 table_name = this.name 636 637 if self._is_connected() and self._parse_var(any_token=True): 638 table_name += self._prev.text 639 640 this = exp.Identifier(this=table_name, quoted=True) 641 642 return this 643 644 def _parse_table_parts( 645 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 646 ) -> exp.Table: 647 table = super()._parse_table_parts( 648 schema=schema, is_db_reference=is_db_reference, wildcard=True 649 ) 650 651 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 652 if not table.catalog: 653 if table.db: 654 parts = table.db.split(".") 655 if len(parts) == 2 and not table.args["db"].quoted: 656 table.set("catalog", exp.Identifier(this=parts[0])) 657 table.set("db", exp.Identifier(this=parts[1])) 658 else: 659 parts = table.name.split(".") 660 if len(parts) == 2 and not table.this.quoted: 661 table.set("db", exp.Identifier(this=parts[0])) 662 table.set("this", exp.Identifier(this=parts[1])) 663 664 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 665 alias = table.this 666 catalog, db, this, *rest = ( 667 exp.to_identifier(p, quoted=True) 668 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 669 ) 670 671 if rest and this: 672 this = exp.Dot.build([this, *rest]) # type: ignore 673 674 table = exp.Table( 675 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 676 ) 677 table.meta["quoted_table"] = True 678 else: 679 alias = None 680 681 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 682 # dataset, so if the project identifier is omitted we need to fix the ast so that 683 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 684 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 685 # views, because it would seem like the "catalog" part is set, when it'd actually 686 # be the region/dataset. Merging the two identifiers into a single one is done to 687 # avoid producing a 4-part Table reference, which would cause issues in the schema 688 # module, when there are 3-part table names mixed with information schema views. 689 # 690 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 691 table_parts = table.parts 692 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 693 # We need to alias the table here to avoid breaking existing qualified columns. 694 # This is expected to be safe, because if there's an actual alias coming up in 695 # the token stream, it will overwrite this one. If there isn't one, we are only 696 # exposing the name that can be used to reference the view explicitly (a no-op). 
697 exp.alias_( 698 table, 699 t.cast(exp.Identifier, alias or table_parts[-1]), 700 table=True, 701 copy=False, 702 ) 703 704 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 705 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 706 table.set("db", seq_get(table_parts, -3)) 707 table.set("catalog", seq_get(table_parts, -4)) 708 709 return table 710 711 def _parse_column(self) -> t.Optional[exp.Expression]: 712 column = super()._parse_column() 713 if isinstance(column, exp.Column): 714 parts = column.parts 715 if any("." in p.name for p in parts): 716 catalog, db, table, this, *rest = ( 717 exp.to_identifier(p, quoted=True) 718 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 719 ) 720 721 if rest and this: 722 this = exp.Dot.build([this, *rest]) # type: ignore 723 724 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 725 column.meta["quoted_column"] = True 726 727 return column 728 729 @t.overload 730 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 731 732 @t.overload 733 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 734 735 def _parse_json_object(self, agg=False): 736 json_object = super()._parse_json_object() 737 array_kv_pair = seq_get(json_object.expressions, 0) 738 739 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 740 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 741 if ( 742 array_kv_pair 743 and isinstance(array_kv_pair.this, exp.Array) 744 and isinstance(array_kv_pair.expression, exp.Array) 745 ): 746 keys = array_kv_pair.this.expressions 747 values = array_kv_pair.expression.expressions 748 749 json_object.set( 750 "expressions", 751 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 752 ) 753 754 return json_object 755 756 def _parse_bracket( 757 self, this: t.Optional[exp.Expression] = None 758 ) -> t.Optional[exp.Expression]: 759 bracket = super()._parse_bracket(this) 760 761 if this is bracket: 762 return bracket 763 764 if isinstance(bracket, exp.Bracket): 765 for expression in bracket.expressions: 766 name = expression.name.upper() 767 768 if name not in self.BRACKET_OFFSETS: 769 break 770 771 offset, safe = self.BRACKET_OFFSETS[name] 772 bracket.set("offset", offset) 773 bracket.set("safe", safe) 774 expression.replace(expression.expressions[0]) 775 776 return bracket 777 778 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 779 unnest = super()._parse_unnest(with_alias=with_alias) 780 781 if not unnest: 782 return None 783 784 unnest_expr = seq_get(unnest.expressions, 0) 785 if unnest_expr: 786 from sqlglot.optimizer.annotate_types import annotate_types 787 788 unnest_expr = annotate_types(unnest_expr) 789 790 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 791 # in contrast to other dialects such as DuckDB which flattens only the array by default 792 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 793 array_elem.is_type(exp.DataType.Type.STRUCT) 794 for array_elem in unnest_expr._type.expressions 795 ): 796 unnest.set("explode_array", True) 797 798 return unnest 799 800 def _parse_make_interval(self) -> exp.MakeInterval: 801 expr = exp.MakeInterval() 802 803 for arg_key in expr.arg_types: 804 value = self._parse_lambda() 805 806 if not value: 807 break 808 809 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 810 # that can 
appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 811 if isinstance(value, exp.Kwarg): 812 arg_key = value.this.name 813 814 expr.set(arg_key, value) 815 816 self._match(TokenType.COMMA) 817 818 return expr 819 820 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 821 expr = self.expression( 822 exp.FeaturesAtTime, 823 this=(self._match(TokenType.TABLE) and self._parse_table()) 824 or self._parse_select(nested=True), 825 ) 826 827 while self._match(TokenType.COMMA): 828 arg = self._parse_lambda() 829 830 # Get the LHS of the Kwarg and set the arg to that value, e.g 831 # "num_rows => 1" sets the expr's `num_rows` arg 832 if arg: 833 expr.set(arg.this.name, arg) 834 835 return expr 836 837 def _parse_export_data(self) -> exp.Export: 838 self._match_text_seq("DATA") 839 840 return self.expression( 841 exp.Export, 842 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 843 options=self._parse_properties(), 844 this=self._match_text_seq("AS") and self._parse_select(), 845 ) 846 847 class Generator(generator.Generator): 848 INTERVAL_ALLOWS_PLURAL_FORM = False 849 JOIN_HINTS = False 850 QUERY_HINTS = False 851 TABLE_HINTS = False 852 LIMIT_FETCH = "LIMIT" 853 RENAME_TABLE_WITH_DB = False 854 NVL2_SUPPORTED = False 855 UNNEST_WITH_ORDINALITY = False 856 COLLATE_IS_FUNC = True 857 LIMIT_ONLY_LITERALS = True 858 SUPPORTS_TABLE_ALIAS_COLUMNS = False 859 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 860 JSON_KEY_VALUE_PAIR_SEP = "," 861 NULL_ORDERING_SUPPORTED = False 862 IGNORE_NULLS_IN_FUNC = True 863 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 864 CAN_IMPLEMENT_ARRAY_ANY = True 865 SUPPORTS_TO_NUMBER = False 866 NAMED_PLACEHOLDER_TOKEN = "@" 867 HEX_FUNC = "TO_HEX" 868 WITH_PROPERTIES_PREFIX = "OPTIONS" 869 SUPPORTS_EXPLODING_PROJECTIONS = False 870 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 871 SUPPORTS_UNIX_SECONDS = True 872 873 TRANSFORMS = { 874 **generator.Generator.TRANSFORMS, 875 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 876 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 877 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 878 exp.Array: inline_array_unless_query, 879 exp.ArrayContains: _array_contains_sql, 880 exp.ArrayFilter: filter_array_using_unnest, 881 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 882 exp.CollateProperty: lambda self, e: ( 883 f"DEFAULT COLLATE {self.sql(e, 'this')}" 884 if e.args.get("default") 885 else f"COLLATE {self.sql(e, 'this')}" 886 ), 887 exp.Commit: lambda *_: "COMMIT TRANSACTION", 888 exp.CountIf: rename_func("COUNTIF"), 889 exp.Create: _create_sql, 890 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 891 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 892 exp.DateDiff: lambda self, e: self.func( 893 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 894 ), 895 exp.DateFromParts: rename_func("DATE"), 896 exp.DateStrToDate: datestrtodate_sql, 897 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 898 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 899 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 900 exp.DateTrunc: lambda self, e: self.func( 901 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 902 ), 903 exp.FromTimeZone: lambda self, e: self.func( 904 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 905 ), 906 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 907 exp.GroupConcat: lambda self, e: groupconcat_sql( 908 self, e, func_name="STRING_AGG", within_group=False 909 ), 910 exp.Hex: lambda 
self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 911 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 912 exp.If: if_sql(false_value="NULL"), 913 exp.ILike: no_ilike_sql, 914 exp.IntDiv: rename_func("DIV"), 915 exp.Int64: rename_func("INT64"), 916 exp.JSONExtract: _json_extract_sql, 917 exp.JSONExtractArray: _json_extract_sql, 918 exp.JSONExtractScalar: _json_extract_sql, 919 exp.JSONFormat: rename_func("TO_JSON_STRING"), 920 exp.Levenshtein: _levenshtein_sql, 921 exp.Max: max_or_greatest, 922 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 923 exp.MD5Digest: rename_func("MD5"), 924 exp.Min: min_or_least, 925 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 926 exp.RegexpExtract: lambda self, e: self.func( 927 "REGEXP_EXTRACT", 928 e.this, 929 e.expression, 930 e.args.get("position"), 931 e.args.get("occurrence"), 932 ), 933 exp.RegexpExtractAll: lambda self, e: self.func( 934 "REGEXP_EXTRACT_ALL", e.this, e.expression 935 ), 936 exp.RegexpReplace: regexp_replace_sql, 937 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 938 exp.ReturnsProperty: _returnsproperty_sql, 939 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 940 exp.Select: transforms.preprocess( 941 [ 942 transforms.explode_to_unnest(), 943 transforms.unqualify_unnest, 944 transforms.eliminate_distinct_on, 945 _alias_ordered_group, 946 transforms.eliminate_semi_and_anti_joins, 947 ] 948 ), 949 exp.SHA: rename_func("SHA1"), 950 exp.SHA2: sha256_sql, 951 exp.StabilityProperty: lambda self, e: ( 952 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 953 ), 954 exp.String: rename_func("STRING"), 955 exp.StrPosition: lambda self, e: ( 956 strposition_sql( 957 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 958 ) 959 ), 960 exp.StrToDate: _str_to_datetime_sql, 961 exp.StrToTime: _str_to_datetime_sql, 962 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 963 exp.TimeFromParts: rename_func("TIME"), 964 exp.TimestampFromParts: rename_func("DATETIME"), 965 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 966 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 967 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 968 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 969 exp.TimeStrToTime: timestrtotime_sql, 970 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 971 exp.TsOrDsAdd: _ts_or_ds_add_sql, 972 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 973 exp.TsOrDsToTime: rename_func("TIME"), 974 exp.TsOrDsToDatetime: rename_func("DATETIME"), 975 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 976 exp.Unhex: rename_func("FROM_HEX"), 977 exp.UnixDate: rename_func("UNIX_DATE"), 978 exp.UnixToTime: _unix_to_time_sql, 979 exp.Uuid: lambda *_: "GENERATE_UUID()", 980 exp.Values: _derived_table_values_to_unnest, 981 exp.VariancePop: rename_func("VAR_POP"), 982 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 983 } 984 985 SUPPORTED_JSON_PATH_PARTS = { 986 exp.JSONPathKey, 987 exp.JSONPathRoot, 988 exp.JSONPathSubscript, 989 } 990 991 TYPE_MAPPING = { 992 **generator.Generator.TYPE_MAPPING, 993 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 994 exp.DataType.Type.BIGINT: "INT64", 995 exp.DataType.Type.BINARY: "BYTES", 996 exp.DataType.Type.BLOB: "BYTES", 997 exp.DataType.Type.BOOLEAN: "BOOL", 998 exp.DataType.Type.CHAR: "STRING", 999 exp.DataType.Type.DECIMAL: "NUMERIC", 1000 exp.DataType.Type.DOUBLE: "FLOAT64", 1001 exp.DataType.Type.FLOAT: "FLOAT64", 1002 exp.DataType.Type.INT: "INT64", 
1003 exp.DataType.Type.NCHAR: "STRING", 1004 exp.DataType.Type.NVARCHAR: "STRING", 1005 exp.DataType.Type.SMALLINT: "INT64", 1006 exp.DataType.Type.TEXT: "STRING", 1007 exp.DataType.Type.TIMESTAMP: "DATETIME", 1008 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1009 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1010 exp.DataType.Type.TINYINT: "INT64", 1011 exp.DataType.Type.ROWVERSION: "BYTES", 1012 exp.DataType.Type.UUID: "STRING", 1013 exp.DataType.Type.VARBINARY: "BYTES", 1014 exp.DataType.Type.VARCHAR: "STRING", 1015 exp.DataType.Type.VARIANT: "ANY TYPE", 1016 } 1017 1018 PROPERTIES_LOCATION = { 1019 **generator.Generator.PROPERTIES_LOCATION, 1020 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1021 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1022 } 1023 1024 # WINDOW comes after QUALIFY 1025 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1026 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1027 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1028 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1029 } 1030 1031 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1032 RESERVED_KEYWORDS = { 1033 "all", 1034 "and", 1035 "any", 1036 "array", 1037 "as", 1038 "asc", 1039 "assert_rows_modified", 1040 "at", 1041 "between", 1042 "by", 1043 "case", 1044 "cast", 1045 "collate", 1046 "contains", 1047 "create", 1048 "cross", 1049 "cube", 1050 "current", 1051 "default", 1052 "define", 1053 "desc", 1054 "distinct", 1055 "else", 1056 "end", 1057 "enum", 1058 "escape", 1059 "except", 1060 "exclude", 1061 "exists", 1062 "extract", 1063 "false", 1064 "fetch", 1065 "following", 1066 "for", 1067 "from", 1068 "full", 1069 "group", 1070 "grouping", 1071 "groups", 1072 "hash", 1073 "having", 1074 "if", 1075 "ignore", 1076 "in", 1077 "inner", 1078 "intersect", 1079 "interval", 1080 "into", 1081 "is", 1082 "join", 1083 "lateral", 1084 "left", 1085 "like", 1086 "limit", 1087 "lookup", 1088 "merge", 1089 "natural", 1090 "new", 1091 "no", 1092 "not", 1093 "null", 1094 "nulls", 1095 "of", 1096 "on", 1097 "or", 1098 "order", 1099 "outer", 1100 "over", 1101 "partition", 1102 "preceding", 1103 "proto", 1104 "qualify", 1105 "range", 1106 "recursive", 1107 "respect", 1108 "right", 1109 "rollup", 1110 "rows", 1111 "select", 1112 "set", 1113 "some", 1114 "struct", 1115 "tablesample", 1116 "then", 1117 "to", 1118 "treat", 1119 "true", 1120 "unbounded", 1121 "union", 1122 "unnest", 1123 "using", 1124 "when", 1125 "where", 1126 "window", 1127 "with", 1128 "within", 1129 } 1130 1131 def mod_sql(self, expression: exp.Mod) -> str: 1132 this = expression.this 1133 expr = expression.expression 1134 return self.func( 1135 "MOD", 1136 this.unnest() if isinstance(this, exp.Paren) else this, 1137 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1138 ) 1139 1140 def column_parts(self, expression: exp.Column) -> str: 1141 if expression.meta.get("quoted_column"): 1142 # If a column reference is of the form `dataset.table`.name, we need 1143 # to preserve the quoted table path, otherwise the reference breaks 1144 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1145 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1146 return f"{table_path}.{self.sql(expression, 'this')}" 1147 1148 return super().column_parts(expression) 1149 1150 def table_parts(self, expression: exp.Table) -> str: 1151 # Depending on the context, `x.y` may not resolve to 
the same data source as `x`.`y`, so 1152 # we need to make sure the correct quoting is used in each case. 1153 # 1154 # For example, if there is a CTE x that clashes with a schema name, then the former will 1155 # return the table y in that schema, whereas the latter will return the CTE's y column: 1156 # 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1158 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1159 if expression.meta.get("quoted_table"): 1160 table_parts = ".".join(p.name for p in expression.parts) 1161 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 1163 return super().table_parts(expression) 1164 1165 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1166 this = expression.this 1167 if isinstance(this, exp.TsOrDsToDatetime): 1168 func_name = "FORMAT_DATETIME" 1169 elif isinstance(this, exp.TsOrDsToTimestamp): 1170 func_name = "FORMAT_TIMESTAMP" 1171 else: 1172 func_name = "FORMAT_DATE" 1173 1174 time_expr = ( 1175 this 1176 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1177 else expression 1178 ) 1179 return self.func(func_name, self.format_time(expression), time_expr.this) 1180 1181 def eq_sql(self, expression: exp.EQ) -> str: 1182 # Operands of = cannot be NULL in BigQuery 1183 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1184 if not isinstance(expression.parent, exp.Update): 1185 return "NULL" 1186 1187 return self.binary(expression, "=") 1188 1189 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1190 parent = expression.parent 1191 1192 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1193 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1194 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1195 return self.func( 1196 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1197 ) 1198 1199 return super().attimezone_sql(expression) 1200 1201 def trycast_sql(self, expression: exp.TryCast) -> str: 1202 return self.cast_sql(expression, safe_prefix="SAFE_") 1203 1204 def bracket_sql(self, expression: exp.Bracket) -> str: 1205 this = expression.this 1206 expressions = expression.expressions 1207 1208 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1209 arg = expressions[0] 1210 if arg.type is None: 1211 from sqlglot.optimizer.annotate_types import annotate_types 1212 1213 arg = annotate_types(arg) 1214 1215 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1216 # BQ doesn't support bracket syntax with string values for structs 1217 return f"{self.sql(this)}.{arg.name}" 1218 1219 expressions_sql = self.expressions(expression, flat=True) 1220 offset = expression.args.get("offset") 1221 1222 if offset == 0: 1223 expressions_sql = f"OFFSET({expressions_sql})" 1224 elif offset == 1: 1225 expressions_sql = f"ORDINAL({expressions_sql})" 1226 elif offset is not None: 1227 self.unsupported(f"Unsupported array offset: {offset}") 1228 1229 if expression.args.get("safe"): 1230 expressions_sql = f"SAFE_{expressions_sql}" 1231 1232 return f"{self.sql(this)}[{expressions_sql}]" 1233 1234 def in_unnest_op(self, expression: exp.Unnest) -> str: 1235 return self.sql(expression) 1236 1237 def version_sql(self, expression: exp.Version) -> str: 1238 if expression.name == "TIMESTAMP": 1239 expression.set("this", "SYSTEM_TIME") 1240 return super().version_sql(expression) 1241 1242 def contains_sql(self, expression: exp.Contains) -> str: 1243 this = expression.this 1244 expr = expression.expression 1245 1246 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1247 this = this.this 1248 expr = expr.this 1249 1250 return self.func("CONTAINS_SUBSTR", this, expr) 1251 1252 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1253 this = expression.this 1254 1255 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1256 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1257 # because they aren't literals and so the above syntax is invalid BigQuery. 1258 if isinstance(this, exp.Array): 1259 elem = seq_get(this.expressions, 0) 1260 if not (elem and elem.find(exp.Query)): 1261 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1262 1263 return super().cast_sql(expression, safe_prefix=safe_prefix)
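As a usage sketch of the dash-joined table name handling implemented in _parse_table_part above (the project and dataset names here are made up), sqlglot's public parse_one/sql API should round-trip a dashed BigQuery table reference:

import sqlglot

# Hypothetical names; BigQuery allows dashes in project identifiers, which
# _parse_table_part stitches back into a single Identifier node.
expr = sqlglot.parse_one("SELECT * FROM my-project.my_dataset.my_table", read="bigquery")
print(expr.sql("bigquery"))  # expected to round-trip the dashed project name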
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
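A hedged illustration of this setting, assuming BigQuery's LOG(x, base) and Spark's LOG(base, x) argument orders; the exact output may differ by sqlglot version and is printed rather than asserted:

import sqlglot

# The base's position differs across dialects, so transpiling should reorder it.
print(sqlglot.transpile("SELECT LOG(x, 10)", read="bigquery", write="spark")[0])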
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
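A sketch of this difference using sqlglot's qualify pass (a real sqlglot API); the expected resolution follows the note above and is printed, not asserted:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1"
# Under BigQuery semantics, the GROUP BY/HAVING references should resolve
# through the alias to data.id, while the WHERE reference stays data.my_id.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))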
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether hex strings such as x'CC' evaluate to integer or binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
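A small sketch of the time-format mapping in action, transpiling a BigQuery format string to DuckDB's strptime-style function; the output shape is an expectation, not a guarantee:

import sqlglot

# BigQuery's %-codes are already strftime-like; TIME_MAPPING lets sqlglot
# rewrite them for dialects whose format codes differ.
print(sqlglot.transpile("SELECT PARSE_DATE('%Y%m%d', x)", read="bigquery", write="duckdb")[0])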
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
425 def normalize_identifier(self, expression: E) -> E: 426 if ( 427 isinstance(expression, exp.Identifier) 428 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 429 ): 430 parent = expression.parent 431 while isinstance(parent, exp.Dot): 432 parent = parent.parent 433 434 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 435 # by default. The following check uses a heuristic to detect tables based on whether 436 # they are qualified. This should generally be correct, because tables in BigQuery 437 # must be qualified with at least a dataset, unless @@dataset_id is set. 438 case_sensitive = ( 439 isinstance(parent, exp.UserDefinedFunction) 440 or ( 441 isinstance(parent, exp.Table) 442 and parent.db 443 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 444 ) 445 or expression.meta.get("is_table") 446 ) 447 if not case_sensitive: 448 expression.set("this", expression.this.lower()) 449 450 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
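A minimal sketch of these rules via the normalize_identifiers pass (a real sqlglot API); the outputs in the comments are expectations, not asserts:

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Postgres lowercases unquoted identifiers, Snowflake uppercases them.
print(normalize_identifiers(sqlglot.parse_one("SELECT FoO"), dialect="postgres").sql("postgres"))    # SELECT foo
print(normalize_identifiers(sqlglot.parse_one("SELECT FoO"), dialect="snowflake").sql("snowflake"))  # SELECT FOO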
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
452 class Tokenizer(tokens.Tokenizer): 453 QUOTES = ["'", '"', '"""', "'''"] 454 COMMENTS = ["--", "#", ("/*", "*/")] 455 IDENTIFIERS = ["`"] 456 STRING_ESCAPES = ["\\"] 457 458 HEX_STRINGS = [("0x", ""), ("0X", "")] 459 460 BYTE_STRINGS = [ 461 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 462 ] 463 464 RAW_STRINGS = [ 465 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 466 ] 467 468 KEYWORDS = { 469 **tokens.Tokenizer.KEYWORDS, 470 "ANY TYPE": TokenType.VARIANT, 471 "BEGIN": TokenType.COMMAND, 472 "BEGIN TRANSACTION": TokenType.BEGIN, 473 "BYTEINT": TokenType.INT, 474 "BYTES": TokenType.BINARY, 475 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 476 "DATETIME": TokenType.TIMESTAMP, 477 "DECLARE": TokenType.COMMAND, 478 "ELSEIF": TokenType.COMMAND, 479 "EXCEPTION": TokenType.COMMAND, 480 "EXPORT": TokenType.EXPORT, 481 "FLOAT64": TokenType.DOUBLE, 482 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 483 "MODEL": TokenType.MODEL, 484 "NOT DETERMINISTIC": TokenType.VOLATILE, 485 "RECORD": TokenType.STRUCT, 486 "TIMESTAMP": TokenType.TIMESTAMPTZ, 487 } 488 KEYWORDS.pop("DIV") 489 KEYWORDS.pop("VALUES") 490 KEYWORDS.pop("/*+")
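As a quick sketch of the tokenizer settings above, sqlglot.tokenize can surface BigQuery's byte strings, raw strings, and backtick identifiers; the identifier shown is made up:

import sqlglot

for token in sqlglot.tokenize("SELECT b'bytes', r'raw', `quoted-id`", read="bigquery"):
    print(token.token_type, token.text)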
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
492 class Parser(parser.Parser): 493 PREFIXED_PIVOT_COLUMNS = True 494 LOG_DEFAULTS_TO_LN = True 495 SUPPORTS_IMPLICIT_UNNEST = True 496 497 FUNCTIONS = { 498 **parser.Parser.FUNCTIONS, 499 "CONTAINS_SUBSTR": _build_contains_substring, 500 "DATE": _build_date, 501 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 502 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 503 "DATE_TRUNC": lambda args: exp.DateTrunc( 504 unit=exp.Literal.string(str(seq_get(args, 1))), 505 this=seq_get(args, 0), 506 zone=seq_get(args, 2), 507 ), 508 "DATETIME": _build_datetime, 509 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 510 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 511 "DIV": binary_from_function(exp.IntDiv), 512 "EDIT_DISTANCE": _build_levenshtein, 513 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 514 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 515 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 516 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 517 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 518 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 519 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 520 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 521 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 522 "MD5": exp.MD5Digest.from_arg_list, 523 "TO_HEX": _build_to_hex, 524 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 525 [seq_get(args, 1), seq_get(args, 0)] 526 ), 527 "PARSE_TIMESTAMP": _build_parse_timestamp, 528 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 529 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 530 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 531 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 532 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 533 ), 534 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 535 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 536 "SPLIT": lambda args: exp.Split( 537 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 538 this=seq_get(args, 0), 539 expression=seq_get(args, 1) or exp.Literal.string(","), 540 ), 541 "STRPOS": exp.StrPosition.from_arg_list, 542 "TIME": _build_time, 543 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 544 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 545 "TIMESTAMP": _build_timestamp, 546 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 547 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 548 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 549 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 550 ), 551 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 552 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 553 ), 554 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 555 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 556 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 557 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 558 } 559 560 FUNCTION_PARSERS = { 561 **parser.Parser.FUNCTION_PARSERS, 562 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 563 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 564 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 565 } 566 FUNCTION_PARSERS.pop("TRIM") 567 568 NO_PAREN_FUNCTIONS = { 569 **parser.Parser.NO_PAREN_FUNCTIONS, 570 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 571 } 572 573 NESTED_TYPE_TOKENS = { 574 *parser.Parser.NESTED_TYPE_TOKENS, 575 TokenType.TABLE, 576 } 577 578 PROPERTY_PARSERS = { 579 **parser.Parser.PROPERTY_PARSERS, 580 "NOT DETERMINISTIC": lambda self: self.expression( 581 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 582 ), 583 "OPTIONS": lambda self: self._parse_with_property(), 584 } 585 586 CONSTRAINT_PARSERS = { 587 **parser.Parser.CONSTRAINT_PARSERS, 588 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 589 } 590 591 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 592 RANGE_PARSERS.pop(TokenType.OVERLAPS) 593 594 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 595 596 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 597 598 STATEMENT_PARSERS = { 599 **parser.Parser.STATEMENT_PARSERS, 600 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 601 TokenType.END: lambda self: self._parse_as_command(self._prev), 602 TokenType.FOR: lambda self: self._parse_for_in(), 603 TokenType.EXPORT: lambda self: self._parse_export_data(), 604 } 605 606 BRACKET_OFFSETS = { 607 "OFFSET": (0, False), 608 "ORDINAL": (1, False), 609 "SAFE_OFFSET": (0, True), 610 "SAFE_ORDINAL": (1, True), 611 } 612 613 def _parse_for_in(self) -> exp.ForIn: 614 this = self._parse_range() 615 self._match_text_seq("DO") 616 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 617 618 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 619 this = super()._parse_table_part(schema=schema) or self._parse_number() 620 621 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 622 if isinstance(this, exp.Identifier): 623 table_name = this.name 624 while self._match(TokenType.DASH, advance=False) and self._next: 625 start = self._curr 626 while self._is_connected() and not self._match_set( 627 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 628 ): 629 self._advance() 630 631 table_name += self._find_sql(start, self._prev) 632 633 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 634 elif isinstance(this, exp.Literal): 635 table_name = this.name 636 637 if self._is_connected() and self._parse_var(any_token=True): 638 table_name += self._prev.text 639 640 this = exp.Identifier(this=table_name, quoted=True) 641 642 return this 643 644 def _parse_table_parts( 645 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 646 ) -> exp.Table: 647 table = super()._parse_table_parts( 648 schema=schema, is_db_reference=is_db_reference, wildcard=True 649 ) 650 651 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 652 if not table.catalog: 653 if table.db: 654 parts = table.db.split(".") 655 if len(parts) == 2 and not table.args["db"].quoted: 656 table.set("catalog", exp.Identifier(this=parts[0])) 657 table.set("db", exp.Identifier(this=parts[1])) 658 else: 659 parts = table.name.split(".") 660 if len(parts) == 2 and not table.this.quoted: 661 table.set("db", exp.Identifier(this=parts[0])) 662 table.set("this", exp.Identifier(this=parts[1])) 663 664 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 665 alias = table.this 666 catalog, db, this, *rest = ( 667 exp.to_identifier(p, quoted=True) 668 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 669 ) 670 671 if rest and this: 672 this = exp.Dot.build([this, *rest]) # type: ignore 673 674 table = exp.Table( 675 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 676 ) 677 table.meta["quoted_table"] = True 678 else: 679 alias = None 680 681 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 682 # dataset, so if the project identifier is omitted we need to fix the ast so that 683 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 684 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 685 # views, because it would seem like the "catalog" part is set, when it'd actually 686 # be the region/dataset. Merging the two identifiers into a single one is done to 687 # avoid producing a 4-part Table reference, which would cause issues in the schema 688 # module, when there are 3-part table names mixed with information schema views. 689 # 690 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 691 table_parts = table.parts 692 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 693 # We need to alias the table here to avoid breaking existing qualified columns. 694 # This is expected to be safe, because if there's an actual alias coming up in 695 # the token stream, it will overwrite this one. If there isn't one, we are only 696 # exposing the name that can be used to reference the view explicitly (a no-op). 697 exp.alias_( 698 table, 699 t.cast(exp.Identifier, alias or table_parts[-1]), 700 table=True, 701 copy=False, 702 ) 703 704 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 705 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 706 table.set("db", seq_get(table_parts, -3)) 707 table.set("catalog", seq_get(table_parts, -4)) 708 709 return table 710 711 def _parse_column(self) -> t.Optional[exp.Expression]: 712 column = super()._parse_column() 713 if isinstance(column, exp.Column): 714 parts = column.parts 715 if any("." in p.name for p in parts): 716 catalog, db, table, this, *rest = ( 717 exp.to_identifier(p, quoted=True) 718 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 719 ) 720 721 if rest and this: 722 this = exp.Dot.build([this, *rest]) # type: ignore 723 724 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 725 column.meta["quoted_column"] = True 726 727 return column 728 729 @t.overload 730 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 731 732 @t.overload 733 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
734 735 def _parse_json_object(self, agg=False): 736 json_object = super()._parse_json_object() 737 array_kv_pair = seq_get(json_object.expressions, 0) 738 739 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 740 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 741 if ( 742 array_kv_pair 743 and isinstance(array_kv_pair.this, exp.Array) 744 and isinstance(array_kv_pair.expression, exp.Array) 745 ): 746 keys = array_kv_pair.this.expressions 747 values = array_kv_pair.expression.expressions 748 749 json_object.set( 750 "expressions", 751 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 752 ) 753 754 return json_object 755 756 def _parse_bracket( 757 self, this: t.Optional[exp.Expression] = None 758 ) -> t.Optional[exp.Expression]: 759 bracket = super()._parse_bracket(this) 760 761 if this is bracket: 762 return bracket 763 764 if isinstance(bracket, exp.Bracket): 765 for expression in bracket.expressions: 766 name = expression.name.upper() 767 768 if name not in self.BRACKET_OFFSETS: 769 break 770 771 offset, safe = self.BRACKET_OFFSETS[name] 772 bracket.set("offset", offset) 773 bracket.set("safe", safe) 774 expression.replace(expression.expressions[0]) 775 776 return bracket 777 778 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 779 unnest = super()._parse_unnest(with_alias=with_alias) 780 781 if not unnest: 782 return None 783 784 unnest_expr = seq_get(unnest.expressions, 0) 785 if unnest_expr: 786 from sqlglot.optimizer.annotate_types import annotate_types 787 788 unnest_expr = annotate_types(unnest_expr) 789 790 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 791 # in contrast to other dialects such as DuckDB which flattens only the array by default 792 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 793 array_elem.is_type(exp.DataType.Type.STRUCT) 794 for array_elem in unnest_expr._type.expressions 795 ): 796 unnest.set("explode_array", True) 797 798 return unnest 799 800 def _parse_make_interval(self) -> exp.MakeInterval: 801 expr = exp.MakeInterval() 802 803 for arg_key in expr.arg_types: 804 value = self._parse_lambda() 805 806 if not value: 807 break 808 809 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 810 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 811 if isinstance(value, exp.Kwarg): 812 arg_key = value.this.name 813 814 expr.set(arg_key, value) 815 816 self._match(TokenType.COMMA) 817 818 return expr 819 820 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 821 expr = self.expression( 822 exp.FeaturesAtTime, 823 this=(self._match(TokenType.TABLE) and self._parse_table()) 824 or self._parse_select(nested=True), 825 ) 826 827 while self._match(TokenType.COMMA): 828 arg = self._parse_lambda() 829 830 # Get the LHS of the Kwarg and set the arg to that value, e.g 831 # "num_rows => 1" sets the expr's `num_rows` arg 832 if arg: 833 expr.set(arg.this.name, arg) 834 835 return expr 836 837 def _parse_export_data(self) -> exp.Export: 838 self._match_text_seq("DATA") 839 840 return self.expression( 841 exp.Export, 842 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 843 options=self._parse_properties(), 844 this=self._match_text_seq("AS") and self._parse_select(), 845 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
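A short parsing sketch tied to the INFORMATION_SCHEMA handling in _parse_table_parts above (the dataset name is made up; the expectation mirrors the comment in that method and is printed, not asserted):

import sqlglot
from sqlglot import exp

ast = sqlglot.parse_one("SELECT * FROM dataset.INFORMATION_SCHEMA.TABLES", read="bigquery")
table = ast.find(exp.Table)
# Expected: a single quoted Identifier spanning "INFORMATION_SCHEMA.TABLES",
# so the "catalog" slot is not mistaken for a project id.
print(table.this)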
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
847 class Generator(generator.Generator): 848 INTERVAL_ALLOWS_PLURAL_FORM = False 849 JOIN_HINTS = False 850 QUERY_HINTS = False 851 TABLE_HINTS = False 852 LIMIT_FETCH = "LIMIT" 853 RENAME_TABLE_WITH_DB = False 854 NVL2_SUPPORTED = False 855 UNNEST_WITH_ORDINALITY = False 856 COLLATE_IS_FUNC = True 857 LIMIT_ONLY_LITERALS = True 858 SUPPORTS_TABLE_ALIAS_COLUMNS = False 859 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 860 JSON_KEY_VALUE_PAIR_SEP = "," 861 NULL_ORDERING_SUPPORTED = False 862 IGNORE_NULLS_IN_FUNC = True 863 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 864 CAN_IMPLEMENT_ARRAY_ANY = True 865 SUPPORTS_TO_NUMBER = False 866 NAMED_PLACEHOLDER_TOKEN = "@" 867 HEX_FUNC = "TO_HEX" 868 WITH_PROPERTIES_PREFIX = "OPTIONS" 869 SUPPORTS_EXPLODING_PROJECTIONS = False 870 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 871 SUPPORTS_UNIX_SECONDS = True 872 873 TRANSFORMS = { 874 **generator.Generator.TRANSFORMS, 875 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 876 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 877 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 878 exp.Array: inline_array_unless_query, 879 exp.ArrayContains: _array_contains_sql, 880 exp.ArrayFilter: filter_array_using_unnest, 881 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 882 exp.CollateProperty: lambda self, e: ( 883 f"DEFAULT COLLATE {self.sql(e, 'this')}" 884 if e.args.get("default") 885 else f"COLLATE {self.sql(e, 'this')}" 886 ), 887 exp.Commit: lambda *_: "COMMIT TRANSACTION", 888 exp.CountIf: rename_func("COUNTIF"), 889 exp.Create: _create_sql, 890 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 891 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 892 exp.DateDiff: lambda self, e: self.func( 893 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 894 ), 895 exp.DateFromParts: rename_func("DATE"), 896 exp.DateStrToDate: datestrtodate_sql, 897 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 898 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 899 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 900 exp.DateTrunc: lambda self, e: self.func( 901 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 902 ), 903 exp.FromTimeZone: lambda self, e: self.func( 904 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 905 ), 906 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 907 exp.GroupConcat: lambda self, e: groupconcat_sql( 908 self, e, func_name="STRING_AGG", within_group=False 909 ), 910 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 911 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 912 exp.If: if_sql(false_value="NULL"), 913 exp.ILike: no_ilike_sql, 914 exp.IntDiv: rename_func("DIV"), 915 exp.Int64: rename_func("INT64"), 916 exp.JSONExtract: _json_extract_sql, 917 exp.JSONExtractArray: _json_extract_sql, 918 exp.JSONExtractScalar: _json_extract_sql, 919 exp.JSONFormat: rename_func("TO_JSON_STRING"), 920 exp.Levenshtein: _levenshtein_sql, 921 exp.Max: max_or_greatest, 922 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 923 exp.MD5Digest: rename_func("MD5"), 924 exp.Min: min_or_least, 925 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 926 exp.RegexpExtract: lambda self, e: self.func( 927 "REGEXP_EXTRACT", 928 e.this, 929 e.expression, 930 e.args.get("position"), 931 e.args.get("occurrence"), 932 ), 933 exp.RegexpExtractAll: lambda self, e: self.func( 934 "REGEXP_EXTRACT_ALL", e.this, 
e.expression 935 ), 936 exp.RegexpReplace: regexp_replace_sql, 937 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 938 exp.ReturnsProperty: _returnsproperty_sql, 939 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 940 exp.Select: transforms.preprocess( 941 [ 942 transforms.explode_to_unnest(), 943 transforms.unqualify_unnest, 944 transforms.eliminate_distinct_on, 945 _alias_ordered_group, 946 transforms.eliminate_semi_and_anti_joins, 947 ] 948 ), 949 exp.SHA: rename_func("SHA1"), 950 exp.SHA2: sha256_sql, 951 exp.StabilityProperty: lambda self, e: ( 952 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 953 ), 954 exp.String: rename_func("STRING"), 955 exp.StrPosition: lambda self, e: ( 956 strposition_sql( 957 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 958 ) 959 ), 960 exp.StrToDate: _str_to_datetime_sql, 961 exp.StrToTime: _str_to_datetime_sql, 962 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 963 exp.TimeFromParts: rename_func("TIME"), 964 exp.TimestampFromParts: rename_func("DATETIME"), 965 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 966 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 967 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 968 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 969 exp.TimeStrToTime: timestrtotime_sql, 970 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 971 exp.TsOrDsAdd: _ts_or_ds_add_sql, 972 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 973 exp.TsOrDsToTime: rename_func("TIME"), 974 exp.TsOrDsToDatetime: rename_func("DATETIME"), 975 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 976 exp.Unhex: rename_func("FROM_HEX"), 977 exp.UnixDate: rename_func("UNIX_DATE"), 978 exp.UnixToTime: _unix_to_time_sql, 979 exp.Uuid: lambda *_: "GENERATE_UUID()", 980 exp.Values: _derived_table_values_to_unnest, 981 exp.VariancePop: rename_func("VAR_POP"), 982 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 983 } 984 985 SUPPORTED_JSON_PATH_PARTS = { 986 exp.JSONPathKey, 987 exp.JSONPathRoot, 988 exp.JSONPathSubscript, 989 } 990 991 TYPE_MAPPING = { 992 **generator.Generator.TYPE_MAPPING, 993 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 994 exp.DataType.Type.BIGINT: "INT64", 995 exp.DataType.Type.BINARY: "BYTES", 996 exp.DataType.Type.BLOB: "BYTES", 997 exp.DataType.Type.BOOLEAN: "BOOL", 998 exp.DataType.Type.CHAR: "STRING", 999 exp.DataType.Type.DECIMAL: "NUMERIC", 1000 exp.DataType.Type.DOUBLE: "FLOAT64", 1001 exp.DataType.Type.FLOAT: "FLOAT64", 1002 exp.DataType.Type.INT: "INT64", 1003 exp.DataType.Type.NCHAR: "STRING", 1004 exp.DataType.Type.NVARCHAR: "STRING", 1005 exp.DataType.Type.SMALLINT: "INT64", 1006 exp.DataType.Type.TEXT: "STRING", 1007 exp.DataType.Type.TIMESTAMP: "DATETIME", 1008 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1009 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1010 exp.DataType.Type.TINYINT: "INT64", 1011 exp.DataType.Type.ROWVERSION: "BYTES", 1012 exp.DataType.Type.UUID: "STRING", 1013 exp.DataType.Type.VARBINARY: "BYTES", 1014 exp.DataType.Type.VARCHAR: "STRING", 1015 exp.DataType.Type.VARIANT: "ANY TYPE", 1016 } 1017 1018 PROPERTIES_LOCATION = { 1019 **generator.Generator.PROPERTIES_LOCATION, 1020 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1021 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1022 } 1023 1024 # WINDOW comes after QUALIFY 1025 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1026 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1027 "qualify": 
generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1028 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1029 } 1030 1031 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1032 RESERVED_KEYWORDS = { 1033 "all", 1034 "and", 1035 "any", 1036 "array", 1037 "as", 1038 "asc", 1039 "assert_rows_modified", 1040 "at", 1041 "between", 1042 "by", 1043 "case", 1044 "cast", 1045 "collate", 1046 "contains", 1047 "create", 1048 "cross", 1049 "cube", 1050 "current", 1051 "default", 1052 "define", 1053 "desc", 1054 "distinct", 1055 "else", 1056 "end", 1057 "enum", 1058 "escape", 1059 "except", 1060 "exclude", 1061 "exists", 1062 "extract", 1063 "false", 1064 "fetch", 1065 "following", 1066 "for", 1067 "from", 1068 "full", 1069 "group", 1070 "grouping", 1071 "groups", 1072 "hash", 1073 "having", 1074 "if", 1075 "ignore", 1076 "in", 1077 "inner", 1078 "intersect", 1079 "interval", 1080 "into", 1081 "is", 1082 "join", 1083 "lateral", 1084 "left", 1085 "like", 1086 "limit", 1087 "lookup", 1088 "merge", 1089 "natural", 1090 "new", 1091 "no", 1092 "not", 1093 "null", 1094 "nulls", 1095 "of", 1096 "on", 1097 "or", 1098 "order", 1099 "outer", 1100 "over", 1101 "partition", 1102 "preceding", 1103 "proto", 1104 "qualify", 1105 "range", 1106 "recursive", 1107 "respect", 1108 "right", 1109 "rollup", 1110 "rows", 1111 "select", 1112 "set", 1113 "some", 1114 "struct", 1115 "tablesample", 1116 "then", 1117 "to", 1118 "treat", 1119 "true", 1120 "unbounded", 1121 "union", 1122 "unnest", 1123 "using", 1124 "when", 1125 "where", 1126 "window", 1127 "with", 1128 "within", 1129 } 1130 1131 def mod_sql(self, expression: exp.Mod) -> str: 1132 this = expression.this 1133 expr = expression.expression 1134 return self.func( 1135 "MOD", 1136 this.unnest() if isinstance(this, exp.Paren) else this, 1137 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1138 ) 1139 1140 def column_parts(self, expression: exp.Column) -> str: 1141 if expression.meta.get("quoted_column"): 1142 # If a column reference is of the form `dataset.table`.name, we need 1143 # to preserve the quoted table path, otherwise the reference breaks 1144 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1145 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1146 return f"{table_path}.{self.sql(expression, 'this')}" 1147 1148 return super().column_parts(expression) 1149 1150 def table_parts(self, expression: exp.Table) -> str: 1151 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1152 # we need to make sure the correct quoting is used in each case. 
1153 # 1154 # For example, if there is a CTE x that clashes with a schema name, then the former will 1155 # return the table y in that schema, whereas the latter will return the CTE's y column: 1156 # 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1158 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1159 if expression.meta.get("quoted_table"): 1160 table_parts = ".".join(p.name for p in expression.parts) 1161 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 1163 return super().table_parts(expression) 1164 1165 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1166 this = expression.this 1167 if isinstance(this, exp.TsOrDsToDatetime): 1168 func_name = "FORMAT_DATETIME" 1169 elif isinstance(this, exp.TsOrDsToTimestamp): 1170 func_name = "FORMAT_TIMESTAMP" 1171 else: 1172 func_name = "FORMAT_DATE" 1173 1174 time_expr = ( 1175 this 1176 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1177 else expression 1178 ) 1179 return self.func(func_name, self.format_time(expression), time_expr.this) 1180 1181 def eq_sql(self, expression: exp.EQ) -> str: 1182 # Operands of = cannot be NULL in BigQuery 1183 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1184 if not isinstance(expression.parent, exp.Update): 1185 return "NULL" 1186 1187 return self.binary(expression, "=") 1188 1189 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1190 parent = expression.parent 1191 1192 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1193 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1194 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1195 return self.func( 1196 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1197 ) 1198 1199 return super().attimezone_sql(expression) 1200 1201 def trycast_sql(self, expression: exp.TryCast) -> str: 1202 return self.cast_sql(expression, safe_prefix="SAFE_") 1203 1204 def bracket_sql(self, expression: exp.Bracket) -> str: 1205 this = expression.this 1206 expressions = expression.expressions 1207 1208 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1209 arg = expressions[0] 1210 if arg.type is None: 1211 from sqlglot.optimizer.annotate_types import annotate_types 1212 1213 arg = annotate_types(arg) 1214 1215 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1216 # BQ doesn't support bracket syntax with string values for structs 1217 return f"{self.sql(this)}.{arg.name}" 1218 1219 expressions_sql = self.expressions(expression, flat=True) 1220 offset = expression.args.get("offset") 1221 1222 if offset == 0: 1223 expressions_sql = f"OFFSET({expressions_sql})" 1224 elif offset == 1: 1225 expressions_sql = f"ORDINAL({expressions_sql})" 1226 elif offset is not None: 1227 self.unsupported(f"Unsupported array offset: {offset}") 1228 1229 if expression.args.get("safe"): 1230 expressions_sql = f"SAFE_{expressions_sql}" 1231 1232 return f"{self.sql(this)}[{expressions_sql}]" 1233 1234 def in_unnest_op(self, expression: exp.Unnest) -> str: 1235 return self.sql(expression) 1236 1237 def version_sql(self, expression: exp.Version) -> str: 1238 if expression.name == "TIMESTAMP": 1239 expression.set("this", "SYSTEM_TIME") 1240 return super().version_sql(expression) 1241 1242 def contains_sql(self, expression: exp.Contains) -> str: 1243 this = expression.this 1244 expr 
= expression.expression 1245 1246 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1247 this = this.this 1248 expr = expr.this 1249 1250 return self.func("CONTAINS_SUBSTR", this, expr) 1251 1252 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1253 this = expression.this 1254 1255 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1256 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1257 # because they aren't literals and so the above syntax is invalid BigQuery. 1258 if isinstance(this, exp.Array): 1259 elem = seq_get(this.expressions, 0) 1260 if not (elem and elem.find(exp.Query)): 1261 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1262 1263 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
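Two quick generation sketches drawn from the TRANSFORMS table and helper methods above; the outputs in the comments are expectations, not asserts:

import sqlglot

# trycast_sql adds the SAFE_ prefix, and TYPE_MAPPING rewrites INT to INT64.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])  # SELECT SAFE_CAST(x AS INT64)
# mod_sql renders the % operator as BigQuery's MOD function.
print(sqlglot.transpile("SELECT 5 % 2", write="bigquery")[0])               # SELECT MOD(5, 2)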
1140 def column_parts(self, expression: exp.Column) -> str: 1141 if expression.meta.get("quoted_column"): 1142 # If a column reference is of the form `dataset.table`.name, we need 1143 # to preserve the quoted table path, otherwise the reference breaks 1144 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1145 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1146 return f"{table_path}.{self.sql(expression, 'this')}" 1147 1148 return super().column_parts(expression)
1150 def table_parts(self, expression: exp.Table) -> str: 1151 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1152 # we need to make sure the correct quoting is used in each case. 1153 # 1154 # For example, if there is a CTE x that clashes with a schema name, then the former will 1155 # return the table y in that schema, whereas the latter will return the CTE's y column: 1156 # 1157 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1158 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1159 if expression.meta.get("quoted_table"): 1160 table_parts = ".".join(p.name for p in expression.parts) 1161 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1162 1163 return super().table_parts(expression)
1165 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1166 this = expression.this 1167 if isinstance(this, exp.TsOrDsToDatetime): 1168 func_name = "FORMAT_DATETIME" 1169 elif isinstance(this, exp.TsOrDsToTimestamp): 1170 func_name = "FORMAT_TIMESTAMP" 1171 else: 1172 func_name = "FORMAT_DATE" 1173 1174 time_expr = ( 1175 this 1176 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1177 else expression 1178 ) 1179 return self.func(func_name, self.format_time(expression), time_expr.this)
1181 def eq_sql(self, expression: exp.EQ) -> str: 1182 # Operands of = cannot be NULL in BigQuery 1183 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1184 if not isinstance(expression.parent, exp.Update): 1185 return "NULL" 1186 1187 return self.binary(expression, "=")
1189 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1190 parent = expression.parent 1191 1192 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1193 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1194 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1195 return self.func( 1196 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1197 ) 1198 1199 return super().attimezone_sql(expression)
1204 def bracket_sql(self, expression: exp.Bracket) -> str: 1205 this = expression.this 1206 expressions = expression.expressions 1207 1208 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1209 arg = expressions[0] 1210 if arg.type is None: 1211 from sqlglot.optimizer.annotate_types import annotate_types 1212 1213 arg = annotate_types(arg) 1214 1215 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1216 # BQ doesn't support bracket syntax with string values for structs 1217 return f"{self.sql(this)}.{arg.name}" 1218 1219 expressions_sql = self.expressions(expression, flat=True) 1220 offset = expression.args.get("offset") 1221 1222 if offset == 0: 1223 expressions_sql = f"OFFSET({expressions_sql})" 1224 elif offset == 1: 1225 expressions_sql = f"ORDINAL({expressions_sql})" 1226 elif offset is not None: 1227 self.unsupported(f"Unsupported array offset: {offset}") 1228 1229 if expression.args.get("safe"): 1230 expressions_sql = f"SAFE_{expressions_sql}" 1231 1232 return f"{self.sql(this)}[{expressions_sql}]"
1252 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1253 this = expression.this 1254 1255 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1256 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1257 # because they aren't literals and so the above syntax is invalid BigQuery. 1258 if isinstance(this, exp.Array): 1259 elem = seq_get(this.expressions, 0) 1260 if not (elem and elem.find(exp.Query)): 1261 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1262 1263 return super().cast_sql(expression, safe_prefix=safe_prefix)
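A consolidated sketch of the eq_sql, bracket_sql, and cast_sql behaviors shown above; the outputs in the comments are expectations, not asserts:

import sqlglot

# eq_sql: comparing against NULL with = yields NULL in BigQuery.
print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])  # SELECT NULL
# bracket_sql: SAFE_OFFSET subscripts should round-trip unchanged.
print(sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)]", read="bigquery", write="bigquery")[0])  # SELECT arr[SAFE_OFFSET(0)]
# cast_sql: inline type-annotated ARRAY literals should also round-trip.
print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])  # SELECT ARRAY<INT64>[1, 2, 3]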
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql