sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    str_position_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression
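
# Illustration (editor's sketch, not part of the upstream module): when a query both
# groups by an expression and orders by that expression's select alias,
# `_alias_ordered_group` rewrites the grouped expression to the alias, sidestepping
# the BigQuery bug linked above. Roughly:
#
#   import sqlglot
#   sqlglot.transpile("SELECT x + 1 AS y FROM t GROUP BY x + 1 ORDER BY y", write="bigquery")
#   # -> roughly: SELECT x + 1 AS y FROM t GROUP BY y ORDER BY y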
def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)
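
# Illustration (editor's sketch, not part of the upstream module): during generation,
# the exp.CTE transform below runs `_pushdown_cte_column_names`, turning CTE column
# lists (which BigQuery rejects) into projection aliases. Roughly:
#
#   import sqlglot
#   sqlglot.transpile("WITH t(a) AS (SELECT 1) SELECT a FROM t", write="bigquery")
#   # -> roughly: WITH t AS (SELECT 1 AS a) SELECT a FROM t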
def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    |  OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0))

    return _builder


def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTRING", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)
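
# Illustration (editor's sketch, not part of the upstream module): when a
# REGEXP_EXTRACT pattern has exactly one capturing group, the builder above pins
# `group` to 1, so dialects whose REGEXP_EXTRACT would otherwise return the whole
# match stay consistent with BigQuery's group-returning semantics. For example:
#
#   import sqlglot
#   from sqlglot import exp
#   ast = sqlglot.parse_one("SELECT REGEXP_EXTRACT(s, '(\\d+)')", read="bigquery")
#   ast.find(exp.RegexpExtract).args.get("group")  # roughly: Literal.number(1)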
class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True
    PRESERVE_ORIGINAL_NAMES = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression
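
    # Illustration (editor's sketch, not part of the upstream module): identifiers
    # that the heuristic above doesn't flag as case-sensitive get lowercased, e.g.:
    #
    #   from sqlglot import exp
    #   from sqlglot.dialects.dialect import Dialect
    #   bq = Dialect.get_or_raise("bigquery")
    #   bq.normalize_identifier(exp.to_identifier("MyCol")).name  # roughly: 'mycol'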
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")
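
    # Illustration (editor's sketch, not part of the upstream module): the keyword
    # remapping above means BigQuery's tz-aware TIMESTAMP tokenizes as TIMESTAMPTZ
    # internally (and DATETIME as the naive TIMESTAMP), so type fidelity survives
    # transpilation, e.g. roughly:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", read="bigquery", write="duckdb")
    #   # -> roughly: SELECT CAST(x AS TIMESTAMPTZ)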
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTRING": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this
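
        # Illustration (editor's sketch, not part of the upstream module):
        # `_parse_table_part` above stitches dash-separated table names, which are
        # legal in BigQuery, back into a single identifier instead of a subtraction:
        #
        #   import sqlglot
        #   sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery")
        #   # parses `my-project` as one catalog identifier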
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                table.set("this", exp.Identifier(this=info_schema_view, quoted=True))
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column
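
        # Illustration (editor's sketch, not part of the upstream module): a fully
        # quoted path like `proj.dataset.tbl` is split into catalog/db/name parts and
        # flagged so it round-trips with the same quoting (see `table_parts` in the
        # Generator below):
        #
        #   import sqlglot
        #   from sqlglot import exp
        #   t = sqlglot.parse_one("SELECT * FROM `proj.dataset.tbl`", read="bigquery").find(exp.Table)
        #   (t.catalog, t.db, t.name)  # roughly: ('proj', 'dataset', 'tbl')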
        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr
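
    # Illustration (editor's sketch, not part of the upstream module): `_parse_bracket`
    # above folds BigQuery's OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL wrappers into the
    # `offset`/`safe` args of exp.Bracket, and `bracket_sql` in the Generator below
    # re-emits them, so the syntax round-trips:
    #
    #   import sqlglot
    #   sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery", write="bigquery")
    #   # -> roughly unchanged: SELECT arr[SAFE_OFFSET(0)] FROM t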
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True
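
        # Illustration (editor's sketch, not part of the upstream module): since
        # BigQuery has no derived-table VALUES clause, the exp.Values entry in
        # TRANSFORMS below routes it through `_derived_table_values_to_unnest`,
        # producing an UNNEST of STRUCTs. Roughly:
        #
        #   import sqlglot
        #   sqlglot.transpile(
        #       "SELECT * FROM (VALUES (1, 'a')) AS t(x, y)", read="duckdb", write="bigquery"
        #   )
        #   # -> roughly: SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)]) AS t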
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: str_position_sql,
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }
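
        # Illustration (editor's sketch, not part of the upstream module): TYPE_MAPPING
        # above drives type rewriting when generating BigQuery SQL, e.g. roughly:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT CAST(x AS BIGINT)", read="postgres", write="bigquery")
        #   # -> SELECT CAST(x AS INT64)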
        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)
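
        # Illustration (editor's sketch, not part of the upstream module): BigQuery
        # spells modulo as a function, so `mod_sql` above renders `%` as MOD(),
        # unwrapping redundant parentheses around the operands:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT (x) % 2", write="bigquery")
        #   # -> roughly: SELECT MOD(x, 2)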
        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), time_expr.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
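
        # Illustration (editor's sketch, not part of the upstream module):
        # `trycast_sql` above maps TRY_CAST onto BigQuery's SAFE_CAST prefix form:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT TRY_CAST(x AS BIGINT)", read="duckdb", write="bigquery")
        #   # -> roughly: SELECT SAFE_CAST(x AS INT64)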
        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTRING", this, expr)
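
        # Illustration (editor's sketch, not part of the upstream module): together
        # with `_build_contains_substring` above, `contains_sql` strips the LOWER()
        # wrappers added at parse time, so CONTAINS_SUBSTRING round-trips cleanly:
        #
        #   import sqlglot
        #   sqlglot.transpile("SELECT CONTAINS_SUBSTRING(a, 'b')", read="bigquery", write="bigquery")
        #   # -> roughly unchanged: SELECT CONTAINS_SUBSTRING(a, 'b')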
992 "cast", 993 "collate", 994 "contains", 995 "create", 996 "cross", 997 "cube", 998 "current", 999 "default", 1000 "define", 1001 "desc", 1002 "distinct", 1003 "else", 1004 "end", 1005 "enum", 1006 "escape", 1007 "except", 1008 "exclude", 1009 "exists", 1010 "extract", 1011 "false", 1012 "fetch", 1013 "following", 1014 "for", 1015 "from", 1016 "full", 1017 "group", 1018 "grouping", 1019 "groups", 1020 "hash", 1021 "having", 1022 "if", 1023 "ignore", 1024 "in", 1025 "inner", 1026 "intersect", 1027 "interval", 1028 "into", 1029 "is", 1030 "join", 1031 "lateral", 1032 "left", 1033 "like", 1034 "limit", 1035 "lookup", 1036 "merge", 1037 "natural", 1038 "new", 1039 "no", 1040 "not", 1041 "null", 1042 "nulls", 1043 "of", 1044 "on", 1045 "or", 1046 "order", 1047 "outer", 1048 "over", 1049 "partition", 1050 "preceding", 1051 "proto", 1052 "qualify", 1053 "range", 1054 "recursive", 1055 "respect", 1056 "right", 1057 "rollup", 1058 "rows", 1059 "select", 1060 "set", 1061 "some", 1062 "struct", 1063 "tablesample", 1064 "then", 1065 "to", 1066 "treat", 1067 "true", 1068 "unbounded", 1069 "union", 1070 "unnest", 1071 "using", 1072 "when", 1073 "where", 1074 "window", 1075 "with", 1076 "within", 1077 } 1078 1079 def mod_sql(self, expression: exp.Mod) -> str: 1080 this = expression.this 1081 expr = expression.expression 1082 return self.func( 1083 "MOD", 1084 this.unnest() if isinstance(this, exp.Paren) else this, 1085 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1086 ) 1087 1088 def column_parts(self, expression: exp.Column) -> str: 1089 if expression.meta.get("quoted_column"): 1090 # If a column reference is of the form `dataset.table`.name, we need 1091 # to preserve the quoted table path, otherwise the reference breaks 1092 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1093 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1094 return f"{table_path}.{self.sql(expression, 'this')}" 1095 1096 return super().column_parts(expression) 1097 1098 def table_parts(self, expression: exp.Table) -> str: 1099 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1100 # we need to make sure the correct quoting is used in each case. 
1101 # 1102 # For example, if there is a CTE x that clashes with a schema name, then the former will 1103 # return the table y in that schema, whereas the latter will return the CTE's y column: 1104 # 1105 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1106 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1107 if expression.meta.get("quoted_table"): 1108 table_parts = ".".join(p.name for p in expression.parts) 1109 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1110 1111 return super().table_parts(expression) 1112 1113 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1114 this = expression.this 1115 if isinstance(this, exp.TsOrDsToDatetime): 1116 func_name = "FORMAT_DATETIME" 1117 elif isinstance(this, exp.TsOrDsToTimestamp): 1118 func_name = "FORMAT_TIMESTAMP" 1119 else: 1120 func_name = "FORMAT_DATE" 1121 1122 time_expr = ( 1123 this 1124 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1125 else expression 1126 ) 1127 return self.func(func_name, self.format_time(expression), time_expr.this) 1128 1129 def eq_sql(self, expression: exp.EQ) -> str: 1130 # Operands of = cannot be NULL in BigQuery 1131 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1132 if not isinstance(expression.parent, exp.Update): 1133 return "NULL" 1134 1135 return self.binary(expression, "=") 1136 1137 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1138 parent = expression.parent 1139 1140 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1141 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1142 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1143 return self.func( 1144 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1145 ) 1146 1147 return super().attimezone_sql(expression) 1148 1149 def trycast_sql(self, expression: exp.TryCast) -> str: 1150 return self.cast_sql(expression, safe_prefix="SAFE_") 1151 1152 def bracket_sql(self, expression: exp.Bracket) -> str: 1153 this = expression.this 1154 expressions = expression.expressions 1155 1156 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1157 arg = expressions[0] 1158 if arg.type is None: 1159 from sqlglot.optimizer.annotate_types import annotate_types 1160 1161 arg = annotate_types(arg) 1162 1163 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1164 # BQ doesn't support bracket syntax with string values for structs 1165 return f"{self.sql(this)}.{arg.name}" 1166 1167 expressions_sql = self.expressions(expression, flat=True) 1168 offset = expression.args.get("offset") 1169 1170 if offset == 0: 1171 expressions_sql = f"OFFSET({expressions_sql})" 1172 elif offset == 1: 1173 expressions_sql = f"ORDINAL({expressions_sql})" 1174 elif offset is not None: 1175 self.unsupported(f"Unsupported array offset: {offset}") 1176 1177 if expression.args.get("safe"): 1178 expressions_sql = f"SAFE_{expressions_sql}" 1179 1180 return f"{self.sql(this)}[{expressions_sql}]" 1181 1182 def in_unnest_op(self, expression: exp.Unnest) -> str: 1183 return self.sql(expression) 1184 1185 def version_sql(self, expression: exp.Version) -> str: 1186 if expression.name == "TIMESTAMP": 1187 expression.set("this", "SYSTEM_TIME") 1188 return super().version_sql(expression) 1189 1190 def contains_sql(self, expression: exp.Contains) -> str: 1191 this = expression.this 1192 expr 
= expression.expression 1193 1194 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1195 this = this.this 1196 expr = expr.this 1197 1198 return self.func("CONTAINS_SUBSTRING", this, expr)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
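As a rough, unverified sketch of what this flag controls: BigQuery's LOG takes the base second, so transpiling to a dialect whose LOG takes the base first should swap the arguments (the printed output is indicative only).

import sqlglot

# Indicative only: the exact output depends on the sqlglot version.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="postgres")[0])
# e.g. SELECT LOG(10, 100)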
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
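A minimal sketch of the BigQuery behavior through sqlglot's qualification step (the qualify entry point is the standard sqlglot one; the printed output is indicative):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

# Under BigQuery, the alias references in GROUP BY/HAVING should expand to
# the underlying "id" column, per the resolution rules described above.
expression = sqlglot.parse_one(sql, read="bigquery")
print(qualify(expression, dialect="bigquery").sql(dialect="bigquery"))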
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
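For instance, a sketch (outputs indicative): both spellings parse into the same node, and the remembered name lets each roundtrip to its original form.

import sqlglot

# JSON_VALUE and JSON_EXTRACT_SCALAR share an AST node in BigQuery; with the
# original name preserved in metadata, each should regenerate as written.
for sql in ("SELECT JSON_VALUE(j, '$.x')", "SELECT JSON_EXTRACT_SCALAR(j, '$.x')"):
    print(sqlglot.parse_one(sql, read="bigquery").sql(dialect="bigquery"))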
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
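An indicative sketch of the mapping in action when a format string crosses dialects:

import sqlglot

# BigQuery's %-style format elements are translated through TIME_MAPPING;
# the exact target spelling depends on the writing dialect and version.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d)", read="bigquery", write="duckdb")[0])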
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
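For example, when writing BigQuery SQL, an implicit-DISTINCT UNION coming from another dialect should have the keyword made explicit (a sketch; output indicative):

import sqlglot

# BigQuery requires ALL or DISTINCT to be spelled out on set operations.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", write="bigquery")[0])
# SELECT 1 UNION DISTINCT SELECT 2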
401 def normalize_identifier(self, expression: E) -> E: 402 if ( 403 isinstance(expression, exp.Identifier) 404 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 405 ): 406 parent = expression.parent 407 while isinstance(parent, exp.Dot): 408 parent = parent.parent 409 410 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 411 # by default. The following check uses a heuristic to detect tables based on whether 412 # they are qualified. This should generally be correct, because tables in BigQuery 413 # must be qualified with at least a dataset, unless @@dataset_id is set. 414 case_sensitive = ( 415 isinstance(parent, exp.UserDefinedFunction) 416 or ( 417 isinstance(parent, exp.Table) 418 and parent.db 419 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 420 ) 421 or expression.meta.get("is_table") 422 ) 423 if not case_sensitive: 424 expression.set("this", expression.this.lower()) 425 426 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
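A small sketch against this method (output indicative):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

bigquery = Dialect.get_or_raise("bigquery")

# Column-like identifiers are case-insensitive in BigQuery, so they get
# lowercased; UDF names and qualified table names stay case-sensitive.
print(bigquery.normalize_identifier(exp.to_identifier("FoO")).sql())  # foo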
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
428 class Tokenizer(tokens.Tokenizer): 429 QUOTES = ["'", '"', '"""', "'''"] 430 COMMENTS = ["--", "#", ("/*", "*/")] 431 IDENTIFIERS = ["`"] 432 STRING_ESCAPES = ["\\"] 433 434 HEX_STRINGS = [("0x", ""), ("0X", "")] 435 436 BYTE_STRINGS = [ 437 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 438 ] 439 440 RAW_STRINGS = [ 441 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 442 ] 443 444 KEYWORDS = { 445 **tokens.Tokenizer.KEYWORDS, 446 "ANY TYPE": TokenType.VARIANT, 447 "BEGIN": TokenType.COMMAND, 448 "BEGIN TRANSACTION": TokenType.BEGIN, 449 "BYTEINT": TokenType.INT, 450 "BYTES": TokenType.BINARY, 451 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 452 "DATETIME": TokenType.TIMESTAMP, 453 "DECLARE": TokenType.COMMAND, 454 "ELSEIF": TokenType.COMMAND, 455 "EXCEPTION": TokenType.COMMAND, 456 "FLOAT64": TokenType.DOUBLE, 457 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 458 "MODEL": TokenType.MODEL, 459 "NOT DETERMINISTIC": TokenType.VOLATILE, 460 "RECORD": TokenType.STRUCT, 461 "TIMESTAMP": TokenType.TIMESTAMPTZ, 462 } 463 KEYWORDS.pop("DIV") 464 KEYWORDS.pop("VALUES") 465 KEYWORDS.pop("/*+")
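A short sketch exercising this tokenizer through the dialect-level helper (token stream shown is indicative):

from sqlglot.dialects.dialect import Dialect

# Backticks delimit identifiers and b'...' is a byte string in BigQuery;
# tokenize() returns the raw token stream.
for token in Dialect.get_or_raise("bigquery").tokenize("SELECT b'abc' FROM `proj.db.tbl`"):
    print(token.token_type, token.text)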
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
467 class Parser(parser.Parser): 468 PREFIXED_PIVOT_COLUMNS = True 469 LOG_DEFAULTS_TO_LN = True 470 SUPPORTS_IMPLICIT_UNNEST = True 471 472 FUNCTIONS = { 473 **parser.Parser.FUNCTIONS, 474 "CONTAINS_SUBSTRING": _build_contains_substring, 475 "DATE": _build_date, 476 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 477 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 478 "DATE_TRUNC": lambda args: exp.DateTrunc( 479 unit=exp.Literal.string(str(seq_get(args, 1))), 480 this=seq_get(args, 0), 481 zone=seq_get(args, 2), 482 ), 483 "DATETIME": _build_datetime, 484 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 485 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 486 "DIV": binary_from_function(exp.IntDiv), 487 "EDIT_DISTANCE": _build_levenshtein, 488 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 489 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 490 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 491 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 492 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 493 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 494 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 495 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 496 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 497 "MD5": exp.MD5Digest.from_arg_list, 498 "TO_HEX": _build_to_hex, 499 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 500 [seq_get(args, 1), seq_get(args, 0)] 501 ), 502 "PARSE_TIMESTAMP": _build_parse_timestamp, 503 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 504 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 505 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 506 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 507 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 508 ), 509 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 510 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 511 "SPLIT": lambda args: exp.Split( 512 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 513 this=seq_get(args, 0), 514 expression=seq_get(args, 1) or exp.Literal.string(","), 515 ), 516 "STRPOS": exp.StrPosition.from_arg_list, 517 "TIME": _build_time, 518 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 519 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 520 "TIMESTAMP": _build_timestamp, 521 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 522 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 523 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 524 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 525 ), 526 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 527 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 528 ), 529 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 530 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 531 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 532 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 533 } 534 535 FUNCTION_PARSERS = { 536 **parser.Parser.FUNCTION_PARSERS, 537 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 538 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 539 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 540 } 541 FUNCTION_PARSERS.pop("TRIM") 542 543 NO_PAREN_FUNCTIONS = { 544 **parser.Parser.NO_PAREN_FUNCTIONS, 545 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 546 } 547 548 NESTED_TYPE_TOKENS = { 549 *parser.Parser.NESTED_TYPE_TOKENS, 550 TokenType.TABLE, 551 } 552 553 PROPERTY_PARSERS = { 554 **parser.Parser.PROPERTY_PARSERS, 555 "NOT DETERMINISTIC": lambda self: self.expression( 556 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 557 ), 558 "OPTIONS": lambda self: self._parse_with_property(), 559 } 560 561 CONSTRAINT_PARSERS = { 562 **parser.Parser.CONSTRAINT_PARSERS, 563 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 564 } 565 566 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 567 RANGE_PARSERS.pop(TokenType.OVERLAPS) 568 569 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 570 571 STATEMENT_PARSERS = { 572 **parser.Parser.STATEMENT_PARSERS, 573 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 574 TokenType.END: lambda self: self._parse_as_command(self._prev), 575 TokenType.FOR: lambda self: self._parse_for_in(), 576 } 577 578 BRACKET_OFFSETS = { 579 "OFFSET": (0, False), 580 "ORDINAL": (1, False), 581 "SAFE_OFFSET": (0, True), 582 "SAFE_ORDINAL": (1, True), 583 } 584 585 def _parse_for_in(self) -> exp.ForIn: 586 this = self._parse_range() 587 self._match_text_seq("DO") 588 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 589 590 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 591 this = super()._parse_table_part(schema=schema) or self._parse_number() 592 593 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 594 if isinstance(this, exp.Identifier): 595 table_name = this.name 596 while self._match(TokenType.DASH, advance=False) and self._next: 597 text = "" 598 while self._is_connected() and self._curr.token_type != TokenType.DOT: 599 self._advance() 600 text += self._prev.text 601 table_name += text 602 603 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 604 elif isinstance(this, exp.Literal): 605 table_name = this.name 606 607 if self._is_connected() and self._parse_var(any_token=True): 608 table_name += self._prev.text 609 610 this = exp.Identifier(this=table_name, quoted=True) 611 612 return this 613 614 def _parse_table_parts( 615 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 616 ) -> exp.Table: 617 table = super()._parse_table_parts( 618 schema=schema, is_db_reference=is_db_reference, wildcard=True 619 ) 620 621 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 622 if not table.catalog: 623 if table.db: 624 parts = table.db.split(".") 625 if len(parts) == 2 and not table.args["db"].quoted: 626 table.set("catalog", exp.Identifier(this=parts[0])) 627 table.set("db", exp.Identifier(this=parts[1])) 628 else: 629 parts = table.name.split(".") 630 if len(parts) == 2 and not table.this.quoted: 631 table.set("db", exp.Identifier(this=parts[0])) 632 table.set("this", exp.Identifier(this=parts[1])) 633 634 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 635 alias = table.this 636 catalog, db, this, *rest = ( 637 exp.to_identifier(p, quoted=True) 638 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 639 ) 640 641 if rest and this: 642 this = exp.Dot.build([this, *rest]) # type: ignore 643 644 table = exp.Table( 645 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 646 ) 647 table.meta["quoted_table"] = True 648 else: 649 alias = None 650 651 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 652 # dataset, so if the project identifier is omitted we need to fix the ast so that 653 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 654 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 655 # views, because it would seem like the "catalog" part is set, when it'd actually 656 # be the region/dataset. Merging the two identifiers into a single one is done to 657 # avoid producing a 4-part Table reference, which would cause issues in the schema 658 # module, when there are 3-part table names mixed with information schema views. 659 # 660 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 661 table_parts = table.parts 662 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 663 # We need to alias the table here to avoid breaking existing qualified columns. 664 # This is expected to be safe, because if there's an actual alias coming up in 665 # the token stream, it will overwrite this one. If there isn't one, we are only 666 # exposing the name that can be used to reference the view explicitly (a no-op). 667 exp.alias_( 668 table, 669 t.cast(exp.Identifier, alias or table_parts[-1]), 670 table=True, 671 copy=False, 672 ) 673 674 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 675 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 676 table.set("db", seq_get(table_parts, -3)) 677 table.set("catalog", seq_get(table_parts, -4)) 678 679 return table 680 681 def _parse_column(self) -> t.Optional[exp.Expression]: 682 column = super()._parse_column() 683 if isinstance(column, exp.Column): 684 parts = column.parts 685 if any("." in p.name for p in parts): 686 catalog, db, table, this, *rest = ( 687 exp.to_identifier(p, quoted=True) 688 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 689 ) 690 691 if rest and this: 692 this = exp.Dot.build([this, *rest]) # type: ignore 693 694 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 695 column.meta["quoted_column"] = True 696 697 return column 698 699 @t.overload 700 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 701 702 @t.overload 703 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
704 705 def _parse_json_object(self, agg=False): 706 json_object = super()._parse_json_object() 707 array_kv_pair = seq_get(json_object.expressions, 0) 708 709 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 710 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 711 if ( 712 array_kv_pair 713 and isinstance(array_kv_pair.this, exp.Array) 714 and isinstance(array_kv_pair.expression, exp.Array) 715 ): 716 keys = array_kv_pair.this.expressions 717 values = array_kv_pair.expression.expressions 718 719 json_object.set( 720 "expressions", 721 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 722 ) 723 724 return json_object 725 726 def _parse_bracket( 727 self, this: t.Optional[exp.Expression] = None 728 ) -> t.Optional[exp.Expression]: 729 bracket = super()._parse_bracket(this) 730 731 if this is bracket: 732 return bracket 733 734 if isinstance(bracket, exp.Bracket): 735 for expression in bracket.expressions: 736 name = expression.name.upper() 737 738 if name not in self.BRACKET_OFFSETS: 739 break 740 741 offset, safe = self.BRACKET_OFFSETS[name] 742 bracket.set("offset", offset) 743 bracket.set("safe", safe) 744 expression.replace(expression.expressions[0]) 745 746 return bracket 747 748 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 749 unnest = super()._parse_unnest(with_alias=with_alias) 750 751 if not unnest: 752 return None 753 754 unnest_expr = seq_get(unnest.expressions, 0) 755 if unnest_expr: 756 from sqlglot.optimizer.annotate_types import annotate_types 757 758 unnest_expr = annotate_types(unnest_expr) 759 760 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 761 # in contrast to other dialects such as DuckDB which flattens only the array by default 762 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 763 array_elem.is_type(exp.DataType.Type.STRUCT) 764 for array_elem in unnest_expr._type.expressions 765 ): 766 unnest.set("explode_array", True) 767 768 return unnest 769 770 def _parse_make_interval(self) -> exp.MakeInterval: 771 expr = exp.MakeInterval() 772 773 for arg_key in expr.arg_types: 774 value = self._parse_lambda() 775 776 if not value: 777 break 778 779 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 780 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 781 if isinstance(value, exp.Kwarg): 782 arg_key = value.this.name 783 784 expr.set(arg_key, value) 785 786 self._match(TokenType.COMMA) 787 788 return expr 789 790 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 791 expr = self.expression( 792 exp.FeaturesAtTime, 793 this=(self._match(TokenType.TABLE) and self._parse_table()) 794 or self._parse_select(nested=True), 795 ) 796 797 while self._match(TokenType.COMMA): 798 arg = self._parse_lambda() 799 800 # Get the LHS of the Kwarg and set the arg to that value, e.g 801 # "num_rows => 1" sets the expr's `num_rows` arg 802 if arg: 803 expr.set(arg.this.name, arg) 804 805 return expr
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
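Typical usage goes through the top-level helpers, which pick this parser up when read="bigquery" (the node name shown is indicative):

import sqlglot

# TIMESTAMP_MICROS is parsed into the dialect-agnostic exp.UnixToTime node,
# per the FUNCTIONS mapping above.
ast = sqlglot.parse_one("SELECT TIMESTAMP_MICROS(1000000)", read="bigquery")
print(type(ast.selects[0]).__name__)  # UnixToTime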
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
807 class Generator(generator.Generator): 808 INTERVAL_ALLOWS_PLURAL_FORM = False 809 JOIN_HINTS = False 810 QUERY_HINTS = False 811 TABLE_HINTS = False 812 LIMIT_FETCH = "LIMIT" 813 RENAME_TABLE_WITH_DB = False 814 NVL2_SUPPORTED = False 815 UNNEST_WITH_ORDINALITY = False 816 COLLATE_IS_FUNC = True 817 LIMIT_ONLY_LITERALS = True 818 SUPPORTS_TABLE_ALIAS_COLUMNS = False 819 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 820 JSON_KEY_VALUE_PAIR_SEP = "," 821 NULL_ORDERING_SUPPORTED = False 822 IGNORE_NULLS_IN_FUNC = True 823 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 824 CAN_IMPLEMENT_ARRAY_ANY = True 825 SUPPORTS_TO_NUMBER = False 826 NAMED_PLACEHOLDER_TOKEN = "@" 827 HEX_FUNC = "TO_HEX" 828 WITH_PROPERTIES_PREFIX = "OPTIONS" 829 SUPPORTS_EXPLODING_PROJECTIONS = False 830 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 831 SUPPORTS_UNIX_SECONDS = True 832 833 TRANSFORMS = { 834 **generator.Generator.TRANSFORMS, 835 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 836 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 837 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 838 exp.Array: inline_array_unless_query, 839 exp.ArrayContains: _array_contains_sql, 840 exp.ArrayFilter: filter_array_using_unnest, 841 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 842 exp.CollateProperty: lambda self, e: ( 843 f"DEFAULT COLLATE {self.sql(e, 'this')}" 844 if e.args.get("default") 845 else f"COLLATE {self.sql(e, 'this')}" 846 ), 847 exp.Commit: lambda *_: "COMMIT TRANSACTION", 848 exp.CountIf: rename_func("COUNTIF"), 849 exp.Create: _create_sql, 850 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 851 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 852 exp.DateDiff: lambda self, e: self.func( 853 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 854 ), 855 exp.DateFromParts: rename_func("DATE"), 856 exp.DateStrToDate: datestrtodate_sql, 857 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 858 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 859 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 860 exp.DateTrunc: lambda self, e: self.func( 861 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 862 ), 863 exp.FromTimeZone: lambda self, e: self.func( 864 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 865 ), 866 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 867 exp.GroupConcat: rename_func("STRING_AGG"), 868 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 869 exp.If: if_sql(false_value="NULL"), 870 exp.ILike: no_ilike_sql, 871 exp.IntDiv: rename_func("DIV"), 872 exp.Int64: rename_func("INT64"), 873 exp.JSONFormat: rename_func("TO_JSON_STRING"), 874 exp.Levenshtein: _levenshtein_sql, 875 exp.Max: max_or_greatest, 876 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 877 exp.MD5Digest: rename_func("MD5"), 878 exp.Min: min_or_least, 879 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 880 exp.RegexpExtract: lambda self, e: self.func( 881 "REGEXP_EXTRACT", 882 e.this, 883 e.expression, 884 e.args.get("position"), 885 e.args.get("occurrence"), 886 ), 887 exp.RegexpExtractAll: lambda self, e: self.func( 888 "REGEXP_EXTRACT_ALL", e.this, e.expression 889 ), 890 exp.RegexpReplace: regexp_replace_sql, 891 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 892 exp.ReturnsProperty: _returnsproperty_sql, 893 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 894 exp.Select: transforms.preprocess( 895 [ 896 transforms.explode_to_unnest(), 
897 transforms.unqualify_unnest, 898 transforms.eliminate_distinct_on, 899 _alias_ordered_group, 900 transforms.eliminate_semi_and_anti_joins, 901 ] 902 ), 903 exp.SHA: rename_func("SHA1"), 904 exp.SHA2: sha256_sql, 905 exp.StabilityProperty: lambda self, e: ( 906 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 907 ), 908 exp.String: rename_func("STRING"), 909 exp.StrPosition: str_position_sql, 910 exp.StrToDate: _str_to_datetime_sql, 911 exp.StrToTime: _str_to_datetime_sql, 912 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 913 exp.TimeFromParts: rename_func("TIME"), 914 exp.TimestampFromParts: rename_func("DATETIME"), 915 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 916 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 917 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 918 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 919 exp.TimeStrToTime: timestrtotime_sql, 920 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 921 exp.TsOrDsAdd: _ts_or_ds_add_sql, 922 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 923 exp.TsOrDsToTime: rename_func("TIME"), 924 exp.TsOrDsToDatetime: rename_func("DATETIME"), 925 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 926 exp.Unhex: rename_func("FROM_HEX"), 927 exp.UnixDate: rename_func("UNIX_DATE"), 928 exp.UnixToTime: _unix_to_time_sql, 929 exp.Uuid: lambda *_: "GENERATE_UUID()", 930 exp.Values: _derived_table_values_to_unnest, 931 exp.VariancePop: rename_func("VAR_POP"), 932 } 933 934 SUPPORTED_JSON_PATH_PARTS = { 935 exp.JSONPathKey, 936 exp.JSONPathRoot, 937 exp.JSONPathSubscript, 938 } 939 940 TYPE_MAPPING = { 941 **generator.Generator.TYPE_MAPPING, 942 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 943 exp.DataType.Type.BIGINT: "INT64", 944 exp.DataType.Type.BINARY: "BYTES", 945 exp.DataType.Type.BOOLEAN: "BOOL", 946 exp.DataType.Type.CHAR: "STRING", 947 exp.DataType.Type.DECIMAL: "NUMERIC", 948 exp.DataType.Type.DOUBLE: "FLOAT64", 949 exp.DataType.Type.FLOAT: "FLOAT64", 950 exp.DataType.Type.INT: "INT64", 951 exp.DataType.Type.NCHAR: "STRING", 952 exp.DataType.Type.NVARCHAR: "STRING", 953 exp.DataType.Type.SMALLINT: "INT64", 954 exp.DataType.Type.TEXT: "STRING", 955 exp.DataType.Type.TIMESTAMP: "DATETIME", 956 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 957 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 958 exp.DataType.Type.TINYINT: "INT64", 959 exp.DataType.Type.ROWVERSION: "BYTES", 960 exp.DataType.Type.UUID: "STRING", 961 exp.DataType.Type.VARBINARY: "BYTES", 962 exp.DataType.Type.VARCHAR: "STRING", 963 exp.DataType.Type.VARIANT: "ANY TYPE", 964 } 965 966 PROPERTIES_LOCATION = { 967 **generator.Generator.PROPERTIES_LOCATION, 968 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 969 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 970 } 971 972 # WINDOW comes after QUALIFY 973 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 974 AFTER_HAVING_MODIFIER_TRANSFORMS = { 975 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 976 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 977 } 978 979 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 980 RESERVED_KEYWORDS = { 981 "all", 982 "and", 983 "any", 984 "array", 985 "as", 986 "asc", 987 "assert_rows_modified", 988 "at", 989 "between", 990 "by", 991 "case", 992 "cast", 993 "collate", 994 "contains", 995 "create", 996 "cross", 997 "cube", 998 "current", 999 "default", 1000 "define", 1001 "desc", 1002 
"distinct", 1003 "else", 1004 "end", 1005 "enum", 1006 "escape", 1007 "except", 1008 "exclude", 1009 "exists", 1010 "extract", 1011 "false", 1012 "fetch", 1013 "following", 1014 "for", 1015 "from", 1016 "full", 1017 "group", 1018 "grouping", 1019 "groups", 1020 "hash", 1021 "having", 1022 "if", 1023 "ignore", 1024 "in", 1025 "inner", 1026 "intersect", 1027 "interval", 1028 "into", 1029 "is", 1030 "join", 1031 "lateral", 1032 "left", 1033 "like", 1034 "limit", 1035 "lookup", 1036 "merge", 1037 "natural", 1038 "new", 1039 "no", 1040 "not", 1041 "null", 1042 "nulls", 1043 "of", 1044 "on", 1045 "or", 1046 "order", 1047 "outer", 1048 "over", 1049 "partition", 1050 "preceding", 1051 "proto", 1052 "qualify", 1053 "range", 1054 "recursive", 1055 "respect", 1056 "right", 1057 "rollup", 1058 "rows", 1059 "select", 1060 "set", 1061 "some", 1062 "struct", 1063 "tablesample", 1064 "then", 1065 "to", 1066 "treat", 1067 "true", 1068 "unbounded", 1069 "union", 1070 "unnest", 1071 "using", 1072 "when", 1073 "where", 1074 "window", 1075 "with", 1076 "within", 1077 } 1078 1079 def mod_sql(self, expression: exp.Mod) -> str: 1080 this = expression.this 1081 expr = expression.expression 1082 return self.func( 1083 "MOD", 1084 this.unnest() if isinstance(this, exp.Paren) else this, 1085 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1086 ) 1087 1088 def column_parts(self, expression: exp.Column) -> str: 1089 if expression.meta.get("quoted_column"): 1090 # If a column reference is of the form `dataset.table`.name, we need 1091 # to preserve the quoted table path, otherwise the reference breaks 1092 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1093 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1094 return f"{table_path}.{self.sql(expression, 'this')}" 1095 1096 return super().column_parts(expression) 1097 1098 def table_parts(self, expression: exp.Table) -> str: 1099 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1100 # we need to make sure the correct quoting is used in each case. 
1101 # 1102 # For example, if there is a CTE x that clashes with a schema name, then the former will 1103 # return the table y in that schema, whereas the latter will return the CTE's y column: 1104 # 1105 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1106 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1107 if expression.meta.get("quoted_table"): 1108 table_parts = ".".join(p.name for p in expression.parts) 1109 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1110 1111 return super().table_parts(expression) 1112 1113 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1114 this = expression.this 1115 if isinstance(this, exp.TsOrDsToDatetime): 1116 func_name = "FORMAT_DATETIME" 1117 elif isinstance(this, exp.TsOrDsToTimestamp): 1118 func_name = "FORMAT_TIMESTAMP" 1119 else: 1120 func_name = "FORMAT_DATE" 1121 1122 time_expr = ( 1123 this 1124 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1125 else expression 1126 ) 1127 return self.func(func_name, self.format_time(expression), time_expr.this) 1128 1129 def eq_sql(self, expression: exp.EQ) -> str: 1130 # Operands of = cannot be NULL in BigQuery 1131 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1132 if not isinstance(expression.parent, exp.Update): 1133 return "NULL" 1134 1135 return self.binary(expression, "=") 1136 1137 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1138 parent = expression.parent 1139 1140 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1141 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1142 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1143 return self.func( 1144 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1145 ) 1146 1147 return super().attimezone_sql(expression) 1148 1149 def trycast_sql(self, expression: exp.TryCast) -> str: 1150 return self.cast_sql(expression, safe_prefix="SAFE_") 1151 1152 def bracket_sql(self, expression: exp.Bracket) -> str: 1153 this = expression.this 1154 expressions = expression.expressions 1155 1156 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1157 arg = expressions[0] 1158 if arg.type is None: 1159 from sqlglot.optimizer.annotate_types import annotate_types 1160 1161 arg = annotate_types(arg) 1162 1163 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1164 # BQ doesn't support bracket syntax with string values for structs 1165 return f"{self.sql(this)}.{arg.name}" 1166 1167 expressions_sql = self.expressions(expression, flat=True) 1168 offset = expression.args.get("offset") 1169 1170 if offset == 0: 1171 expressions_sql = f"OFFSET({expressions_sql})" 1172 elif offset == 1: 1173 expressions_sql = f"ORDINAL({expressions_sql})" 1174 elif offset is not None: 1175 self.unsupported(f"Unsupported array offset: {offset}") 1176 1177 if expression.args.get("safe"): 1178 expressions_sql = f"SAFE_{expressions_sql}" 1179 1180 return f"{self.sql(this)}[{expressions_sql}]" 1181 1182 def in_unnest_op(self, expression: exp.Unnest) -> str: 1183 return self.sql(expression) 1184 1185 def version_sql(self, expression: exp.Version) -> str: 1186 if expression.name == "TIMESTAMP": 1187 expression.set("this", "SYSTEM_TIME") 1188 return super().version_sql(expression) 1189 1190 def contains_sql(self, expression: exp.Contains) -> str: 1191 this = expression.this 1192 expr 
= expression.expression 1193 1194 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1195 this = this.this 1196 expr = expr.this 1197 1198 return self.func("CONTAINS_SUBSTRING", this, expr)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
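For example, these options can be passed straight through Expression.sql, which forwards them to this generator:

import sqlglot

ast = sqlglot.parse_one("SELECT a, b FROM t WHERE a > 1", read="bigquery")
print(ast.sql(dialect="bigquery", pretty=True))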
1088 def column_parts(self, expression: exp.Column) -> str: 1089 if expression.meta.get("quoted_column"): 1090 # If a column reference is of the form `dataset.table`.name, we need 1091 # to preserve the quoted table path, otherwise the reference breaks 1092 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1093 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1094 return f"{table_path}.{self.sql(expression, 'this')}" 1095 1096 return super().column_parts(expression)
1098 def table_parts(self, expression: exp.Table) -> str: 1099 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1100 # we need to make sure the correct quoting is used in each case. 1101 # 1102 # For example, if there is a CTE x that clashes with a schema name, then the former will 1103 # return the table y in that schema, whereas the latter will return the CTE's y column: 1104 # 1105 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1106 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1107 if expression.meta.get("quoted_table"): 1108 table_parts = ".".join(p.name for p in expression.parts) 1109 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1110 1111 return super().table_parts(expression)
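An indicative roundtrip sketch of the distinction described in the comment above:

import sqlglot

# `x.y` (one quoted identifier) and `x`.`y` (two identifiers) are kept
# distinct on roundtrip, since they can resolve to different data sources.
print(sqlglot.transpile("SELECT * FROM `x.y`", read="bigquery")[0])
print(sqlglot.transpile("SELECT * FROM `x`.`y`", read="bigquery")[0])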
1113 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1114 this = expression.this 1115 if isinstance(this, exp.TsOrDsToDatetime): 1116 func_name = "FORMAT_DATETIME" 1117 elif isinstance(this, exp.TsOrDsToTimestamp): 1118 func_name = "FORMAT_TIMESTAMP" 1119 else: 1120 func_name = "FORMAT_DATE" 1121 1122 time_expr = ( 1123 this 1124 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1125 else expression 1126 ) 1127 return self.func(func_name, self.format_time(expression), time_expr.this)
1129 def eq_sql(self, expression: exp.EQ) -> str: 1130 # Operands of = cannot be NULL in BigQuery 1131 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1132 if not isinstance(expression.parent, exp.Update): 1133 return "NULL" 1134 1135 return self.binary(expression, "=")
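A sketch of the effect (output indicative):

import sqlglot

# "= NULL" can never be TRUE in BigQuery, so outside of UPDATE statements
# the whole comparison is generated as NULL.
print(sqlglot.transpile("SELECT a = NULL FROM t", read="duckdb", write="bigquery")[0])
# e.g. SELECT NULL FROM t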
1137 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1138 parent = expression.parent 1139 1140 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1141 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1142 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1143 return self.func( 1144 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1145 ) 1146 1147 return super().attimezone_sql(expression)
1152 def bracket_sql(self, expression: exp.Bracket) -> str: 1153 this = expression.this 1154 expressions = expression.expressions 1155 1156 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1157 arg = expressions[0] 1158 if arg.type is None: 1159 from sqlglot.optimizer.annotate_types import annotate_types 1160 1161 arg = annotate_types(arg) 1162 1163 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1164 # BQ doesn't support bracket syntax with string values for structs 1165 return f"{self.sql(this)}.{arg.name}" 1166 1167 expressions_sql = self.expressions(expression, flat=True) 1168 offset = expression.args.get("offset") 1169 1170 if offset == 0: 1171 expressions_sql = f"OFFSET({expressions_sql})" 1172 elif offset == 1: 1173 expressions_sql = f"ORDINAL({expressions_sql})" 1174 elif offset is not None: 1175 self.unsupported(f"Unsupported array offset: {offset}") 1176 1177 if expression.args.get("safe"): 1178 expressions_sql = f"SAFE_{expressions_sql}" 1179 1180 return f"{self.sql(this)}[{expressions_sql}]"
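An indicative roundtrip sketch for the OFFSET/ORDINAL handling:

import sqlglot

# OFFSET/ORDINAL (and the SAFE_ variants) are stored in the Bracket node's
# "offset"/"safe" args at parse time and reconstructed here on generation.
print(sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")[0])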
1190 def contains_sql(self, expression: exp.Contains) -> str: 1191 this = expression.this 1192 expr = expression.expression 1193 1194 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1195 this = this.this 1196 expr = expr.this 1197 1198 return self.func("CONTAINS_SUBSTRING", this, expr)
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql