sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY y + 1
# ORDER BY z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression

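# A hedged illustration of the pushdown above (not part of the module; it assumes
# the public `sqlglot.transpile` API and default parsing behavior):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("WITH t(a) AS (SELECT 1) SELECT a FROM t", write="bigquery")[0]
#     'WITH t AS (SELECT 1 AS a) SELECT a FROM t'
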
def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )

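# A hedged note on the group inference above: BigQuery's REGEXP_EXTRACT returns the
# text matched by the capturing group when the pattern has exactly one, so a pattern
# such as r'(\d+)' is parsed with group=1, which lets other dialects reproduce that
# semantics explicitly on generation.
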
def _build_json_extract_scalar(args: t.List, dialect: Dialect) -> exp.JSONExtractScalar:
    if len(args) == 1:
        # The default value for the JSONPath is '$', i.e. all of the data
        args.append(exp.Literal.string("$"))
    return parser.build_extract_json_with_path(exp.JSONExtractScalar)(args, dialect)


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc. follow this return type convention:
    +---------+---------+---------+------------+---------+
    | INPUT   | INT64   | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression

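# A small, hedged example of the convention above (assuming the optimizer's
# `annotate_types` entry point is used): FLOOR over an INT64 column is annotated as
# FLOAT64, while FLOOR over a NUMERIC column stays NUMERIC, mirroring the table in
# the docstring.
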
class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: lambda self, e: self._annotate_by_args(e, "expressions"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

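    # A hedged sketch of the heuristic above (assuming sqlglot's `normalize_identifiers`
    # optimizer rule drives this method): an unquoted, unqualified column is lowercased,
    # whereas a dataset-qualified table keeps its casing, e.g.
    #
    #     SELECT COL FROM dataset.Tbl  ->  SELECT col FROM dataset.Tbl
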
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_json_extract_scalar,
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_VALUE": _build_json_extract_scalar,
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

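        # Hedged illustration of the table above: `arr[SAFE_OFFSET(3)]` parses into an
        # exp.Bracket with offset=0 and safe=True, and the Generator's bracket_sql below
        # reconstructs the SAFE_OFFSET(...) wrapper on the way back out.
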
        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                table.set("this", exp.Identifier(this=info_schema_view, quoted=True))
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

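        # A hedged example of the dash handling above: BigQuery permits dashes in
        # unquoted project names (see the table_names link), so something like
        # `SELECT * FROM my-project.dataset.tbl` re-assembles "my-project" into a
        # single identifier part rather than parsing the dash as subtraction.
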
        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e. an array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB, which flatten only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

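        # A hedged note on the explode_array flag above: in BigQuery, UNNEST over an
        # ARRAY<STRUCT<...>> yields one output column per struct field, whereas e.g.
        # DuckDB's UNNEST keeps the struct intact; downstream transpilation reads this
        # flag to reproduce that behavior.
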
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: rename_func("EDIT_DISTANCE"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

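        # A hedged transpilation sketch exercising one of the transforms above
        # (assumes the public `sqlglot` API and default read dialect):
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SELECT MD5('x')").sql("bigquery")
        #     "SELECT TO_HEX(MD5('x'))"
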
"IMMUTABLE" else "NOT DETERMINISTIC" 810 ), 811 exp.String: rename_func("STRING"), 812 exp.StrToDate: _str_to_datetime_sql, 813 exp.StrToTime: _str_to_datetime_sql, 814 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 815 exp.TimeFromParts: rename_func("TIME"), 816 exp.TimestampFromParts: rename_func("DATETIME"), 817 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 818 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 819 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 820 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 821 exp.TimeStrToTime: timestrtotime_sql, 822 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 823 exp.TsOrDsAdd: _ts_or_ds_add_sql, 824 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 825 exp.TsOrDsToTime: rename_func("TIME"), 826 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 827 exp.Unhex: rename_func("FROM_HEX"), 828 exp.UnixDate: rename_func("UNIX_DATE"), 829 exp.UnixToTime: _unix_to_time_sql, 830 exp.Uuid: lambda *_: "GENERATE_UUID()", 831 exp.Values: _derived_table_values_to_unnest, 832 exp.VariancePop: rename_func("VAR_POP"), 833 } 834 835 SUPPORTED_JSON_PATH_PARTS = { 836 exp.JSONPathKey, 837 exp.JSONPathRoot, 838 exp.JSONPathSubscript, 839 } 840 841 TYPE_MAPPING = { 842 **generator.Generator.TYPE_MAPPING, 843 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 844 exp.DataType.Type.BIGINT: "INT64", 845 exp.DataType.Type.BINARY: "BYTES", 846 exp.DataType.Type.BOOLEAN: "BOOL", 847 exp.DataType.Type.CHAR: "STRING", 848 exp.DataType.Type.DECIMAL: "NUMERIC", 849 exp.DataType.Type.DOUBLE: "FLOAT64", 850 exp.DataType.Type.FLOAT: "FLOAT64", 851 exp.DataType.Type.INT: "INT64", 852 exp.DataType.Type.NCHAR: "STRING", 853 exp.DataType.Type.NVARCHAR: "STRING", 854 exp.DataType.Type.SMALLINT: "INT64", 855 exp.DataType.Type.TEXT: "STRING", 856 exp.DataType.Type.TIMESTAMP: "DATETIME", 857 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 858 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 859 exp.DataType.Type.TINYINT: "INT64", 860 exp.DataType.Type.ROWVERSION: "BYTES", 861 exp.DataType.Type.UUID: "STRING", 862 exp.DataType.Type.VARBINARY: "BYTES", 863 exp.DataType.Type.VARCHAR: "STRING", 864 exp.DataType.Type.VARIANT: "ANY TYPE", 865 } 866 867 PROPERTIES_LOCATION = { 868 **generator.Generator.PROPERTIES_LOCATION, 869 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 870 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 871 } 872 873 # WINDOW comes after QUALIFY 874 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 875 AFTER_HAVING_MODIFIER_TRANSFORMS = { 876 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 877 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 878 } 879 880 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 881 RESERVED_KEYWORDS = { 882 "all", 883 "and", 884 "any", 885 "array", 886 "as", 887 "asc", 888 "assert_rows_modified", 889 "at", 890 "between", 891 "by", 892 "case", 893 "cast", 894 "collate", 895 "contains", 896 "create", 897 "cross", 898 "cube", 899 "current", 900 "default", 901 "define", 902 "desc", 903 "distinct", 904 "else", 905 "end", 906 "enum", 907 "escape", 908 "except", 909 "exclude", 910 "exists", 911 "extract", 912 "false", 913 "fetch", 914 "following", 915 "for", 916 "from", 917 "full", 918 "group", 919 "grouping", 920 "groups", 921 "hash", 922 "having", 923 "if", 924 "ignore", 925 "in", 926 "inner", 927 "intersect", 928 "interval", 929 "into", 930 "is", 931 "join", 932 "lateral", 933 
"left", 934 "like", 935 "limit", 936 "lookup", 937 "merge", 938 "natural", 939 "new", 940 "no", 941 "not", 942 "null", 943 "nulls", 944 "of", 945 "on", 946 "or", 947 "order", 948 "outer", 949 "over", 950 "partition", 951 "preceding", 952 "proto", 953 "qualify", 954 "range", 955 "recursive", 956 "respect", 957 "right", 958 "rollup", 959 "rows", 960 "select", 961 "set", 962 "some", 963 "struct", 964 "tablesample", 965 "then", 966 "to", 967 "treat", 968 "true", 969 "unbounded", 970 "union", 971 "unnest", 972 "using", 973 "when", 974 "where", 975 "window", 976 "with", 977 "within", 978 } 979 980 def mod_sql(self, expression: exp.Mod) -> str: 981 this = expression.this 982 expr = expression.expression 983 return self.func( 984 "MOD", 985 this.unnest() if isinstance(this, exp.Paren) else this, 986 expr.unnest() if isinstance(expr, exp.Paren) else expr, 987 ) 988 989 def column_parts(self, expression: exp.Column) -> str: 990 if expression.meta.get("quoted_column"): 991 # If a column reference is of the form `dataset.table`.name, we need 992 # to preserve the quoted table path, otherwise the reference breaks 993 table_parts = ".".join(p.name for p in expression.parts[:-1]) 994 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 995 return f"{table_path}.{self.sql(expression, 'this')}" 996 997 return super().column_parts(expression) 998 999 def table_parts(self, expression: exp.Table) -> str: 1000 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1001 # we need to make sure the correct quoting is used in each case. 1002 # 1003 # For example, if there is a CTE x that clashes with a schema name, then the former will 1004 # return the table y in that schema, whereas the latter will return the CTE's y column: 1005 # 1006 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1007 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1008 if expression.meta.get("quoted_table"): 1009 table_parts = ".".join(p.name for p in expression.parts) 1010 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1011 1012 return super().table_parts(expression) 1013 1014 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1015 if isinstance(expression.this, exp.TsOrDsToTimestamp): 1016 func_name = "FORMAT_DATETIME" 1017 else: 1018 func_name = "FORMAT_DATE" 1019 this = ( 1020 expression.this 1021 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1022 else expression 1023 ) 1024 return self.func(func_name, self.format_time(expression), this.this) 1025 1026 def eq_sql(self, expression: exp.EQ) -> str: 1027 # Operands of = cannot be NULL in BigQuery 1028 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1029 if not isinstance(expression.parent, exp.Update): 1030 return "NULL" 1031 1032 return self.binary(expression, "=") 1033 1034 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1035 parent = expression.parent 1036 1037 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1038 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)

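# ---------------------------------------------------------------------------
# A minimal usage sketch for the dialect as a whole (hedged; not part of the
# module, and assuming the public `sqlglot.transpile` API and default behavior):
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT * FROM t WHERE a ILIKE '%x'", write="bigquery")[0]
#     "SELECT * FROM t WHERE LOWER(a) LIKE '%x'"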
1040 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1041 return self.func( 1042 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1043 ) 1044 1045 return super().attimezone_sql(expression) 1046 1047 def trycast_sql(self, expression: exp.TryCast) -> str: 1048 return self.cast_sql(expression, safe_prefix="SAFE_") 1049 1050 def bracket_sql(self, expression: exp.Bracket) -> str: 1051 this = expression.this 1052 expressions = expression.expressions 1053 1054 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1055 arg = expressions[0] 1056 if arg.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 arg = annotate_types(arg) 1060 1061 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1062 # BQ doesn't support bracket syntax with string values for structs 1063 return f"{self.sql(this)}.{arg.name}" 1064 1065 expressions_sql = self.expressions(expression, flat=True) 1066 offset = expression.args.get("offset") 1067 1068 if offset == 0: 1069 expressions_sql = f"OFFSET({expressions_sql})" 1070 elif offset == 1: 1071 expressions_sql = f"ORDINAL({expressions_sql})" 1072 elif offset is not None: 1073 self.unsupported(f"Unsupported array offset: {offset}") 1074 1075 if expression.args.get("safe"): 1076 expressions_sql = f"SAFE_{expressions_sql}" 1077 1078 return f"{self.sql(this)}[{expressions_sql}]" 1079 1080 def in_unnest_op(self, expression: exp.Unnest) -> str: 1081 return self.sql(expression) 1082 1083 def version_sql(self, expression: exp.Version) -> str: 1084 if expression.name == "TIMESTAMP": 1085 expression.set("this", "SYSTEM_TIME") 1086 return super().version_sql(expression)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
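In the BigQuery dialect this is overridden so that weeks start on Sunday. A quick check, assuming the attribute is named WEEK_OFFSET as in recent sqlglot versions:

from sqlglot.dialects.bigquery import BigQuery

# BigQuery weeks start on Sunday, so the dialect overrides the default of 0.
print(BigQuery.WEEK_OFFSET)  # -1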
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
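BigQuery's LOG(x [, base]) takes the base second, so this dialect sets LOG_BASE_FIRST = False. A small sketch of what that means when transpiling to a base-first dialect such as Postgres (the argument order is expected to swap; exact output may vary by version):

import sqlglot

# LOG(100, 10) in BigQuery is log base 10 of 100; Postgres puts the base first.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="postgres")[0])
# Expected: SELECT LOG(10, 100)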
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (
      SELECT 1 AS id, 2 AS my_id
    )
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
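A minimal sketch of how this surfaces through the optimizer's qualify pass (illustrative; the exact qualified output can vary by sqlglot version):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""
# Under BigQuery, the alias references in GROUP BY/HAVING are expanded to the
# underlying column (id) before columns are qualified.
print(qualify(parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))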
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
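These mappings are what let format strings travel between dialects; a sketch (the exact target function and any casts may differ by version):

import sqlglot

# BigQuery's %-style codes are mapped through their Python strftime
# equivalents and re-emitted in the target dialect's notation.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d)", read="bigquery", write="duckdb")[0])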
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
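For BigQuery these include the partition pseudocolumns (assuming the attribute is PSEUDOCOLUMNS, as in recent sqlglot versions):

from sqlglot.dialects.bigquery import BigQuery

# Engine-generated columns that SELECT * expansion should skip.
print(BigQuery.PSEUDOCOLUMNS)  # e.g. {'_PARTITIONTIME', '_PARTITIONDATE'}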
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
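BigQuery is one of the dialects where the qualifier must be spelled out, which shows up when transpiling a bare UNION (a sketch, assuming current defaults):

import sqlglot

# A bare UNION is DISTINCT by default in DuckDB; BigQuery must say so explicitly.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="bigquery")[0])
# Expected: SELECT 1 UNION DISTINCT SELECT 2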
352 def normalize_identifier(self, expression: E) -> E: 353 if ( 354 isinstance(expression, exp.Identifier) 355 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 356 ): 357 parent = expression.parent 358 while isinstance(parent, exp.Dot): 359 parent = parent.parent 360 361 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 362 # by default. The following check uses a heuristic to detect tables based on whether 363 # they are qualified. This should generally be correct, because tables in BigQuery 364 # must be qualified with at least a dataset, unless @@dataset_id is set. 365 case_sensitive = ( 366 isinstance(parent, exp.UserDefinedFunction) 367 or ( 368 isinstance(parent, exp.Table) 369 and parent.db 370 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 371 ) 372 or expression.meta.get("is_table") 373 ) 374 if not case_sensitive: 375 expression.set("this", expression.this.lower()) 376 377 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
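A minimal sketch of the heuristic above: an unquoted, unqualified identifier (e.g. a CTE name) is lowercased, while an identifier flagged as a table is left alone:

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# Case-insensitive by default, so the name is normalized.
print(dialect.normalize_identifier(exp.to_identifier("MyCte")).sql("bigquery"))  # mycte

# The is_table meta flag is one of the case-sensitivity checks in the source above.
table_ident = exp.to_identifier("MyTable", quoted=True)
table_ident.meta["is_table"] = True
print(dialect.normalize_identifier(table_ident).sql("bigquery"))  # `MyTable`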
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
379 class Tokenizer(tokens.Tokenizer): 380 QUOTES = ["'", '"', '"""', "'''"] 381 COMMENTS = ["--", "#", ("/*", "*/")] 382 IDENTIFIERS = ["`"] 383 STRING_ESCAPES = ["\\"] 384 385 HEX_STRINGS = [("0x", ""), ("0X", "")] 386 387 BYTE_STRINGS = [ 388 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 389 ] 390 391 RAW_STRINGS = [ 392 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 393 ] 394 395 KEYWORDS = { 396 **tokens.Tokenizer.KEYWORDS, 397 "ANY TYPE": TokenType.VARIANT, 398 "BEGIN": TokenType.COMMAND, 399 "BEGIN TRANSACTION": TokenType.BEGIN, 400 "BYTEINT": TokenType.INT, 401 "BYTES": TokenType.BINARY, 402 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 403 "DATETIME": TokenType.TIMESTAMP, 404 "DECLARE": TokenType.COMMAND, 405 "ELSEIF": TokenType.COMMAND, 406 "EXCEPTION": TokenType.COMMAND, 407 "FLOAT64": TokenType.DOUBLE, 408 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 409 "MODEL": TokenType.MODEL, 410 "NOT DETERMINISTIC": TokenType.VOLATILE, 411 "RECORD": TokenType.STRUCT, 412 "TIMESTAMP": TokenType.TIMESTAMPTZ, 413 } 414 KEYWORDS.pop("DIV") 415 KEYWORDS.pop("VALUES") 416 KEYWORDS.pop("/*+")
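A short sketch of these settings in action: backtick identifiers, raw strings and byte strings all tokenize under the BigQuery dialect (token stream details are illustrative):

import sqlglot

for token in sqlglot.tokenize("SELECT r'a\\d+', b'bytes', `col`", read="bigquery"):
    print(token.token_type, repr(token.text))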
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
418 class Parser(parser.Parser): 419 PREFIXED_PIVOT_COLUMNS = True 420 LOG_DEFAULTS_TO_LN = True 421 SUPPORTS_IMPLICIT_UNNEST = True 422 423 FUNCTIONS = { 424 **parser.Parser.FUNCTIONS, 425 "DATE": _build_date, 426 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 427 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 428 "DATE_TRUNC": lambda args: exp.DateTrunc( 429 unit=exp.Literal.string(str(seq_get(args, 1))), 430 this=seq_get(args, 0), 431 zone=seq_get(args, 2), 432 ), 433 "DATETIME": _build_datetime, 434 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 435 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 436 "DIV": binary_from_function(exp.IntDiv), 437 "EDIT_DISTANCE": lambda args: exp.Levenshtein( 438 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 439 ), 440 "FORMAT_DATE": lambda args: exp.TimeToStr( 441 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 442 ), 443 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 444 "JSON_EXTRACT_SCALAR": _build_json_extract_scalar, 445 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 446 "JSON_VALUE": _build_json_extract_scalar, 447 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 448 "MD5": exp.MD5Digest.from_arg_list, 449 "TO_HEX": _build_to_hex, 450 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 451 [seq_get(args, 1), seq_get(args, 0)] 452 ), 453 "PARSE_TIMESTAMP": _build_parse_timestamp, 454 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 455 "REGEXP_EXTRACT": _build_regexp_extract, 456 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 457 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 458 "SPLIT": lambda args: exp.Split( 459 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 460 this=seq_get(args, 0), 461 expression=seq_get(args, 1) or exp.Literal.string(","), 462 ), 463 "TIME": _build_time, 464 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 465 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 466 "TIMESTAMP": _build_timestamp, 467 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 468 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 469 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 470 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 471 ), 472 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 473 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 474 ), 475 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 476 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 477 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 478 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 479 ), 480 } 481 482 FUNCTION_PARSERS = { 483 **parser.Parser.FUNCTION_PARSERS, 484 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 485 } 486 FUNCTION_PARSERS.pop("TRIM") 487 488 NO_PAREN_FUNCTIONS = { 489 **parser.Parser.NO_PAREN_FUNCTIONS, 490 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 491 } 492 493 NESTED_TYPE_TOKENS = { 494 *parser.Parser.NESTED_TYPE_TOKENS, 495 TokenType.TABLE, 496 } 497 498 PROPERTY_PARSERS = { 499 **parser.Parser.PROPERTY_PARSERS, 500 "NOT DETERMINISTIC": lambda self: self.expression( 501 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 502 ), 503 "OPTIONS": lambda self: self._parse_with_property(), 504 } 505 506 
CONSTRAINT_PARSERS = { 507 **parser.Parser.CONSTRAINT_PARSERS, 508 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 509 } 510 511 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 512 RANGE_PARSERS.pop(TokenType.OVERLAPS) 513 514 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 515 516 STATEMENT_PARSERS = { 517 **parser.Parser.STATEMENT_PARSERS, 518 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 519 TokenType.END: lambda self: self._parse_as_command(self._prev), 520 TokenType.FOR: lambda self: self._parse_for_in(), 521 } 522 523 BRACKET_OFFSETS = { 524 "OFFSET": (0, False), 525 "ORDINAL": (1, False), 526 "SAFE_OFFSET": (0, True), 527 "SAFE_ORDINAL": (1, True), 528 } 529 530 def _parse_for_in(self) -> exp.ForIn: 531 this = self._parse_range() 532 self._match_text_seq("DO") 533 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 534 535 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 536 this = super()._parse_table_part(schema=schema) or self._parse_number() 537 538 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 539 if isinstance(this, exp.Identifier): 540 table_name = this.name 541 while self._match(TokenType.DASH, advance=False) and self._next: 542 text = "" 543 while self._is_connected() and self._curr.token_type != TokenType.DOT: 544 self._advance() 545 text += self._prev.text 546 table_name += text 547 548 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 549 elif isinstance(this, exp.Literal): 550 table_name = this.name 551 552 if self._is_connected() and self._parse_var(any_token=True): 553 table_name += self._prev.text 554 555 this = exp.Identifier(this=table_name, quoted=True) 556 557 return this 558 559 def _parse_table_parts( 560 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 561 ) -> exp.Table: 562 table = super()._parse_table_parts( 563 schema=schema, is_db_reference=is_db_reference, wildcard=True 564 ) 565 566 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 567 if not table.catalog: 568 if table.db: 569 parts = table.db.split(".") 570 if len(parts) == 2 and not table.args["db"].quoted: 571 table.set("catalog", exp.Identifier(this=parts[0])) 572 table.set("db", exp.Identifier(this=parts[1])) 573 else: 574 parts = table.name.split(".") 575 if len(parts) == 2 and not table.this.quoted: 576 table.set("db", exp.Identifier(this=parts[0])) 577 table.set("this", exp.Identifier(this=parts[1])) 578 579 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 580 alias = table.this 581 catalog, db, this, *rest = ( 582 exp.to_identifier(p, quoted=True) 583 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 584 ) 585 586 if rest and this: 587 this = exp.Dot.build([this, *rest]) # type: ignore 588 589 table = exp.Table( 590 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 591 ) 592 table.meta["quoted_table"] = True 593 else: 594 alias = None 595 596 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 597 # dataset, so if the project identifier is omitted we need to fix the ast so that 598 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 599 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 600 # views, because it would seem like the "catalog" part is set, when it'd actually 601 # be the region/dataset. 
Merging the two identifiers into a single one is done to 602 # avoid producing a 4-part Table reference, which would cause issues in the schema 603 # module, when there are 3-part table names mixed with information schema views. 604 # 605 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 606 table_parts = table.parts 607 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 608 # We need to alias the table here to avoid breaking existing qualified columns. 609 # This is expected to be safe, because if there's an actual alias coming up in 610 # the token stream, it will overwrite this one. If there isn't one, we are only 611 # exposing the name that can be used to reference the view explicitly (a no-op). 612 exp.alias_( 613 table, 614 t.cast(exp.Identifier, alias or table_parts[-1]), 615 table=True, 616 copy=False, 617 ) 618 619 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 620 table.set("this", exp.Identifier(this=info_schema_view, quoted=True)) 621 table.set("db", seq_get(table_parts, -3)) 622 table.set("catalog", seq_get(table_parts, -4)) 623 624 return table 625 626 def _parse_column(self) -> t.Optional[exp.Expression]: 627 column = super()._parse_column() 628 if isinstance(column, exp.Column): 629 parts = column.parts 630 if any("." in p.name for p in parts): 631 catalog, db, table, this, *rest = ( 632 exp.to_identifier(p, quoted=True) 633 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 634 ) 635 636 if rest and this: 637 this = exp.Dot.build([this, *rest]) # type: ignore 638 639 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 640 column.meta["quoted_column"] = True 641 642 return column 643 644 @t.overload 645 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 646 647 @t.overload 648 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
649 650 def _parse_json_object(self, agg=False): 651 json_object = super()._parse_json_object() 652 array_kv_pair = seq_get(json_object.expressions, 0) 653 654 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 655 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 656 if ( 657 array_kv_pair 658 and isinstance(array_kv_pair.this, exp.Array) 659 and isinstance(array_kv_pair.expression, exp.Array) 660 ): 661 keys = array_kv_pair.this.expressions 662 values = array_kv_pair.expression.expressions 663 664 json_object.set( 665 "expressions", 666 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 667 ) 668 669 return json_object 670 671 def _parse_bracket( 672 self, this: t.Optional[exp.Expression] = None 673 ) -> t.Optional[exp.Expression]: 674 bracket = super()._parse_bracket(this) 675 676 if this is bracket: 677 return bracket 678 679 if isinstance(bracket, exp.Bracket): 680 for expression in bracket.expressions: 681 name = expression.name.upper() 682 683 if name not in self.BRACKET_OFFSETS: 684 break 685 686 offset, safe = self.BRACKET_OFFSETS[name] 687 bracket.set("offset", offset) 688 bracket.set("safe", safe) 689 expression.replace(expression.expressions[0]) 690 691 return bracket 692 693 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 694 unnest = super()._parse_unnest(with_alias=with_alias) 695 696 if not unnest: 697 return None 698 699 unnest_expr = seq_get(unnest.expressions, 0) 700 if unnest_expr: 701 from sqlglot.optimizer.annotate_types import annotate_types 702 703 unnest_expr = annotate_types(unnest_expr) 704 705 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 706 # in contrast to other dialects such as DuckDB which flattens only the array by default 707 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 708 array_elem.is_type(exp.DataType.Type.STRUCT) 709 for array_elem in unnest_expr._type.expressions 710 ): 711 unnest.set("explode_array", True) 712 713 return unnest
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
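Two of the BigQuery-specific parses defined above in action (a sketch; generated quoting can vary by version):

import sqlglot

# Dash-separated project names are glued back into a single table part.
print(sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery").sql("bigquery"))

# OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL become offset/safe args on exp.Bracket.
bracket = sqlglot.parse_one("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery").find(sqlglot.exp.Bracket)
print(bracket.args["offset"], bracket.args["safe"])  # 1 True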
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
715 class Generator(generator.Generator): 716 INTERVAL_ALLOWS_PLURAL_FORM = False 717 JOIN_HINTS = False 718 QUERY_HINTS = False 719 TABLE_HINTS = False 720 LIMIT_FETCH = "LIMIT" 721 RENAME_TABLE_WITH_DB = False 722 NVL2_SUPPORTED = False 723 UNNEST_WITH_ORDINALITY = False 724 COLLATE_IS_FUNC = True 725 LIMIT_ONLY_LITERALS = True 726 SUPPORTS_TABLE_ALIAS_COLUMNS = False 727 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 728 JSON_KEY_VALUE_PAIR_SEP = "," 729 NULL_ORDERING_SUPPORTED = False 730 IGNORE_NULLS_IN_FUNC = True 731 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 732 CAN_IMPLEMENT_ARRAY_ANY = True 733 SUPPORTS_TO_NUMBER = False 734 NAMED_PLACEHOLDER_TOKEN = "@" 735 HEX_FUNC = "TO_HEX" 736 WITH_PROPERTIES_PREFIX = "OPTIONS" 737 SUPPORTS_EXPLODING_PROJECTIONS = False 738 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 739 740 TRANSFORMS = { 741 **generator.Generator.TRANSFORMS, 742 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 743 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 744 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 745 exp.Array: inline_array_unless_query, 746 exp.ArrayContains: _array_contains_sql, 747 exp.ArrayFilter: filter_array_using_unnest, 748 exp.ArraySize: rename_func("ARRAY_LENGTH"), 749 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 750 exp.CollateProperty: lambda self, e: ( 751 f"DEFAULT COLLATE {self.sql(e, 'this')}" 752 if e.args.get("default") 753 else f"COLLATE {self.sql(e, 'this')}" 754 ), 755 exp.Commit: lambda *_: "COMMIT TRANSACTION", 756 exp.CountIf: rename_func("COUNTIF"), 757 exp.Create: _create_sql, 758 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 759 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 760 exp.DateDiff: lambda self, e: self.func( 761 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 762 ), 763 exp.DateFromParts: rename_func("DATE"), 764 exp.DateStrToDate: datestrtodate_sql, 765 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 766 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 767 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 768 exp.DateTrunc: lambda self, e: self.func( 769 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 770 ), 771 exp.FromTimeZone: lambda self, e: self.func( 772 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 773 ), 774 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 775 exp.GroupConcat: rename_func("STRING_AGG"), 776 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 777 exp.If: if_sql(false_value="NULL"), 778 exp.ILike: no_ilike_sql, 779 exp.IntDiv: rename_func("DIV"), 780 exp.JSONFormat: rename_func("TO_JSON_STRING"), 781 exp.Levenshtein: rename_func("EDIT_DISTANCE"), 782 exp.Max: max_or_greatest, 783 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 784 exp.MD5Digest: rename_func("MD5"), 785 exp.Min: min_or_least, 786 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 787 exp.RegexpExtract: lambda self, e: self.func( 788 "REGEXP_EXTRACT", 789 e.this, 790 e.expression, 791 e.args.get("position"), 792 e.args.get("occurrence"), 793 ), 794 exp.RegexpReplace: regexp_replace_sql, 795 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 796 exp.ReturnsProperty: _returnsproperty_sql, 797 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 798 exp.Select: transforms.preprocess( 799 [ 800 transforms.explode_to_unnest(), 801 transforms.unqualify_unnest, 802 transforms.eliminate_distinct_on, 803 _alias_ordered_group, 804 
transforms.eliminate_semi_and_anti_joins, 805 ] 806 ), 807 exp.SHA: rename_func("SHA1"), 808 exp.SHA2: sha256_sql, 809 exp.StabilityProperty: lambda self, e: ( 810 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 811 ), 812 exp.String: rename_func("STRING"), 813 exp.StrToDate: _str_to_datetime_sql, 814 exp.StrToTime: _str_to_datetime_sql, 815 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 816 exp.TimeFromParts: rename_func("TIME"), 817 exp.TimestampFromParts: rename_func("DATETIME"), 818 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 819 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 820 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 821 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 822 exp.TimeStrToTime: timestrtotime_sql, 823 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 824 exp.TsOrDsAdd: _ts_or_ds_add_sql, 825 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 826 exp.TsOrDsToTime: rename_func("TIME"), 827 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 828 exp.Unhex: rename_func("FROM_HEX"), 829 exp.UnixDate: rename_func("UNIX_DATE"), 830 exp.UnixToTime: _unix_to_time_sql, 831 exp.Uuid: lambda *_: "GENERATE_UUID()", 832 exp.Values: _derived_table_values_to_unnest, 833 exp.VariancePop: rename_func("VAR_POP"), 834 } 835 836 SUPPORTED_JSON_PATH_PARTS = { 837 exp.JSONPathKey, 838 exp.JSONPathRoot, 839 exp.JSONPathSubscript, 840 } 841 842 TYPE_MAPPING = { 843 **generator.Generator.TYPE_MAPPING, 844 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 845 exp.DataType.Type.BIGINT: "INT64", 846 exp.DataType.Type.BINARY: "BYTES", 847 exp.DataType.Type.BOOLEAN: "BOOL", 848 exp.DataType.Type.CHAR: "STRING", 849 exp.DataType.Type.DECIMAL: "NUMERIC", 850 exp.DataType.Type.DOUBLE: "FLOAT64", 851 exp.DataType.Type.FLOAT: "FLOAT64", 852 exp.DataType.Type.INT: "INT64", 853 exp.DataType.Type.NCHAR: "STRING", 854 exp.DataType.Type.NVARCHAR: "STRING", 855 exp.DataType.Type.SMALLINT: "INT64", 856 exp.DataType.Type.TEXT: "STRING", 857 exp.DataType.Type.TIMESTAMP: "DATETIME", 858 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 859 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 860 exp.DataType.Type.TINYINT: "INT64", 861 exp.DataType.Type.ROWVERSION: "BYTES", 862 exp.DataType.Type.UUID: "STRING", 863 exp.DataType.Type.VARBINARY: "BYTES", 864 exp.DataType.Type.VARCHAR: "STRING", 865 exp.DataType.Type.VARIANT: "ANY TYPE", 866 } 867 868 PROPERTIES_LOCATION = { 869 **generator.Generator.PROPERTIES_LOCATION, 870 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 871 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 872 } 873 874 # WINDOW comes after QUALIFY 875 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 876 AFTER_HAVING_MODIFIER_TRANSFORMS = { 877 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 878 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 879 } 880 881 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 882 RESERVED_KEYWORDS = { 883 "all", 884 "and", 885 "any", 886 "array", 887 "as", 888 "asc", 889 "assert_rows_modified", 890 "at", 891 "between", 892 "by", 893 "case", 894 "cast", 895 "collate", 896 "contains", 897 "create", 898 "cross", 899 "cube", 900 "current", 901 "default", 902 "define", 903 "desc", 904 "distinct", 905 "else", 906 "end", 907 "enum", 908 "escape", 909 "except", 910 "exclude", 911 "exists", 912 "extract", 913 "false", 914 "fetch", 915 "following", 916 "for", 917 "from", 918 "full", 919 
"group", 920 "grouping", 921 "groups", 922 "hash", 923 "having", 924 "if", 925 "ignore", 926 "in", 927 "inner", 928 "intersect", 929 "interval", 930 "into", 931 "is", 932 "join", 933 "lateral", 934 "left", 935 "like", 936 "limit", 937 "lookup", 938 "merge", 939 "natural", 940 "new", 941 "no", 942 "not", 943 "null", 944 "nulls", 945 "of", 946 "on", 947 "or", 948 "order", 949 "outer", 950 "over", 951 "partition", 952 "preceding", 953 "proto", 954 "qualify", 955 "range", 956 "recursive", 957 "respect", 958 "right", 959 "rollup", 960 "rows", 961 "select", 962 "set", 963 "some", 964 "struct", 965 "tablesample", 966 "then", 967 "to", 968 "treat", 969 "true", 970 "unbounded", 971 "union", 972 "unnest", 973 "using", 974 "when", 975 "where", 976 "window", 977 "with", 978 "within", 979 } 980 981 def mod_sql(self, expression: exp.Mod) -> str: 982 this = expression.this 983 expr = expression.expression 984 return self.func( 985 "MOD", 986 this.unnest() if isinstance(this, exp.Paren) else this, 987 expr.unnest() if isinstance(expr, exp.Paren) else expr, 988 ) 989 990 def column_parts(self, expression: exp.Column) -> str: 991 if expression.meta.get("quoted_column"): 992 # If a column reference is of the form `dataset.table`.name, we need 993 # to preserve the quoted table path, otherwise the reference breaks 994 table_parts = ".".join(p.name for p in expression.parts[:-1]) 995 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 996 return f"{table_path}.{self.sql(expression, 'this')}" 997 998 return super().column_parts(expression) 999 1000 def table_parts(self, expression: exp.Table) -> str: 1001 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1002 # we need to make sure the correct quoting is used in each case. 1003 # 1004 # For example, if there is a CTE x that clashes with a schema name, then the former will 1005 # return the table y in that schema, whereas the latter will return the CTE's y column: 1006 # 1007 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1008 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1009 if expression.meta.get("quoted_table"): 1010 table_parts = ".".join(p.name for p in expression.parts) 1011 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1012 1013 return super().table_parts(expression) 1014 1015 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1016 if isinstance(expression.this, exp.TsOrDsToTimestamp): 1017 func_name = "FORMAT_DATETIME" 1018 else: 1019 func_name = "FORMAT_DATE" 1020 this = ( 1021 expression.this 1022 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1023 else expression 1024 ) 1025 return self.func(func_name, self.format_time(expression), this.this) 1026 1027 def eq_sql(self, expression: exp.EQ) -> str: 1028 # Operands of = cannot be NULL in BigQuery 1029 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1030 if not isinstance(expression.parent, exp.Update): 1031 return "NULL" 1032 1033 return self.binary(expression, "=") 1034 1035 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1036 parent = expression.parent 1037 1038 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1039 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1040 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1041 return self.func( 1042 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1043 ) 1044 1045 return super().attimezone_sql(expression) 1046 1047 def trycast_sql(self, expression: exp.TryCast) -> str: 1048 return self.cast_sql(expression, safe_prefix="SAFE_") 1049 1050 def bracket_sql(self, expression: exp.Bracket) -> str: 1051 this = expression.this 1052 expressions = expression.expressions 1053 1054 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1055 arg = expressions[0] 1056 if arg.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 arg = annotate_types(arg) 1060 1061 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1062 # BQ doesn't support bracket syntax with string values for structs 1063 return f"{self.sql(this)}.{arg.name}" 1064 1065 expressions_sql = self.expressions(expression, flat=True) 1066 offset = expression.args.get("offset") 1067 1068 if offset == 0: 1069 expressions_sql = f"OFFSET({expressions_sql})" 1070 elif offset == 1: 1071 expressions_sql = f"ORDINAL({expressions_sql})" 1072 elif offset is not None: 1073 self.unsupported(f"Unsupported array offset: {offset}") 1074 1075 if expression.args.get("safe"): 1076 expressions_sql = f"SAFE_{expressions_sql}" 1077 1078 return f"{self.sql(this)}[{expressions_sql}]" 1079 1080 def in_unnest_op(self, expression: exp.Unnest) -> str: 1081 return self.sql(expression) 1082 1083 def version_sql(self, expression: exp.Version) -> str: 1084 if expression.name == "TIMESTAMP": 1085 expression.set("this", "SYSTEM_TIME") 1086 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
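For instance, pretty generation and a BigQuery-specific rewrite via the module-level API, which forwards these arguments to the Generator (a sketch; outputs assume recent sqlglot versions):

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="bigquery", pretty=True)[0])
# SELECT
#   a,
#   b
# FROM t
# WHERE
#   a > 1

# BigQuery has no ILIKE; the TRANSFORMS above rewrite it.
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", read="duckdb", write="bigquery")[0])
# Expected: SELECT * FROM t WHERE LOWER(x) LIKE LOWER('%a%')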
990 def column_parts(self, expression: exp.Column) -> str: 991 if expression.meta.get("quoted_column"): 992 # If a column reference is of the form `dataset.table`.name, we need 993 # to preserve the quoted table path, otherwise the reference breaks 994 table_parts = ".".join(p.name for p in expression.parts[:-1]) 995 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 996 return f"{table_path}.{self.sql(expression, 'this')}" 997 998 return super().column_parts(expression)
1000 def table_parts(self, expression: exp.Table) -> str: 1001 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1002 # we need to make sure the correct quoting is used in each case. 1003 # 1004 # For example, if there is a CTE x that clashes with a schema name, then the former will 1005 # return the table y in that schema, whereas the latter will return the CTE's y column: 1006 # 1007 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1008 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1009 if expression.meta.get("quoted_table"): 1010 table_parts = ".".join(p.name for p in expression.parts) 1011 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1012 1013 return super().table_parts(expression)
1015 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1016 if isinstance(expression.this, exp.TsOrDsToTimestamp): 1017 func_name = "FORMAT_DATETIME" 1018 else: 1019 func_name = "FORMAT_DATE" 1020 this = ( 1021 expression.this 1022 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1023 else expression 1024 ) 1025 return self.func(func_name, self.format_time(expression), this.this)
1027 def eq_sql(self, expression: exp.EQ) -> str: 1028 # Operands of = cannot be NULL in BigQuery 1029 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1030 if not isinstance(expression.parent, exp.Update): 1031 return "NULL" 1032 1033 return self.binary(expression, "=")
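Because x = NULL can never be TRUE in BigQuery, comparisons against a NULL literal (outside of UPDATE SET clauses) are folded directly to NULL:

import sqlglot

print(sqlglot.transpile("SELECT a = NULL FROM t", write="bigquery")[0])
# Expected: SELECT NULL FROM t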
1035 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1036 parent = expression.parent 1037 1038 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1039 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1040 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1041 return self.func( 1042 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1043 ) 1044 1045 return super().attimezone_sql(expression)
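Reading AT TIME ZONE from another dialect therefore becomes a DATETIME/TIMESTAMP round-trip, mirroring the first branch above:

import sqlglot

print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# Expected: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))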
1050 def bracket_sql(self, expression: exp.Bracket) -> str: 1051 this = expression.this 1052 expressions = expression.expressions 1053 1054 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1055 arg = expressions[0] 1056 if arg.type is None: 1057 from sqlglot.optimizer.annotate_types import annotate_types 1058 1059 arg = annotate_types(arg) 1060 1061 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1062 # BQ doesn't support bracket syntax with string values for structs 1063 return f"{self.sql(this)}.{arg.name}" 1064 1065 expressions_sql = self.expressions(expression, flat=True) 1066 offset = expression.args.get("offset") 1067 1068 if offset == 0: 1069 expressions_sql = f"OFFSET({expressions_sql})" 1070 elif offset == 1: 1071 expressions_sql = f"ORDINAL({expressions_sql})" 1072 elif offset is not None: 1073 self.unsupported(f"Unsupported array offset: {offset}") 1074 1075 if expression.args.get("safe"): 1076 expressions_sql = f"SAFE_{expressions_sql}" 1077 1078 return f"{self.sql(this)}[{expressions_sql}]"
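Paired with the _parse_bracket logic in the Parser, these bracket forms round-trip through the BigQuery dialect:

import sqlglot

sql = "SELECT arr[OFFSET(0)], arr[ORDINAL(1)], arr[SAFE_OFFSET(2)] FROM t"
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# Expected to come back unchanged.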
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql