sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")
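
# Usage sketch (illustrative, not part of the dialect implementation): this dialect is
# normally driven through sqlglot's top-level API by naming it via `read`/`write` or
# `dialect`. A minimal example, assuming sqlglot is installed:
#
#   import sqlglot
#
#   # Parse BigQuery SQL into sqlglot's AST using this dialect.
#   ast = sqlglot.parse_one("SELECT TO_JSON_STRING(t) FROM `proj.dataset.tbl` AS t", read="bigquery")
#
#   # Generate BigQuery SQL back out, or transpile to another dialect.
#   print(ast.sql(dialect="bigquery"))
#   print(sqlglot.transpile("SELECT TIMESTAMP_MILLIS(1)", read="bigquery", write="duckdb")[0])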


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)
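
# Illustrative sketch (not from the source, for orientation only): the UnixToTime handling
# above maps the common scales onto BigQuery's dedicated constructors and falls back to
# seconds for anything else, roughly:
#
#   exp.UnixToTime(this=x)                               -> TIMESTAMP_SECONDS(x)
#   exp.UnixToTime(this=x, scale=exp.UnixToTime.MILLIS)  -> TIMESTAMP_MILLIS(x)
#   exp.UnixToTime(this=x, scale=exp.UnixToTime.MICROS)  -> TIMESTAMP_MICROS(x)
#   any other scale                                      -> TIMESTAMP_SECONDS(CAST(x / POW(10, scale) AS INT64))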


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder
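
# Illustrative sketch (not from the source, for orientation only): BigQuery's FORMAT_DATE,
# FORMAT_DATETIME and FORMAT_TIMESTAMP take the format string first and the value second.
# The builder above normalizes them into exp.TimeToStr, wrapping the value in the given
# expr_type, so e.g. FORMAT_DATE('%Y-%m-%d', d) is parsed roughly as
#
#   exp.TimeToStr(this=exp.TsOrDsToDate(this=d), format='%Y-%m-%d')
#
# and the Generator's timetostr_sql below picks the matching FORMAT_* function when
# generating BigQuery SQL again.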


def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    PRESERVE_ORIGINAL_NAMES = True
    HEX_STRING_IS_INTEGER_TYPE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types
    TYPE_TO_EXPRESSIONS = {
        **Dialect.TYPE_TO_EXPRESSIONS,
        exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP],
    }
    TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: annotate_with_type_lambda(data_type)
            for data_type, expressions in TYPE_TO_EXPRESSIONS.items()
            for expr_type in expressions
        },
        **{
            expr_type: lambda self, e: _annotate_math_functions(self, e)
            for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round)
        },
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (
                exp.Left,
                exp.Right,
                exp.Lower,
                exp.Upper,
                exp.Pad,
                exp.Trim,
                exp.RegexpExtract,
                exp.RegexpReplace,
                exp.Repeat,
                exp.Substring,
            )
        },
        exp.Concat: _annotate_concat,
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column
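
        # Illustrative sketch (assumed example, for orientation only): a reference whose table
        # path is a single back-quoted identifier containing dots, e.g.
        #
        #   SELECT `proj.dataset.tbl`.col FROM `proj.dataset.tbl`
        #
        # is re-split by _parse_column/_parse_table_parts above into quoted catalog/db/table
        # parts and tagged with meta["quoted_column"] / meta["quoted_table"], so that the
        # Generator's column_parts/table_parts further below can re-emit the original quoting.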

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )
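
        # Illustrative sketch (assumed statement, for orientation only): _parse_export_data above
        # handles BigQuery's EXPORT DATA statement, e.g.
        #
        #   EXPORT DATA OPTIONS (uri = 'gs://bucket/file-*.csv', format = 'CSV') AS
        #   SELECT * FROM dataset.tbl
        #
        # producing an exp.Export node whose `options` hold the OPTIONS properties and whose
        # `this` is the exported SELECT.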

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func(
                "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone")
            ),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BLOB: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }
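
        # Illustrative sketch (assumed, for orientation only): TYPE_MAPPING above is what makes
        # generic sqlglot types render under their BigQuery names, e.g. roughly
        #
        #   exp.cast(exp.column("x"), "timestamptz").sql(dialect="bigquery")  # -> 'CAST(x AS TIMESTAMP)'
        #   exp.cast(exp.column("x"), "double").sql(dialect="bigquery")       # -> 'CAST(x AS FLOAT64)'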

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
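
        # Illustrative sketch (assumed, for orientation only): together with the Parser's
        # BRACKET_OFFSETS table, bracket_sql above round-trips BigQuery's positional array
        # accessors -- arr[OFFSET(0)], arr[ORDINAL(1)], arr[SAFE_OFFSET(0)], arr[SAFE_ORDINAL(1)]
        # are parsed into exp.Bracket(offset=..., safe=...) and regenerated in the same form.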

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)
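
        # Illustrative sketch (assumed, for orientation only): per cast_sql above, a typed array
        # literal such as SELECT ARRAY<INT64>[1, 2, 3] should round-trip through parse/generate
        # unchanged, while ARRAY(SELECT ...) expressions fall back to the default CAST rendering.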
e.text("unit"), e.args.get("zone") 952 ), 953 exp.FromTimeZone: lambda self, e: self.func( 954 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 955 ), 956 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 957 exp.GroupConcat: lambda self, e: groupconcat_sql( 958 self, e, func_name="STRING_AGG", within_group=False 959 ), 960 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 961 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 962 exp.If: if_sql(false_value="NULL"), 963 exp.ILike: no_ilike_sql, 964 exp.IntDiv: rename_func("DIV"), 965 exp.Int64: rename_func("INT64"), 966 exp.JSONExtract: _json_extract_sql, 967 exp.JSONExtractArray: _json_extract_sql, 968 exp.JSONExtractScalar: _json_extract_sql, 969 exp.JSONFormat: rename_func("TO_JSON_STRING"), 970 exp.Levenshtein: _levenshtein_sql, 971 exp.Max: max_or_greatest, 972 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 973 exp.MD5Digest: rename_func("MD5"), 974 exp.Min: min_or_least, 975 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 976 exp.RegexpExtract: lambda self, e: self.func( 977 "REGEXP_EXTRACT", 978 e.this, 979 e.expression, 980 e.args.get("position"), 981 e.args.get("occurrence"), 982 ), 983 exp.RegexpExtractAll: lambda self, e: self.func( 984 "REGEXP_EXTRACT_ALL", e.this, e.expression 985 ), 986 exp.RegexpReplace: regexp_replace_sql, 987 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 988 exp.ReturnsProperty: _returnsproperty_sql, 989 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 990 exp.Select: transforms.preprocess( 991 [ 992 transforms.explode_projection_to_unnest(), 993 transforms.unqualify_unnest, 994 transforms.eliminate_distinct_on, 995 _alias_ordered_group, 996 transforms.eliminate_semi_and_anti_joins, 997 ] 998 ), 999 exp.SHA: rename_func("SHA1"), 1000 exp.SHA2: sha256_sql, 1001 exp.StabilityProperty: lambda self, e: ( 1002 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1003 ), 1004 exp.String: rename_func("STRING"), 1005 exp.StrPosition: lambda self, e: ( 1006 strposition_sql( 1007 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1008 ) 1009 ), 1010 exp.StrToDate: _str_to_datetime_sql, 1011 exp.StrToTime: _str_to_datetime_sql, 1012 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1013 exp.TimeFromParts: rename_func("TIME"), 1014 exp.TimestampFromParts: rename_func("DATETIME"), 1015 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1016 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1017 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1018 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1019 exp.TimeStrToTime: timestrtotime_sql, 1020 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1021 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1022 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1023 exp.TsOrDsToTime: rename_func("TIME"), 1024 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1025 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1026 exp.Unhex: rename_func("FROM_HEX"), 1027 exp.UnixDate: rename_func("UNIX_DATE"), 1028 exp.UnixToTime: _unix_to_time_sql, 1029 exp.Uuid: lambda *_: "GENERATE_UUID()", 1030 exp.Values: _derived_table_values_to_unnest, 1031 exp.VariancePop: rename_func("VAR_POP"), 1032 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1033 } 1034 1035 SUPPORTED_JSON_PATH_PARTS = { 1036 exp.JSONPathKey, 1037 exp.JSONPathRoot, 1038 exp.JSONPathSubscript, 1039 } 1040 1041 TYPE_MAPPING = { 1042 
**generator.Generator.TYPE_MAPPING, 1043 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1044 exp.DataType.Type.BIGINT: "INT64", 1045 exp.DataType.Type.BINARY: "BYTES", 1046 exp.DataType.Type.BLOB: "BYTES", 1047 exp.DataType.Type.BOOLEAN: "BOOL", 1048 exp.DataType.Type.CHAR: "STRING", 1049 exp.DataType.Type.DECIMAL: "NUMERIC", 1050 exp.DataType.Type.DOUBLE: "FLOAT64", 1051 exp.DataType.Type.FLOAT: "FLOAT64", 1052 exp.DataType.Type.INT: "INT64", 1053 exp.DataType.Type.NCHAR: "STRING", 1054 exp.DataType.Type.NVARCHAR: "STRING", 1055 exp.DataType.Type.SMALLINT: "INT64", 1056 exp.DataType.Type.TEXT: "STRING", 1057 exp.DataType.Type.TIMESTAMP: "DATETIME", 1058 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1059 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1060 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1061 exp.DataType.Type.TINYINT: "INT64", 1062 exp.DataType.Type.ROWVERSION: "BYTES", 1063 exp.DataType.Type.UUID: "STRING", 1064 exp.DataType.Type.VARBINARY: "BYTES", 1065 exp.DataType.Type.VARCHAR: "STRING", 1066 exp.DataType.Type.VARIANT: "ANY TYPE", 1067 } 1068 1069 PROPERTIES_LOCATION = { 1070 **generator.Generator.PROPERTIES_LOCATION, 1071 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1072 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1073 } 1074 1075 # WINDOW comes after QUALIFY 1076 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1077 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1078 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1079 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1080 } 1081 1082 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1083 RESERVED_KEYWORDS = { 1084 "all", 1085 "and", 1086 "any", 1087 "array", 1088 "as", 1089 "asc", 1090 "assert_rows_modified", 1091 "at", 1092 "between", 1093 "by", 1094 "case", 1095 "cast", 1096 "collate", 1097 "contains", 1098 "create", 1099 "cross", 1100 "cube", 1101 "current", 1102 "default", 1103 "define", 1104 "desc", 1105 "distinct", 1106 "else", 1107 "end", 1108 "enum", 1109 "escape", 1110 "except", 1111 "exclude", 1112 "exists", 1113 "extract", 1114 "false", 1115 "fetch", 1116 "following", 1117 "for", 1118 "from", 1119 "full", 1120 "group", 1121 "grouping", 1122 "groups", 1123 "hash", 1124 "having", 1125 "if", 1126 "ignore", 1127 "in", 1128 "inner", 1129 "intersect", 1130 "interval", 1131 "into", 1132 "is", 1133 "join", 1134 "lateral", 1135 "left", 1136 "like", 1137 "limit", 1138 "lookup", 1139 "merge", 1140 "natural", 1141 "new", 1142 "no", 1143 "not", 1144 "null", 1145 "nulls", 1146 "of", 1147 "on", 1148 "or", 1149 "order", 1150 "outer", 1151 "over", 1152 "partition", 1153 "preceding", 1154 "proto", 1155 "qualify", 1156 "range", 1157 "recursive", 1158 "respect", 1159 "right", 1160 "rollup", 1161 "rows", 1162 "select", 1163 "set", 1164 "some", 1165 "struct", 1166 "tablesample", 1167 "then", 1168 "to", 1169 "treat", 1170 "true", 1171 "unbounded", 1172 "union", 1173 "unnest", 1174 "using", 1175 "when", 1176 "where", 1177 "window", 1178 "with", 1179 "within", 1180 } 1181 1182 def mod_sql(self, expression: exp.Mod) -> str: 1183 this = expression.this 1184 expr = expression.expression 1185 return self.func( 1186 "MOD", 1187 this.unnest() if isinstance(this, exp.Paren) else this, 1188 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1189 ) 1190 1191 def column_parts(self, expression: exp.Column) -> str: 1192 if expression.meta.get("quoted_column"): 1193 # If a column 
reference is of the form `dataset.table`.name, we need 1194 # to preserve the quoted table path, otherwise the reference breaks 1195 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1196 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1197 return f"{table_path}.{self.sql(expression, 'this')}" 1198 1199 return super().column_parts(expression) 1200 1201 def table_parts(self, expression: exp.Table) -> str: 1202 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1203 # we need to make sure the correct quoting is used in each case. 1204 # 1205 # For example, if there is a CTE x that clashes with a schema name, then the former will 1206 # return the table y in that schema, whereas the latter will return the CTE's y column: 1207 # 1208 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1209 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1210 if expression.meta.get("quoted_table"): 1211 table_parts = ".".join(p.name for p in expression.parts) 1212 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1213 1214 return super().table_parts(expression) 1215 1216 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1217 this = expression.this 1218 if isinstance(this, exp.TsOrDsToDatetime): 1219 func_name = "FORMAT_DATETIME" 1220 elif isinstance(this, exp.TsOrDsToTimestamp): 1221 func_name = "FORMAT_TIMESTAMP" 1222 else: 1223 func_name = "FORMAT_DATE" 1224 1225 time_expr = ( 1226 this 1227 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1228 else expression 1229 ) 1230 return self.func( 1231 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1232 ) 1233 1234 def eq_sql(self, expression: exp.EQ) -> str: 1235 # Operands of = cannot be NULL in BigQuery 1236 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1237 if not isinstance(expression.parent, exp.Update): 1238 return "NULL" 1239 1240 return self.binary(expression, "=") 1241 1242 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1243 parent = expression.parent 1244 1245 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1246 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1247 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1248 return self.func( 1249 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1250 ) 1251 1252 return super().attimezone_sql(expression) 1253 1254 def trycast_sql(self, expression: exp.TryCast) -> str: 1255 return self.cast_sql(expression, safe_prefix="SAFE_") 1256 1257 def bracket_sql(self, expression: exp.Bracket) -> str: 1258 this = expression.this 1259 expressions = expression.expressions 1260 1261 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1262 arg = expressions[0] 1263 if arg.type is None: 1264 from sqlglot.optimizer.annotate_types import annotate_types 1265 1266 arg = annotate_types(arg, dialect=self.dialect) 1267 1268 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1269 # BQ doesn't support bracket syntax with string values for structs 1270 return f"{self.sql(this)}.{arg.name}" 1271 1272 expressions_sql = self.expressions(expression, flat=True) 1273 offset = expression.args.get("offset") 1274 1275 if offset == 0: 1276 expressions_sql = f"OFFSET({expressions_sql})" 1277 elif offset == 1: 1278 expressions_sql = f"ORDINAL({expressions_sql})" 1279 elif offset is not None: 1280 self.unsupported(f"Unsupported array offset: {offset}") 1281 1282 if expression.args.get("safe"): 1283 expressions_sql = f"SAFE_{expressions_sql}" 1284 1285 return f"{self.sql(this)}[{expressions_sql}]" 1286 1287 def in_unnest_op(self, expression: exp.Unnest) -> str: 1288 return self.sql(expression) 1289 1290 def version_sql(self, expression: exp.Version) -> str: 1291 if expression.name == "TIMESTAMP": 1292 expression.set("this", "SYSTEM_TIME") 1293 return super().version_sql(expression) 1294 1295 def contains_sql(self, expression: exp.Contains) -> str: 1296 this = expression.this 1297 expr = expression.expression 1298 1299 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1300 this = this.this 1301 expr = expr.this 1302 1303 return self.func("CONTAINS_SUBSTR", this, expr) 1304 1305 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1306 this = expression.this 1307 1308 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1309 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1310 # because they aren't literals and so the above syntax is invalid BigQuery. 1311 if isinstance(this, exp.Array): 1312 elem = seq_get(this.expressions, 0) 1313 if not (elem and elem.find(exp.Query)): 1314 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1315 1316 return super().cast_sql(expression, safe_prefix=safe_prefix)
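The dialect pieces listed above are normally exercised through sqlglot's top-level API. Below is a minimal, hedged round-trip sketch (the regenerated SQL is simply printed, not asserted) showing BigQuery-specific syntax flowing through this module's Parser and Generator:

```python
import sqlglot

# Illustrative round-trip through the BigQuery dialect defined in this module:
# backquoted table paths, SAFE_OFFSET brackets and TIMESTAMP_MILLIS are handled
# by the Parser/Generator overrides shown above.
ast = sqlglot.parse_one(
    "SELECT arr[SAFE_OFFSET(0)], TIMESTAMP_MILLIS(ts) FROM `proj.dataset.tbl`",
    read="bigquery",
)
print(ast.sql(dialect="bigquery"))
```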
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except in the following cases (sketched below):
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
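A hedged sketch of this resolution using the optimizer's qualify pass (the resolved SQL is printed rather than asserted, and the exact formatting may differ):

```python
import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

# Each dialect resolves the `my_id` alias references differently; qualify() applies
# the dialect-specific alias expansion described above.
for dialect in ("bigquery", "clickhouse", "duckdb"):
    resolved = qualify(sqlglot.parse_one(sql, read=dialect), dialect=dialect)
    print(dialect, "->", resolved.sql(dialect=dialect))
```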
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
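For instance (a hedged sketch; the transpiled strings are printed rather than asserted), format tokens flow through TIME_MAPPING when moving between BigQuery and a dialect with different strftime conventions:

```python
import sqlglot

# BigQuery format tokens are translated via the dialect's TIME_MAPPING when the
# target dialect spells its time formats differently.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d)", read="bigquery", write="duckdb")[0])
print(sqlglot.transpile("SELECT STRFTIME(d, '%Y-%m-%d')", read="duckdb", write="bigquery")[0])
```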
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
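For example (hedged; the generated SQL is printed, not asserted), BigQuery is a dialect where the quantifier must be spelled out, so a bare UNION coming from another dialect is rendered with an explicit one:

```python
import sqlglot

# BigQuery requires DISTINCT or ALL on set operations; printing the transpiled
# result shows how a bare UNION from another dialect is rendered for BigQuery.
print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="bigquery")[0])
```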
453 def normalize_identifier(self, expression: E) -> E: 454 if ( 455 isinstance(expression, exp.Identifier) 456 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 457 ): 458 parent = expression.parent 459 while isinstance(parent, exp.Dot): 460 parent = parent.parent 461 462 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 463 # by default. The following check uses a heuristic to detect tables based on whether 464 # they are qualified. This should generally be correct, because tables in BigQuery 465 # must be qualified with at least a dataset, unless @@dataset_id is set. 466 case_sensitive = ( 467 isinstance(parent, exp.UserDefinedFunction) 468 or ( 469 isinstance(parent, exp.Table) 470 and parent.db 471 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 472 ) 473 or expression.meta.get("is_table") 474 ) 475 if not case_sensitive: 476 expression.set("this", expression.this.lower()) 477 478 return t.cast(E, expression) 479 480 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
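A minimal sketch of the behavior described above (the dialect classes are instantiated directly here; the results are printed rather than asserted):

```python
from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery
from sqlglot.dialects.postgres import Postgres
from sqlglot.dialects.snowflake import Snowflake

# Each dialect normalizes an unquoted identifier according to its own resolution rules.
for dialect_cls in (Postgres, Snowflake, BigQuery):
    ident = exp.to_identifier("FoO")  # unquoted identifier
    print(dialect_cls.__name__, dialect_cls().normalize_identifier(ident).name)
```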
Mapping of an escaped sequence (\n) to its unescaped version (an actual newline character).
482 class Tokenizer(tokens.Tokenizer): 483 QUOTES = ["'", '"', '"""', "'''"] 484 COMMENTS = ["--", "#", ("/*", "*/")] 485 IDENTIFIERS = ["`"] 486 STRING_ESCAPES = ["\\"] 487 488 HEX_STRINGS = [("0x", ""), ("0X", "")] 489 490 BYTE_STRINGS = [ 491 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 492 ] 493 494 RAW_STRINGS = [ 495 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 496 ] 497 498 KEYWORDS = { 499 **tokens.Tokenizer.KEYWORDS, 500 "ANY TYPE": TokenType.VARIANT, 501 "BEGIN": TokenType.COMMAND, 502 "BEGIN TRANSACTION": TokenType.BEGIN, 503 "BYTEINT": TokenType.INT, 504 "BYTES": TokenType.BINARY, 505 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 506 "DATETIME": TokenType.TIMESTAMP, 507 "DECLARE": TokenType.COMMAND, 508 "ELSEIF": TokenType.COMMAND, 509 "EXCEPTION": TokenType.COMMAND, 510 "EXPORT": TokenType.EXPORT, 511 "FLOAT64": TokenType.DOUBLE, 512 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 513 "MODEL": TokenType.MODEL, 514 "NOT DETERMINISTIC": TokenType.VOLATILE, 515 "RECORD": TokenType.STRUCT, 516 "TIMESTAMP": TokenType.TIMESTAMPTZ, 517 } 518 KEYWORDS.pop("DIV") 519 KEYWORDS.pop("VALUES") 520 KEYWORDS.pop("/*+")
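A small usage sketch of this tokenizer (the token stream is printed, not asserted):

```python
from sqlglot.dialects.bigquery import BigQuery

# The Tokenizer above treats backticks as identifier quotes and r'...' as raw strings.
for token in BigQuery().tokenize("SELECT r'\\d+' AS pattern FROM `proj.dataset.tbl`"):
    print(token.token_type, token.text)
```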
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
522 class Parser(parser.Parser): 523 PREFIXED_PIVOT_COLUMNS = True 524 LOG_DEFAULTS_TO_LN = True 525 SUPPORTS_IMPLICIT_UNNEST = True 526 527 FUNCTIONS = { 528 **parser.Parser.FUNCTIONS, 529 "CONTAINS_SUBSTR": _build_contains_substring, 530 "DATE": _build_date, 531 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 532 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 533 "DATE_TRUNC": lambda args: exp.DateTrunc( 534 unit=exp.Literal.string(str(seq_get(args, 1))), 535 this=seq_get(args, 0), 536 zone=seq_get(args, 2), 537 ), 538 "DATETIME": _build_datetime, 539 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 540 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 541 "DIV": binary_from_function(exp.IntDiv), 542 "EDIT_DISTANCE": _build_levenshtein, 543 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 544 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 545 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 546 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 547 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 548 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 549 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 550 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 551 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 552 "MD5": exp.MD5Digest.from_arg_list, 553 "TO_HEX": _build_to_hex, 554 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 555 [seq_get(args, 1), seq_get(args, 0)] 556 ), 557 "PARSE_TIMESTAMP": _build_parse_timestamp, 558 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 559 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 560 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 561 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 562 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 563 ), 564 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 565 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 566 "SPLIT": lambda args: exp.Split( 567 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 568 this=seq_get(args, 0), 569 expression=seq_get(args, 1) or exp.Literal.string(","), 570 ), 571 "STRPOS": exp.StrPosition.from_arg_list, 572 "TIME": _build_time, 573 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 574 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 575 "TIMESTAMP": _build_timestamp, 576 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 577 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 578 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 579 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 580 ), 581 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 582 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 583 ), 584 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 585 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 586 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 587 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 588 } 589 590 FUNCTION_PARSERS = { 591 **parser.Parser.FUNCTION_PARSERS, 592 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 593 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 594 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 595 } 596 FUNCTION_PARSERS.pop("TRIM") 597 598 NO_PAREN_FUNCTIONS = { 599 **parser.Parser.NO_PAREN_FUNCTIONS, 600 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 601 } 602 603 NESTED_TYPE_TOKENS = { 604 *parser.Parser.NESTED_TYPE_TOKENS, 605 TokenType.TABLE, 606 } 607 608 PROPERTY_PARSERS = { 609 **parser.Parser.PROPERTY_PARSERS, 610 "NOT DETERMINISTIC": lambda self: self.expression( 611 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 612 ), 613 "OPTIONS": lambda self: self._parse_with_property(), 614 } 615 616 CONSTRAINT_PARSERS = { 617 **parser.Parser.CONSTRAINT_PARSERS, 618 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 619 } 620 621 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 622 RANGE_PARSERS.pop(TokenType.OVERLAPS) 623 624 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 625 626 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 627 628 STATEMENT_PARSERS = { 629 **parser.Parser.STATEMENT_PARSERS, 630 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 631 TokenType.END: lambda self: self._parse_as_command(self._prev), 632 TokenType.FOR: lambda self: self._parse_for_in(), 633 TokenType.EXPORT: lambda self: self._parse_export_data(), 634 } 635 636 BRACKET_OFFSETS = { 637 "OFFSET": (0, False), 638 "ORDINAL": (1, False), 639 "SAFE_OFFSET": (0, True), 640 "SAFE_ORDINAL": (1, True), 641 } 642 643 def _parse_for_in(self) -> exp.ForIn: 644 this = self._parse_range() 645 self._match_text_seq("DO") 646 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 647 648 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 649 this = super()._parse_table_part(schema=schema) or self._parse_number() 650 651 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 652 if isinstance(this, exp.Identifier): 653 table_name = this.name 654 while self._match(TokenType.DASH, advance=False) and self._next: 655 start = self._curr 656 while self._is_connected() and not self._match_set( 657 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 658 ): 659 self._advance() 660 661 table_name += self._find_sql(start, self._prev) 662 663 this = exp.Identifier( 664 this=table_name, quoted=this.args.get("quoted") 665 ).update_positions(this) 666 elif isinstance(this, exp.Literal): 667 table_name = this.name 668 669 if self._is_connected() and self._parse_var(any_token=True): 670 table_name += self._prev.text 671 672 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 673 674 return this 675 676 def _parse_table_parts( 677 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 678 ) -> exp.Table: 679 table = super()._parse_table_parts( 680 schema=schema, is_db_reference=is_db_reference, wildcard=True 681 ) 682 683 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 684 if not table.catalog: 685 if table.db: 686 previous_db = table.args["db"] 687 parts = table.db.split(".") 688 if len(parts) == 2 and not table.args["db"].quoted: 689 table.set( 690 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 691 ) 692 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 693 else: 694 previous_this = table.this 695 parts = table.name.split(".") 696 if len(parts) == 2 and not table.this.quoted: 697 table.set( 698 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 699 ) 700 table.set( 
701 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 702 ) 703 704 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 705 alias = table.this 706 catalog, db, this, *rest = ( 707 exp.to_identifier(p, quoted=True) 708 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 709 ) 710 711 for part in (catalog, db, this): 712 if part: 713 part.update_positions(table.this) 714 715 if rest and this: 716 this = exp.Dot.build([this, *rest]) # type: ignore 717 718 table = exp.Table( 719 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 720 ) 721 table.meta["quoted_table"] = True 722 else: 723 alias = None 724 725 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 726 # dataset, so if the project identifier is omitted we need to fix the ast so that 727 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 728 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 729 # views, because it would seem like the "catalog" part is set, when it'd actually 730 # be the region/dataset. Merging the two identifiers into a single one is done to 731 # avoid producing a 4-part Table reference, which would cause issues in the schema 732 # module, when there are 3-part table names mixed with information schema views. 733 # 734 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 735 table_parts = table.parts 736 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 737 # We need to alias the table here to avoid breaking existing qualified columns. 738 # This is expected to be safe, because if there's an actual alias coming up in 739 # the token stream, it will overwrite this one. If there isn't one, we are only 740 # exposing the name that can be used to reference the view explicitly (a no-op). 741 exp.alias_( 742 table, 743 t.cast(exp.Identifier, alias or table_parts[-1]), 744 table=True, 745 copy=False, 746 ) 747 748 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 749 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 750 line=table_parts[-2].meta.get("line"), 751 col=table_parts[-1].meta.get("col"), 752 start=table_parts[-2].meta.get("start"), 753 end=table_parts[-1].meta.get("end"), 754 ) 755 table.set("this", new_this) 756 table.set("db", seq_get(table_parts, -3)) 757 table.set("catalog", seq_get(table_parts, -4)) 758 759 return table 760 761 def _parse_column(self) -> t.Optional[exp.Expression]: 762 column = super()._parse_column() 763 if isinstance(column, exp.Column): 764 parts = column.parts 765 if any("." in p.name for p in parts): 766 catalog, db, table, this, *rest = ( 767 exp.to_identifier(p, quoted=True) 768 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 769 ) 770 771 if rest and this: 772 this = exp.Dot.build([this, *rest]) # type: ignore 773 774 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 775 column.meta["quoted_column"] = True 776 777 return column 778 779 @t.overload 780 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 781 782 @t.overload 783 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
784 785 def _parse_json_object(self, agg=False): 786 json_object = super()._parse_json_object() 787 array_kv_pair = seq_get(json_object.expressions, 0) 788 789 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 790 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 791 if ( 792 array_kv_pair 793 and isinstance(array_kv_pair.this, exp.Array) 794 and isinstance(array_kv_pair.expression, exp.Array) 795 ): 796 keys = array_kv_pair.this.expressions 797 values = array_kv_pair.expression.expressions 798 799 json_object.set( 800 "expressions", 801 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 802 ) 803 804 return json_object 805 806 def _parse_bracket( 807 self, this: t.Optional[exp.Expression] = None 808 ) -> t.Optional[exp.Expression]: 809 bracket = super()._parse_bracket(this) 810 811 if this is bracket: 812 return bracket 813 814 if isinstance(bracket, exp.Bracket): 815 for expression in bracket.expressions: 816 name = expression.name.upper() 817 818 if name not in self.BRACKET_OFFSETS: 819 break 820 821 offset, safe = self.BRACKET_OFFSETS[name] 822 bracket.set("offset", offset) 823 bracket.set("safe", safe) 824 expression.replace(expression.expressions[0]) 825 826 return bracket 827 828 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 829 unnest = super()._parse_unnest(with_alias=with_alias) 830 831 if not unnest: 832 return None 833 834 unnest_expr = seq_get(unnest.expressions, 0) 835 if unnest_expr: 836 from sqlglot.optimizer.annotate_types import annotate_types 837 838 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 839 840 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 841 # in contrast to other dialects such as DuckDB which flattens only the array by default 842 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 843 array_elem.is_type(exp.DataType.Type.STRUCT) 844 for array_elem in unnest_expr._type.expressions 845 ): 846 unnest.set("explode_array", True) 847 848 return unnest 849 850 def _parse_make_interval(self) -> exp.MakeInterval: 851 expr = exp.MakeInterval() 852 853 for arg_key in expr.arg_types: 854 value = self._parse_lambda() 855 856 if not value: 857 break 858 859 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 860 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 861 if isinstance(value, exp.Kwarg): 862 arg_key = value.this.name 863 864 expr.set(arg_key, value) 865 866 self._match(TokenType.COMMA) 867 868 return expr 869 870 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 871 expr = self.expression( 872 exp.FeaturesAtTime, 873 this=(self._match(TokenType.TABLE) and self._parse_table()) 874 or self._parse_select(nested=True), 875 ) 876 877 while self._match(TokenType.COMMA): 878 arg = self._parse_lambda() 879 880 # Get the LHS of the Kwarg and set the arg to that value, e.g 881 # "num_rows => 1" sets the expr's `num_rows` arg 882 if arg: 883 expr.set(arg.this.name, arg) 884 885 return expr 886 887 def _parse_export_data(self) -> exp.Export: 888 self._match_text_seq("DATA") 889 890 return self.expression( 891 exp.Export, 892 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 893 options=self._parse_properties(), 894 this=self._match_text_seq("AS") and self._parse_select(), 895 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
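For example, the BigQuery parser is typically reached through the top-level API by passing read="bigquery":

```python
import sqlglot

# TIMESTAMP_MILLIS and GENERATE_ARRAY are parsed via the FUNCTIONS overrides above.
ast = sqlglot.parse_one(
    "SELECT TIMESTAMP_MILLIS(ts) AS t FROM UNNEST(GENERATE_ARRAY(1, 3)) AS ts",
    read="bigquery",
)
print(repr(ast.selects[0]))
```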
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
897 class Generator(generator.Generator): 898 INTERVAL_ALLOWS_PLURAL_FORM = False 899 JOIN_HINTS = False 900 QUERY_HINTS = False 901 TABLE_HINTS = False 902 LIMIT_FETCH = "LIMIT" 903 RENAME_TABLE_WITH_DB = False 904 NVL2_SUPPORTED = False 905 UNNEST_WITH_ORDINALITY = False 906 COLLATE_IS_FUNC = True 907 LIMIT_ONLY_LITERALS = True 908 SUPPORTS_TABLE_ALIAS_COLUMNS = False 909 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 910 JSON_KEY_VALUE_PAIR_SEP = "," 911 NULL_ORDERING_SUPPORTED = False 912 IGNORE_NULLS_IN_FUNC = True 913 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 914 CAN_IMPLEMENT_ARRAY_ANY = True 915 SUPPORTS_TO_NUMBER = False 916 NAMED_PLACEHOLDER_TOKEN = "@" 917 HEX_FUNC = "TO_HEX" 918 WITH_PROPERTIES_PREFIX = "OPTIONS" 919 SUPPORTS_EXPLODING_PROJECTIONS = False 920 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 921 SUPPORTS_UNIX_SECONDS = True 922 923 TRANSFORMS = { 924 **generator.Generator.TRANSFORMS, 925 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 926 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 927 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 928 exp.Array: inline_array_unless_query, 929 exp.ArrayContains: _array_contains_sql, 930 exp.ArrayFilter: filter_array_using_unnest, 931 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 932 exp.CollateProperty: lambda self, e: ( 933 f"DEFAULT COLLATE {self.sql(e, 'this')}" 934 if e.args.get("default") 935 else f"COLLATE {self.sql(e, 'this')}" 936 ), 937 exp.Commit: lambda *_: "COMMIT TRANSACTION", 938 exp.CountIf: rename_func("COUNTIF"), 939 exp.Create: _create_sql, 940 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 941 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 942 exp.DateDiff: lambda self, e: self.func( 943 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 944 ), 945 exp.DateFromParts: rename_func("DATE"), 946 exp.DateStrToDate: datestrtodate_sql, 947 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 948 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 949 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 950 exp.DateTrunc: lambda self, e: self.func( 951 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 952 ), 953 exp.FromTimeZone: lambda self, e: self.func( 954 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 955 ), 956 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 957 exp.GroupConcat: lambda self, e: groupconcat_sql( 958 self, e, func_name="STRING_AGG", within_group=False 959 ), 960 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 961 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 962 exp.If: if_sql(false_value="NULL"), 963 exp.ILike: no_ilike_sql, 964 exp.IntDiv: rename_func("DIV"), 965 exp.Int64: rename_func("INT64"), 966 exp.JSONExtract: _json_extract_sql, 967 exp.JSONExtractArray: _json_extract_sql, 968 exp.JSONExtractScalar: _json_extract_sql, 969 exp.JSONFormat: rename_func("TO_JSON_STRING"), 970 exp.Levenshtein: _levenshtein_sql, 971 exp.Max: max_or_greatest, 972 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 973 exp.MD5Digest: rename_func("MD5"), 974 exp.Min: min_or_least, 975 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 976 exp.RegexpExtract: lambda self, e: self.func( 977 "REGEXP_EXTRACT", 978 e.this, 979 e.expression, 980 e.args.get("position"), 981 e.args.get("occurrence"), 982 ), 983 exp.RegexpExtractAll: lambda self, e: self.func( 984 "REGEXP_EXTRACT_ALL", e.this, 
e.expression 985 ), 986 exp.RegexpReplace: regexp_replace_sql, 987 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 988 exp.ReturnsProperty: _returnsproperty_sql, 989 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 990 exp.Select: transforms.preprocess( 991 [ 992 transforms.explode_projection_to_unnest(), 993 transforms.unqualify_unnest, 994 transforms.eliminate_distinct_on, 995 _alias_ordered_group, 996 transforms.eliminate_semi_and_anti_joins, 997 ] 998 ), 999 exp.SHA: rename_func("SHA1"), 1000 exp.SHA2: sha256_sql, 1001 exp.StabilityProperty: lambda self, e: ( 1002 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1003 ), 1004 exp.String: rename_func("STRING"), 1005 exp.StrPosition: lambda self, e: ( 1006 strposition_sql( 1007 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1008 ) 1009 ), 1010 exp.StrToDate: _str_to_datetime_sql, 1011 exp.StrToTime: _str_to_datetime_sql, 1012 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1013 exp.TimeFromParts: rename_func("TIME"), 1014 exp.TimestampFromParts: rename_func("DATETIME"), 1015 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1016 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1017 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1018 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1019 exp.TimeStrToTime: timestrtotime_sql, 1020 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1021 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1022 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1023 exp.TsOrDsToTime: rename_func("TIME"), 1024 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1025 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1026 exp.Unhex: rename_func("FROM_HEX"), 1027 exp.UnixDate: rename_func("UNIX_DATE"), 1028 exp.UnixToTime: _unix_to_time_sql, 1029 exp.Uuid: lambda *_: "GENERATE_UUID()", 1030 exp.Values: _derived_table_values_to_unnest, 1031 exp.VariancePop: rename_func("VAR_POP"), 1032 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1033 } 1034 1035 SUPPORTED_JSON_PATH_PARTS = { 1036 exp.JSONPathKey, 1037 exp.JSONPathRoot, 1038 exp.JSONPathSubscript, 1039 } 1040 1041 TYPE_MAPPING = { 1042 **generator.Generator.TYPE_MAPPING, 1043 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1044 exp.DataType.Type.BIGINT: "INT64", 1045 exp.DataType.Type.BINARY: "BYTES", 1046 exp.DataType.Type.BLOB: "BYTES", 1047 exp.DataType.Type.BOOLEAN: "BOOL", 1048 exp.DataType.Type.CHAR: "STRING", 1049 exp.DataType.Type.DECIMAL: "NUMERIC", 1050 exp.DataType.Type.DOUBLE: "FLOAT64", 1051 exp.DataType.Type.FLOAT: "FLOAT64", 1052 exp.DataType.Type.INT: "INT64", 1053 exp.DataType.Type.NCHAR: "STRING", 1054 exp.DataType.Type.NVARCHAR: "STRING", 1055 exp.DataType.Type.SMALLINT: "INT64", 1056 exp.DataType.Type.TEXT: "STRING", 1057 exp.DataType.Type.TIMESTAMP: "DATETIME", 1058 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1059 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1060 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1061 exp.DataType.Type.TINYINT: "INT64", 1062 exp.DataType.Type.ROWVERSION: "BYTES", 1063 exp.DataType.Type.UUID: "STRING", 1064 exp.DataType.Type.VARBINARY: "BYTES", 1065 exp.DataType.Type.VARCHAR: "STRING", 1066 exp.DataType.Type.VARIANT: "ANY TYPE", 1067 } 1068 1069 PROPERTIES_LOCATION = { 1070 **generator.Generator.PROPERTIES_LOCATION, 1071 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1072 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1073 } 1074 1075 # WINDOW comes after QUALIFY 1076 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 
1077 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1078 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1079 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1080 } 1081 1082 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1083 RESERVED_KEYWORDS = { 1084 "all", 1085 "and", 1086 "any", 1087 "array", 1088 "as", 1089 "asc", 1090 "assert_rows_modified", 1091 "at", 1092 "between", 1093 "by", 1094 "case", 1095 "cast", 1096 "collate", 1097 "contains", 1098 "create", 1099 "cross", 1100 "cube", 1101 "current", 1102 "default", 1103 "define", 1104 "desc", 1105 "distinct", 1106 "else", 1107 "end", 1108 "enum", 1109 "escape", 1110 "except", 1111 "exclude", 1112 "exists", 1113 "extract", 1114 "false", 1115 "fetch", 1116 "following", 1117 "for", 1118 "from", 1119 "full", 1120 "group", 1121 "grouping", 1122 "groups", 1123 "hash", 1124 "having", 1125 "if", 1126 "ignore", 1127 "in", 1128 "inner", 1129 "intersect", 1130 "interval", 1131 "into", 1132 "is", 1133 "join", 1134 "lateral", 1135 "left", 1136 "like", 1137 "limit", 1138 "lookup", 1139 "merge", 1140 "natural", 1141 "new", 1142 "no", 1143 "not", 1144 "null", 1145 "nulls", 1146 "of", 1147 "on", 1148 "or", 1149 "order", 1150 "outer", 1151 "over", 1152 "partition", 1153 "preceding", 1154 "proto", 1155 "qualify", 1156 "range", 1157 "recursive", 1158 "respect", 1159 "right", 1160 "rollup", 1161 "rows", 1162 "select", 1163 "set", 1164 "some", 1165 "struct", 1166 "tablesample", 1167 "then", 1168 "to", 1169 "treat", 1170 "true", 1171 "unbounded", 1172 "union", 1173 "unnest", 1174 "using", 1175 "when", 1176 "where", 1177 "window", 1178 "with", 1179 "within", 1180 } 1181 1182 def mod_sql(self, expression: exp.Mod) -> str: 1183 this = expression.this 1184 expr = expression.expression 1185 return self.func( 1186 "MOD", 1187 this.unnest() if isinstance(this, exp.Paren) else this, 1188 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1189 ) 1190 1191 def column_parts(self, expression: exp.Column) -> str: 1192 if expression.meta.get("quoted_column"): 1193 # If a column reference is of the form `dataset.table`.name, we need 1194 # to preserve the quoted table path, otherwise the reference breaks 1195 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1196 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1197 return f"{table_path}.{self.sql(expression, 'this')}" 1198 1199 return super().column_parts(expression) 1200 1201 def table_parts(self, expression: exp.Table) -> str: 1202 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1203 # we need to make sure the correct quoting is used in each case. 
1204 # 1205 # For example, if there is a CTE x that clashes with a schema name, then the former will 1206 # return the table y in that schema, whereas the latter will return the CTE's y column: 1207 # 1208 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1209 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1210 if expression.meta.get("quoted_table"): 1211 table_parts = ".".join(p.name for p in expression.parts) 1212 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1213 1214 return super().table_parts(expression) 1215 1216 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1217 this = expression.this 1218 if isinstance(this, exp.TsOrDsToDatetime): 1219 func_name = "FORMAT_DATETIME" 1220 elif isinstance(this, exp.TsOrDsToTimestamp): 1221 func_name = "FORMAT_TIMESTAMP" 1222 else: 1223 func_name = "FORMAT_DATE" 1224 1225 time_expr = ( 1226 this 1227 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1228 else expression 1229 ) 1230 return self.func( 1231 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1232 ) 1233 1234 def eq_sql(self, expression: exp.EQ) -> str: 1235 # Operands of = cannot be NULL in BigQuery 1236 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1237 if not isinstance(expression.parent, exp.Update): 1238 return "NULL" 1239 1240 return self.binary(expression, "=") 1241 1242 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1243 parent = expression.parent 1244 1245 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1246 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1247 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1248 return self.func( 1249 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1250 ) 1251 1252 return super().attimezone_sql(expression) 1253 1254 def trycast_sql(self, expression: exp.TryCast) -> str: 1255 return self.cast_sql(expression, safe_prefix="SAFE_") 1256 1257 def bracket_sql(self, expression: exp.Bracket) -> str: 1258 this = expression.this 1259 expressions = expression.expressions 1260 1261 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1262 arg = expressions[0] 1263 if arg.type is None: 1264 from sqlglot.optimizer.annotate_types import annotate_types 1265 1266 arg = annotate_types(arg, dialect=self.dialect) 1267 1268 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1269 # BQ doesn't support bracket syntax with string values for structs 1270 return f"{self.sql(this)}.{arg.name}" 1271 1272 expressions_sql = self.expressions(expression, flat=True) 1273 offset = expression.args.get("offset") 1274 1275 if offset == 0: 1276 expressions_sql = f"OFFSET({expressions_sql})" 1277 elif offset == 1: 1278 expressions_sql = f"ORDINAL({expressions_sql})" 1279 elif offset is not None: 1280 self.unsupported(f"Unsupported array offset: {offset}") 1281 1282 if expression.args.get("safe"): 1283 expressions_sql = f"SAFE_{expressions_sql}" 1284 1285 return f"{self.sql(this)}[{expressions_sql}]" 1286 1287 def in_unnest_op(self, expression: exp.Unnest) -> str: 1288 return self.sql(expression) 1289 1290 def version_sql(self, expression: exp.Version) -> str: 1291 if expression.name == "TIMESTAMP": 1292 expression.set("this", "SYSTEM_TIME") 1293 return super().version_sql(expression) 1294 1295 def contains_sql(self, 
expression: exp.Contains) -> str: 1296 this = expression.this 1297 expr = expression.expression 1298 1299 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1300 this = this.this 1301 expr = expr.this 1302 1303 return self.func("CONTAINS_SUBSTR", this, expr) 1304 1305 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1306 this = expression.this 1307 1308 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1309 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1310 # because they aren't literals and so the above syntax is invalid BigQuery. 1311 if isinstance(this, exp.Array): 1312 elem = seq_get(this.expressions, 0) 1313 if not (elem and elem.find(exp.Query)): 1314 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1315 1316 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
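These options are generator keyword arguments, and sqlglot.transpile (as well as Expression.sql) forwards them. A small illustration with assumed table and column names; the exact formatting of the output can vary slightly across sqlglot versions:

import sqlglot

sql = "select col_a, col_b from dataset.tbl where col_a > 1"

# `pretty` and `identify` are forwarded from transpile() to the BigQuery generator.
print(sqlglot.transpile(sql, read="bigquery", write="bigquery", pretty=True, identify=True)[0])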
    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)
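A sketch of the behaviour this preserves, assuming the BigQuery parser attaches the quoted_column metadata when a column's table path is written as a single backtick-quoted identifier; the project/dataset names are illustrative and outputs may vary by version:

import sqlglot

sql = "SELECT `my-project.dataset.tbl`.col FROM `my-project.dataset.tbl`"

# The quoted table path in front of `.col` should survive the round trip as one identifier.
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])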
    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)
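The two WITH queries from the comment above can be round-tripped to check that the quoting style, and therefore the data source each name resolves to, is preserved. A minimal sketch; exact output may differ by version:

import sqlglot

# Single quoted path -> should stay one identifier (cross join with table x.y).
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`", read="bigquery", write="bigquery")[0])

# Separately quoted parts -> should stay a dotted reference (implicit unnest of the CTE column).
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`", read="bigquery", write="bigquery")[0])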
    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if isinstance(this, exp.TsOrDsToDatetime):
            func_name = "FORMAT_DATETIME"
        elif isinstance(this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_TIMESTAMP"
        else:
            func_name = "FORMAT_DATE"

        time_expr = (
            this
            if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
            else expression
        )
        return self.func(
            func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
        )
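Which FORMAT_* function is chosen depends on what the argument is known to be; for a plain column the code falls back to FORMAT_DATE. A sketch under those assumptions (the duckdb example, the column name and the format-string mapping are illustrative, and outputs may differ by version):

import sqlglot

# BigQuery round trip: FORMAT_DATE should be kept as-is.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y%m%d', d)", read="bigquery", write="bigquery")[0])

# duckdb STRFTIME over an untyped column is expected to land on FORMAT_DATE as well.
print(sqlglot.transpile("SELECT STRFTIME(d, '%Y-%m-%d')", read="duckdb", write="bigquery")[0])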
    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")
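A quick way to see the NULL folding; a minimal sketch using the default read dialect, with expected outputs hedged since they may change between versions:

import sqlglot

# `a = NULL` can never be TRUE in BigQuery, so the comparison is folded to NULL...
print(sqlglot.transpile("SELECT a = NULL", write="bigquery")[0])
# expected along the lines of: SELECT NULL

# ...but not inside UPDATE ... SET, where `a = NULL` is an assignment and is kept.
print(sqlglot.transpile("UPDATE t SET a = NULL", write="bigquery")[0])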
    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)
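A sketch of the rewrite for a bare AT TIME ZONE expression coming from another dialect; the column name is illustrative and the output may vary by version:

import sqlglot

# Outside of CAST ... FORMAT, AT TIME ZONE is rewritten through DATETIME/TIMESTAMP construction.
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# expected along the lines of: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))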
    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"
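The offset and safe arguments correspond to BigQuery's OFFSET/ORDINAL subscripts and the SAFE_ prefix, which should survive a BigQuery-to-BigQuery round trip. A minimal sketch with illustrative names; output may vary by version:

import sqlglot

# OFFSET/ORDINAL and the SAFE_ prefix are reconstructed from the bracket's `offset`/`safe` args.
print(sqlglot.transpile("SELECT arr[OFFSET(0)], arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])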
    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
        # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
        # because they aren't literals and so the above syntax is invalid BigQuery.
        if isinstance(this, exp.Array):
            elem = seq_get(this.expressions, 0)
            if not (elem and elem.find(exp.Query)):
                return f"{self.sql(expression, 'to')}{self.sql(this)}"

        return super().cast_sql(expression, safe_prefix=safe_prefix)
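A sketch of the round trip the comment describes; output may vary by version:

import sqlglot

# The inline type annotation on the array literal should be preserved rather than
# being rewritten to something like CAST([1, 2, 3] AS ARRAY<INT64>).
print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])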
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql