sqlglot.dialects.bigquery
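The listing below is the full source of the BigQuery dialect: the Tokenizer, Parser, and Generator subclasses that translate between BigQuery syntax and sqlglot's canonical expression tree. As a quick orientation, here is a minimal usage sketch (not part of the module itself); it relies only on the public sqlglot.transpile and sqlglot.parse_one entry points, and the query text, column names, and table names are illustrative:

import sqlglot

# Parse BigQuery SQL and re-render it for another dialect. TO_HEX(MD5(...)) is
# collapsed into a single MD5 node by _build_to_hex below, so the target dialect
# can use its own hex/md5 functions.
sql = "SELECT TO_HEX(MD5(name)) AS name_hash FROM `my-project.dataset.users`"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])

# parse_one exposes the canonical expression tree for further inspection.
expression = sqlglot.parse_one(sql, read="bigquery")
print(expression.sql(dialect="bigquery"))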
1from __future__ import annotations 2 3import logging 4import re 5import typing as t 6 7from sqlglot import exp, generator, parser, tokens, transforms 8from sqlglot._typing import E 9from sqlglot.dialects.dialect import ( 10 Dialect, 11 NormalizationStrategy, 12 annotate_with_type_lambda, 13 arg_max_or_min_no_count, 14 binary_from_function, 15 date_add_interval_sql, 16 datestrtodate_sql, 17 build_formatted_time, 18 filter_array_using_unnest, 19 if_sql, 20 inline_array_unless_query, 21 max_or_greatest, 22 min_or_least, 23 no_ilike_sql, 24 build_date_delta_with_interval, 25 regexp_replace_sql, 26 rename_func, 27 sha256_sql, 28 timestrtotime_sql, 29 ts_or_ds_add_cast, 30 unit_to_var, 31 strposition_sql, 32 groupconcat_sql, 33) 34from sqlglot.helper import seq_get, split_num_words 35from sqlglot.tokens import TokenType 36from sqlglot.generator import unsupported_args 37 38if t.TYPE_CHECKING: 39 from sqlglot._typing import Lit 40 41 from sqlglot.optimizer.annotate_types import TypeAnnotator 42 43logger = logging.getLogger("sqlglot") 44 45 46JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray] 47 48DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY") 49 50 51def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str: 52 if not expression.find_ancestor(exp.From, exp.Join): 53 return self.values_sql(expression) 54 55 structs = [] 56 alias = expression.args.get("alias") 57 for tup in expression.find_all(exp.Tuple): 58 field_aliases = ( 59 alias.columns 60 if alias and alias.columns 61 else (f"_c{i}" for i in range(len(tup.expressions))) 62 ) 63 expressions = [ 64 exp.PropertyEQ(this=exp.to_identifier(name), expression=fld) 65 for name, fld in zip(field_aliases, tup.expressions) 66 ] 67 structs.append(exp.Struct(expressions=expressions)) 68 69 # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression 70 alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None 71 return self.unnest_sql( 72 exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only) 73 ) 74 75 76def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str: 77 this = expression.this 78 if isinstance(this, exp.Schema): 79 this = f"{self.sql(this, 'this')} <{self.expressions(this)}>" 80 else: 81 this = self.sql(this) 82 return f"RETURNS {this}" 83 84 85def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str: 86 returns = expression.find(exp.ReturnsProperty) 87 if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"): 88 expression.set("kind", "TABLE FUNCTION") 89 90 if isinstance(expression.expression, (exp.Subquery, exp.Literal)): 91 expression.set("expression", expression.expression.this) 92 93 return self.create_sql(expression) 94 95 96# https://issuetracker.google.com/issues/162294746 97# workaround for bigquery bug when grouping by an expression and then ordering 98# WITH x AS (SELECT 1 y) 99# SELECT y + 1 z 100# FROM x 101# GROUP BY x + 1 102# ORDER by z 103def _alias_ordered_group(expression: exp.Expression) -> exp.Expression: 104 if isinstance(expression, exp.Select): 105 group = expression.args.get("group") 106 order = expression.args.get("order") 107 108 if group and order: 109 aliases = { 110 select.this: select.args["alias"] 111 for select in expression.selects 112 if isinstance(select, exp.Alias) 113 } 114 115 for grouped in group.expressions: 116 if grouped.is_int: 117 
continue 118 alias = aliases.get(grouped) 119 if alias: 120 grouped.replace(exp.column(alias)) 121 122 return expression 123 124 125def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression: 126 """BigQuery doesn't allow column names when defining a CTE, so we try to push them down.""" 127 if isinstance(expression, exp.CTE) and expression.alias_column_names: 128 cte_query = expression.this 129 130 if cte_query.is_star: 131 logger.warning( 132 "Can't push down CTE column names for star queries. Run the query through" 133 " the optimizer or use 'qualify' to expand the star projections first." 134 ) 135 return expression 136 137 column_names = expression.alias_column_names 138 expression.args["alias"].set("columns", None) 139 140 for name, select in zip(column_names, cte_query.selects): 141 to_replace = select 142 143 if isinstance(select, exp.Alias): 144 select = select.this 145 146 # Inner aliases are shadowed by the CTE column names 147 to_replace.replace(exp.alias_(select, name)) 148 149 return expression 150 151 152def _build_parse_timestamp(args: t.List) -> exp.StrToTime: 153 this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)]) 154 this.set("zone", seq_get(args, 2)) 155 return this 156 157 158def _build_timestamp(args: t.List) -> exp.Timestamp: 159 timestamp = exp.Timestamp.from_arg_list(args) 160 timestamp.set("with_tz", True) 161 return timestamp 162 163 164def _build_date(args: t.List) -> exp.Date | exp.DateFromParts: 165 expr_type = exp.DateFromParts if len(args) == 3 else exp.Date 166 return expr_type.from_arg_list(args) 167 168 169def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5: 170 # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation 171 arg = seq_get(args, 0) 172 return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg) 173 174 175def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str: 176 return self.sql( 177 exp.Exists( 178 this=exp.select("1") 179 .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"])) 180 .where(exp.column("_col").eq(expression.right)) 181 ) 182 ) 183 184 185def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str: 186 return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression)) 187 188 189def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str: 190 expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)) 191 expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)) 192 unit = unit_to_var(expression) 193 return self.func("DATE_DIFF", expression.this, expression.expression, unit) 194 195 196def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str: 197 scale = expression.args.get("scale") 198 timestamp = expression.this 199 200 if scale in (None, exp.UnixToTime.SECONDS): 201 return self.func("TIMESTAMP_SECONDS", timestamp) 202 if scale == exp.UnixToTime.MILLIS: 203 return self.func("TIMESTAMP_MILLIS", timestamp) 204 if scale == exp.UnixToTime.MICROS: 205 return self.func("TIMESTAMP_MICROS", timestamp) 206 207 unix_seconds = exp.cast( 208 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 209 ) 210 return self.func("TIMESTAMP_SECONDS", unix_seconds) 211 212 213def _build_time(args: t.List) -> exp.Func: 214 if len(args) == 1: 215 return exp.TsOrDsToTime(this=args[0]) 216 if 
len(args) == 2: 217 return exp.Time.from_arg_list(args) 218 return exp.TimeFromParts.from_arg_list(args) 219 220 221def _build_datetime(args: t.List) -> exp.Func: 222 if len(args) == 1: 223 return exp.TsOrDsToDatetime.from_arg_list(args) 224 if len(args) == 2: 225 return exp.Datetime.from_arg_list(args) 226 return exp.TimestampFromParts.from_arg_list(args) 227 228 229def _build_regexp_extract( 230 expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None 231) -> t.Callable[[t.List], E]: 232 def _builder(args: t.List) -> E: 233 try: 234 group = re.compile(args[1].name).groups == 1 235 except re.error: 236 group = False 237 238 # Default group is used for the transpilation of REGEXP_EXTRACT_ALL 239 return expr_type( 240 this=seq_get(args, 0), 241 expression=seq_get(args, 1), 242 position=seq_get(args, 2), 243 occurrence=seq_get(args, 3), 244 group=exp.Literal.number(1) if group else default_group, 245 ) 246 247 return _builder 248 249 250def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 251 def _builder(args: t.List, dialect: Dialect) -> E: 252 if len(args) == 1: 253 # The default value for the JSONPath is '$' i.e all of the data 254 args.append(exp.Literal.string("$")) 255 return parser.build_extract_json_with_path(expr_type)(args, dialect) 256 257 return _builder 258 259 260def _str_to_datetime_sql( 261 self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime 262) -> str: 263 this = self.sql(expression, "this") 264 dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP" 265 266 if expression.args.get("safe"): 267 fmt = self.format_time( 268 expression, 269 self.dialect.INVERSE_FORMAT_MAPPING, 270 self.dialect.INVERSE_FORMAT_TRIE, 271 ) 272 return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})" 273 274 fmt = self.format_time(expression) 275 return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone")) 276 277 278def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E: 279 """ 280 Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention: 281 +---------+---------+---------+------------+---------+ 282 | INPUT | INT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 283 +---------+---------+---------+------------+---------+ 284 | OUTPUT | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 | 285 +---------+---------+---------+------------+---------+ 286 """ 287 self._annotate_args(expression) 288 289 this: exp.Expression = expression.this 290 291 self._set_type( 292 expression, 293 exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type, 294 ) 295 return expression 296 297 298@unsupported_args("ins_cost", "del_cost", "sub_cost") 299def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str: 300 max_dist = expression.args.get("max_dist") 301 if max_dist: 302 max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist) 303 304 return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist) 305 306 307def _build_levenshtein(args: t.List) -> exp.Levenshtein: 308 max_dist = seq_get(args, 2) 309 return exp.Levenshtein( 310 this=seq_get(args, 0), 311 expression=seq_get(args, 1), 312 max_dist=max_dist.expression if max_dist else None, 313 ) 314 315 316def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]: 317 def _builder(args: t.List) -> exp.TimeToStr: 318 return exp.TimeToStr( 319 this=expr_type(this=seq_get(args, 1)), 320 format=seq_get(args, 0), 321 
zone=seq_get(args, 2), 322 ) 323 324 return _builder 325 326 327def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous: 328 if len(args) == 3: 329 return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args) 330 331 # Lowercase the operands in case of transpilation, as exp.Contains 332 # is case-sensitive on other dialects 333 this = exp.Lower(this=seq_get(args, 0)) 334 expr = exp.Lower(this=seq_get(args, 1)) 335 336 return exp.Contains(this=this, expression=expr) 337 338 339def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str: 340 name = (expression._meta and expression.meta.get("name")) or expression.sql_name() 341 upper = name.upper() 342 343 dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS 344 345 if dquote_escaping: 346 self._quote_json_path_key_using_brackets = False 347 348 sql = rename_func(upper)(self, expression) 349 350 if dquote_escaping: 351 self._quote_json_path_key_using_brackets = True 352 353 return sql 354 355 356def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat: 357 annotated = self._annotate_by_args(expression, "expressions") 358 359 # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING 360 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat 361 if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN): 362 annotated.type = exp.DataType.Type.VARCHAR 363 364 return annotated 365 366 367class BigQuery(Dialect): 368 WEEK_OFFSET = -1 369 UNNEST_COLUMN_ONLY = True 370 SUPPORTS_USER_DEFINED_TYPES = False 371 SUPPORTS_SEMI_ANTI_JOIN = False 372 LOG_BASE_FIRST = False 373 HEX_LOWERCASE = True 374 FORCE_EARLY_ALIAS_REF_EXPANSION = True 375 PRESERVE_ORIGINAL_NAMES = True 376 HEX_STRING_IS_INTEGER_TYPE = True 377 378 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 379 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 380 381 # bigquery udfs are case sensitive 382 NORMALIZE_FUNCTIONS = False 383 384 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 385 TIME_MAPPING = { 386 "%D": "%m/%d/%y", 387 "%E6S": "%S.%f", 388 "%e": "%-d", 389 } 390 391 FORMAT_MAPPING = { 392 "DD": "%d", 393 "MM": "%m", 394 "MON": "%b", 395 "MONTH": "%B", 396 "YYYY": "%Y", 397 "YY": "%y", 398 "HH": "%I", 399 "HH12": "%I", 400 "HH24": "%H", 401 "MI": "%M", 402 "SS": "%S", 403 "SSSSS": "%f", 404 "TZH": "%z", 405 } 406 407 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 408 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 409 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 410 411 # All set operations require either a DISTINCT or ALL specifier 412 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 413 414 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 415 TYPE_TO_EXPRESSIONS = { 416 **Dialect.TYPE_TO_EXPRESSIONS, 417 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 418 } 419 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 420 421 ANNOTATORS = { 422 **Dialect.ANNOTATORS, 423 **{ 424 expr_type: annotate_with_type_lambda(data_type) 425 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 426 for expr_type in expressions 427 }, 428 **{ 429 expr_type: lambda self, e: 
_annotate_math_functions(self, e) 430 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 431 }, 432 **{ 433 expr_type: lambda self, e: self._annotate_by_args(e, "this") 434 for expr_type in ( 435 exp.Left, 436 exp.Right, 437 exp.Lower, 438 exp.Upper, 439 exp.Pad, 440 exp.Trim, 441 exp.RegexpExtract, 442 exp.RegexpReplace, 443 exp.Repeat, 444 exp.Substring, 445 ) 446 }, 447 exp.Concat: _annotate_concat, 448 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 449 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 450 } 451 452 def normalize_identifier(self, expression: E) -> E: 453 if ( 454 isinstance(expression, exp.Identifier) 455 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 456 ): 457 parent = expression.parent 458 while isinstance(parent, exp.Dot): 459 parent = parent.parent 460 461 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 462 # by default. The following check uses a heuristic to detect tables based on whether 463 # they are qualified. This should generally be correct, because tables in BigQuery 464 # must be qualified with at least a dataset, unless @@dataset_id is set. 465 case_sensitive = ( 466 isinstance(parent, exp.UserDefinedFunction) 467 or ( 468 isinstance(parent, exp.Table) 469 and parent.db 470 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 471 ) 472 or expression.meta.get("is_table") 473 ) 474 if not case_sensitive: 475 expression.set("this", expression.this.lower()) 476 477 return t.cast(E, expression) 478 479 return super().normalize_identifier(expression) 480 481 class Tokenizer(tokens.Tokenizer): 482 QUOTES = ["'", '"', '"""', "'''"] 483 COMMENTS = ["--", "#", ("/*", "*/")] 484 IDENTIFIERS = ["`"] 485 STRING_ESCAPES = ["\\"] 486 487 HEX_STRINGS = [("0x", ""), ("0X", "")] 488 489 BYTE_STRINGS = [ 490 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 491 ] 492 493 RAW_STRINGS = [ 494 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 495 ] 496 497 NESTED_COMMENTS = False 498 499 KEYWORDS = { 500 **tokens.Tokenizer.KEYWORDS, 501 "ANY TYPE": TokenType.VARIANT, 502 "BEGIN": TokenType.COMMAND, 503 "BEGIN TRANSACTION": TokenType.BEGIN, 504 "BYTEINT": TokenType.INT, 505 "BYTES": TokenType.BINARY, 506 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 507 "DATETIME": TokenType.TIMESTAMP, 508 "DECLARE": TokenType.COMMAND, 509 "ELSEIF": TokenType.COMMAND, 510 "EXCEPTION": TokenType.COMMAND, 511 "EXPORT": TokenType.EXPORT, 512 "FLOAT64": TokenType.DOUBLE, 513 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 514 "MODEL": TokenType.MODEL, 515 "NOT DETERMINISTIC": TokenType.VOLATILE, 516 "RECORD": TokenType.STRUCT, 517 "TIMESTAMP": TokenType.TIMESTAMPTZ, 518 } 519 KEYWORDS.pop("DIV") 520 KEYWORDS.pop("VALUES") 521 KEYWORDS.pop("/*+") 522 523 class Parser(parser.Parser): 524 PREFIXED_PIVOT_COLUMNS = True 525 LOG_DEFAULTS_TO_LN = True 526 SUPPORTS_IMPLICIT_UNNEST = True 527 528 FUNCTIONS = { 529 **parser.Parser.FUNCTIONS, 530 "CONTAINS_SUBSTR": _build_contains_substring, 531 "DATE": _build_date, 532 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 533 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 534 "DATE_TRUNC": lambda args: exp.DateTrunc( 535 unit=exp.Literal.string(str(seq_get(args, 1))), 536 this=seq_get(args, 0), 537 zone=seq_get(args, 2), 538 ), 539 "DATETIME": _build_datetime, 540 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 
541 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 542 "DIV": binary_from_function(exp.IntDiv), 543 "EDIT_DISTANCE": _build_levenshtein, 544 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 545 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 546 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 547 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 548 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 549 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 550 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 551 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 552 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 553 "MD5": exp.MD5Digest.from_arg_list, 554 "TO_HEX": _build_to_hex, 555 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 556 [seq_get(args, 1), seq_get(args, 0)] 557 ), 558 "PARSE_TIMESTAMP": _build_parse_timestamp, 559 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 560 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 561 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 562 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 563 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 564 ), 565 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 566 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 567 "SPLIT": lambda args: exp.Split( 568 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 569 this=seq_get(args, 0), 570 expression=seq_get(args, 1) or exp.Literal.string(","), 571 ), 572 "STRPOS": exp.StrPosition.from_arg_list, 573 "TIME": _build_time, 574 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 575 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 576 "TIMESTAMP": _build_timestamp, 577 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 578 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 579 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 580 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 581 ), 582 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 583 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 584 ), 585 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 586 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 587 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 588 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 589 } 590 591 FUNCTION_PARSERS = { 592 **parser.Parser.FUNCTION_PARSERS, 593 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 594 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 595 "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(), 596 } 597 FUNCTION_PARSERS.pop("TRIM") 598 599 NO_PAREN_FUNCTIONS = { 600 **parser.Parser.NO_PAREN_FUNCTIONS, 601 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 602 } 603 604 NESTED_TYPE_TOKENS = { 605 *parser.Parser.NESTED_TYPE_TOKENS, 606 TokenType.TABLE, 607 } 608 609 PROPERTY_PARSERS = { 610 **parser.Parser.PROPERTY_PARSERS, 611 "NOT DETERMINISTIC": lambda self: self.expression( 612 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 613 ), 614 "OPTIONS": lambda self: self._parse_with_property(), 615 } 616 617 CONSTRAINT_PARSERS = { 618 **parser.Parser.CONSTRAINT_PARSERS, 619 "OPTIONS": 
lambda self: exp.Properties(expressions=self._parse_with_property()), 620 } 621 622 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 623 RANGE_PARSERS.pop(TokenType.OVERLAPS) 624 625 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 626 627 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 628 629 STATEMENT_PARSERS = { 630 **parser.Parser.STATEMENT_PARSERS, 631 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 632 TokenType.END: lambda self: self._parse_as_command(self._prev), 633 TokenType.FOR: lambda self: self._parse_for_in(), 634 TokenType.EXPORT: lambda self: self._parse_export_data(), 635 } 636 637 BRACKET_OFFSETS = { 638 "OFFSET": (0, False), 639 "ORDINAL": (1, False), 640 "SAFE_OFFSET": (0, True), 641 "SAFE_ORDINAL": (1, True), 642 } 643 644 def _parse_for_in(self) -> exp.ForIn: 645 this = self._parse_range() 646 self._match_text_seq("DO") 647 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 648 649 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 650 this = super()._parse_table_part(schema=schema) or self._parse_number() 651 652 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 653 if isinstance(this, exp.Identifier): 654 table_name = this.name 655 while self._match(TokenType.DASH, advance=False) and self._next: 656 start = self._curr 657 while self._is_connected() and not self._match_set( 658 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 659 ): 660 self._advance() 661 662 if start == self._curr: 663 break 664 665 table_name += self._find_sql(start, self._prev) 666 667 this = exp.Identifier( 668 this=table_name, quoted=this.args.get("quoted") 669 ).update_positions(this) 670 elif isinstance(this, exp.Literal): 671 table_name = this.name 672 673 if self._is_connected() and self._parse_var(any_token=True): 674 table_name += self._prev.text 675 676 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 677 678 return this 679 680 def _parse_table_parts( 681 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 682 ) -> exp.Table: 683 table = super()._parse_table_parts( 684 schema=schema, is_db_reference=is_db_reference, wildcard=True 685 ) 686 687 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 688 if not table.catalog: 689 if table.db: 690 previous_db = table.args["db"] 691 parts = table.db.split(".") 692 if len(parts) == 2 and not table.args["db"].quoted: 693 table.set( 694 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 695 ) 696 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 697 else: 698 previous_this = table.this 699 parts = table.name.split(".") 700 if len(parts) == 2 and not table.this.quoted: 701 table.set( 702 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 703 ) 704 table.set( 705 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 706 ) 707 708 if isinstance(table.this, exp.Identifier) and any("." 
in p.name for p in table.parts): 709 alias = table.this 710 catalog, db, this, *rest = ( 711 exp.to_identifier(p, quoted=True) 712 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 713 ) 714 715 for part in (catalog, db, this): 716 if part: 717 part.update_positions(table.this) 718 719 if rest and this: 720 this = exp.Dot.build([this, *rest]) # type: ignore 721 722 table = exp.Table( 723 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 724 ) 725 table.meta["quoted_table"] = True 726 else: 727 alias = None 728 729 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 730 # dataset, so if the project identifier is omitted we need to fix the ast so that 731 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 732 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 733 # views, because it would seem like the "catalog" part is set, when it'd actually 734 # be the region/dataset. Merging the two identifiers into a single one is done to 735 # avoid producing a 4-part Table reference, which would cause issues in the schema 736 # module, when there are 3-part table names mixed with information schema views. 737 # 738 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 739 table_parts = table.parts 740 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 741 # We need to alias the table here to avoid breaking existing qualified columns. 742 # This is expected to be safe, because if there's an actual alias coming up in 743 # the token stream, it will overwrite this one. If there isn't one, we are only 744 # exposing the name that can be used to reference the view explicitly (a no-op). 745 exp.alias_( 746 table, 747 t.cast(exp.Identifier, alias or table_parts[-1]), 748 table=True, 749 copy=False, 750 ) 751 752 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 753 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 754 line=table_parts[-2].meta.get("line"), 755 col=table_parts[-1].meta.get("col"), 756 start=table_parts[-2].meta.get("start"), 757 end=table_parts[-1].meta.get("end"), 758 ) 759 table.set("this", new_this) 760 table.set("db", seq_get(table_parts, -3)) 761 table.set("catalog", seq_get(table_parts, -4)) 762 763 return table 764 765 def _parse_column(self) -> t.Optional[exp.Expression]: 766 column = super()._parse_column() 767 if isinstance(column, exp.Column): 768 parts = column.parts 769 if any("." in p.name for p in parts): 770 catalog, db, table, this, *rest = ( 771 exp.to_identifier(p, quoted=True) 772 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 773 ) 774 775 if rest and this: 776 this = exp.Dot.build([this, *rest]) # type: ignore 777 778 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 779 column.meta["quoted_column"] = True 780 781 return column 782 783 @t.overload 784 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 785 786 @t.overload 787 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
788 789 def _parse_json_object(self, agg=False): 790 json_object = super()._parse_json_object() 791 array_kv_pair = seq_get(json_object.expressions, 0) 792 793 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 794 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 795 if ( 796 array_kv_pair 797 and isinstance(array_kv_pair.this, exp.Array) 798 and isinstance(array_kv_pair.expression, exp.Array) 799 ): 800 keys = array_kv_pair.this.expressions 801 values = array_kv_pair.expression.expressions 802 803 json_object.set( 804 "expressions", 805 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 806 ) 807 808 return json_object 809 810 def _parse_bracket( 811 self, this: t.Optional[exp.Expression] = None 812 ) -> t.Optional[exp.Expression]: 813 bracket = super()._parse_bracket(this) 814 815 if this is bracket: 816 return bracket 817 818 if isinstance(bracket, exp.Bracket): 819 for expression in bracket.expressions: 820 name = expression.name.upper() 821 822 if name not in self.BRACKET_OFFSETS: 823 break 824 825 offset, safe = self.BRACKET_OFFSETS[name] 826 bracket.set("offset", offset) 827 bracket.set("safe", safe) 828 expression.replace(expression.expressions[0]) 829 830 return bracket 831 832 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 833 unnest = super()._parse_unnest(with_alias=with_alias) 834 835 if not unnest: 836 return None 837 838 unnest_expr = seq_get(unnest.expressions, 0) 839 if unnest_expr: 840 from sqlglot.optimizer.annotate_types import annotate_types 841 842 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 843 844 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 845 # in contrast to other dialects such as DuckDB which flattens only the array by default 846 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 847 array_elem.is_type(exp.DataType.Type.STRUCT) 848 for array_elem in unnest_expr._type.expressions 849 ): 850 unnest.set("explode_array", True) 851 852 return unnest 853 854 def _parse_make_interval(self) -> exp.MakeInterval: 855 expr = exp.MakeInterval() 856 857 for arg_key in expr.arg_types: 858 value = self._parse_lambda() 859 860 if not value: 861 break 862 863 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 864 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 865 if isinstance(value, exp.Kwarg): 866 arg_key = value.this.name 867 868 expr.set(arg_key, value) 869 870 self._match(TokenType.COMMA) 871 872 return expr 873 874 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 875 expr = self.expression( 876 exp.FeaturesAtTime, 877 this=(self._match(TokenType.TABLE) and self._parse_table()) 878 or self._parse_select(nested=True), 879 ) 880 881 while self._match(TokenType.COMMA): 882 arg = self._parse_lambda() 883 884 # Get the LHS of the Kwarg and set the arg to that value, e.g 885 # "num_rows => 1" sets the expr's `num_rows` arg 886 if arg: 887 expr.set(arg.this.name, arg) 888 889 return expr 890 891 def _parse_export_data(self) -> exp.Export: 892 self._match_text_seq("DATA") 893 894 return self.expression( 895 exp.Export, 896 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 897 options=self._parse_properties(), 898 this=self._match_text_seq("AS") and self._parse_select(), 899 ) 900 901 class Generator(generator.Generator): 902 INTERVAL_ALLOWS_PLURAL_FORM = False 903 
JOIN_HINTS = False 904 QUERY_HINTS = False 905 TABLE_HINTS = False 906 LIMIT_FETCH = "LIMIT" 907 RENAME_TABLE_WITH_DB = False 908 NVL2_SUPPORTED = False 909 UNNEST_WITH_ORDINALITY = False 910 COLLATE_IS_FUNC = True 911 LIMIT_ONLY_LITERALS = True 912 SUPPORTS_TABLE_ALIAS_COLUMNS = False 913 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 914 JSON_KEY_VALUE_PAIR_SEP = "," 915 NULL_ORDERING_SUPPORTED = False 916 IGNORE_NULLS_IN_FUNC = True 917 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 918 CAN_IMPLEMENT_ARRAY_ANY = True 919 SUPPORTS_TO_NUMBER = False 920 NAMED_PLACEHOLDER_TOKEN = "@" 921 HEX_FUNC = "TO_HEX" 922 WITH_PROPERTIES_PREFIX = "OPTIONS" 923 SUPPORTS_EXPLODING_PROJECTIONS = False 924 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 925 SUPPORTS_UNIX_SECONDS = True 926 927 TRANSFORMS = { 928 **generator.Generator.TRANSFORMS, 929 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 930 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 931 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 932 exp.Array: inline_array_unless_query, 933 exp.ArrayContains: _array_contains_sql, 934 exp.ArrayFilter: filter_array_using_unnest, 935 exp.ArrayRemove: filter_array_using_unnest, 936 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 937 exp.CollateProperty: lambda self, e: ( 938 f"DEFAULT COLLATE {self.sql(e, 'this')}" 939 if e.args.get("default") 940 else f"COLLATE {self.sql(e, 'this')}" 941 ), 942 exp.Commit: lambda *_: "COMMIT TRANSACTION", 943 exp.CountIf: rename_func("COUNTIF"), 944 exp.Create: _create_sql, 945 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 946 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 947 exp.DateDiff: lambda self, e: self.func( 948 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 949 ), 950 exp.DateFromParts: rename_func("DATE"), 951 exp.DateStrToDate: datestrtodate_sql, 952 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 953 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 954 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 955 exp.DateTrunc: lambda self, e: self.func( 956 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 957 ), 958 exp.FromTimeZone: lambda self, e: self.func( 959 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 960 ), 961 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 962 exp.GroupConcat: lambda self, e: groupconcat_sql( 963 self, e, func_name="STRING_AGG", within_group=False 964 ), 965 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 966 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 967 exp.If: if_sql(false_value="NULL"), 968 exp.ILike: no_ilike_sql, 969 exp.IntDiv: rename_func("DIV"), 970 exp.Int64: rename_func("INT64"), 971 exp.JSONExtract: _json_extract_sql, 972 exp.JSONExtractArray: _json_extract_sql, 973 exp.JSONExtractScalar: _json_extract_sql, 974 exp.JSONFormat: rename_func("TO_JSON_STRING"), 975 exp.Levenshtein: _levenshtein_sql, 976 exp.Max: max_or_greatest, 977 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 978 exp.MD5Digest: rename_func("MD5"), 979 exp.Min: min_or_least, 980 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 981 exp.RegexpExtract: lambda self, e: self.func( 982 "REGEXP_EXTRACT", 983 e.this, 984 e.expression, 985 e.args.get("position"), 986 e.args.get("occurrence"), 987 ), 988 exp.RegexpExtractAll: lambda self, e: self.func( 989 "REGEXP_EXTRACT_ALL", e.this, e.expression 990 ), 991 exp.RegexpReplace: 
regexp_replace_sql, 992 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 993 exp.ReturnsProperty: _returnsproperty_sql, 994 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 995 exp.Select: transforms.preprocess( 996 [ 997 transforms.explode_projection_to_unnest(), 998 transforms.unqualify_unnest, 999 transforms.eliminate_distinct_on, 1000 _alias_ordered_group, 1001 transforms.eliminate_semi_and_anti_joins, 1002 ] 1003 ), 1004 exp.SHA: rename_func("SHA1"), 1005 exp.SHA2: sha256_sql, 1006 exp.StabilityProperty: lambda self, e: ( 1007 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1008 ), 1009 exp.String: rename_func("STRING"), 1010 exp.StrPosition: lambda self, e: ( 1011 strposition_sql( 1012 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1013 ) 1014 ), 1015 exp.StrToDate: _str_to_datetime_sql, 1016 exp.StrToTime: _str_to_datetime_sql, 1017 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1018 exp.TimeFromParts: rename_func("TIME"), 1019 exp.TimestampFromParts: rename_func("DATETIME"), 1020 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1021 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1022 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1023 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1024 exp.TimeStrToTime: timestrtotime_sql, 1025 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1026 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1027 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1028 exp.TsOrDsToTime: rename_func("TIME"), 1029 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1030 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1031 exp.Unhex: rename_func("FROM_HEX"), 1032 exp.UnixDate: rename_func("UNIX_DATE"), 1033 exp.UnixToTime: _unix_to_time_sql, 1034 exp.Uuid: lambda *_: "GENERATE_UUID()", 1035 exp.Values: _derived_table_values_to_unnest, 1036 exp.VariancePop: rename_func("VAR_POP"), 1037 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1038 } 1039 1040 SUPPORTED_JSON_PATH_PARTS = { 1041 exp.JSONPathKey, 1042 exp.JSONPathRoot, 1043 exp.JSONPathSubscript, 1044 } 1045 1046 TYPE_MAPPING = { 1047 **generator.Generator.TYPE_MAPPING, 1048 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1049 exp.DataType.Type.BIGINT: "INT64", 1050 exp.DataType.Type.BINARY: "BYTES", 1051 exp.DataType.Type.BLOB: "BYTES", 1052 exp.DataType.Type.BOOLEAN: "BOOL", 1053 exp.DataType.Type.CHAR: "STRING", 1054 exp.DataType.Type.DECIMAL: "NUMERIC", 1055 exp.DataType.Type.DOUBLE: "FLOAT64", 1056 exp.DataType.Type.FLOAT: "FLOAT64", 1057 exp.DataType.Type.INT: "INT64", 1058 exp.DataType.Type.NCHAR: "STRING", 1059 exp.DataType.Type.NVARCHAR: "STRING", 1060 exp.DataType.Type.SMALLINT: "INT64", 1061 exp.DataType.Type.TEXT: "STRING", 1062 exp.DataType.Type.TIMESTAMP: "DATETIME", 1063 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1064 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1065 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1066 exp.DataType.Type.TINYINT: "INT64", 1067 exp.DataType.Type.ROWVERSION: "BYTES", 1068 exp.DataType.Type.UUID: "STRING", 1069 exp.DataType.Type.VARBINARY: "BYTES", 1070 exp.DataType.Type.VARCHAR: "STRING", 1071 exp.DataType.Type.VARIANT: "ANY TYPE", 1072 } 1073 1074 PROPERTIES_LOCATION = { 1075 **generator.Generator.PROPERTIES_LOCATION, 1076 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1077 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1078 } 1079 1080 # WINDOW comes after QUALIFY 1081 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1082 AFTER_HAVING_MODIFIER_TRANSFORMS = 
{ 1083 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1084 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1085 } 1086 1087 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1088 RESERVED_KEYWORDS = { 1089 "all", 1090 "and", 1091 "any", 1092 "array", 1093 "as", 1094 "asc", 1095 "assert_rows_modified", 1096 "at", 1097 "between", 1098 "by", 1099 "case", 1100 "cast", 1101 "collate", 1102 "contains", 1103 "create", 1104 "cross", 1105 "cube", 1106 "current", 1107 "default", 1108 "define", 1109 "desc", 1110 "distinct", 1111 "else", 1112 "end", 1113 "enum", 1114 "escape", 1115 "except", 1116 "exclude", 1117 "exists", 1118 "extract", 1119 "false", 1120 "fetch", 1121 "following", 1122 "for", 1123 "from", 1124 "full", 1125 "group", 1126 "grouping", 1127 "groups", 1128 "hash", 1129 "having", 1130 "if", 1131 "ignore", 1132 "in", 1133 "inner", 1134 "intersect", 1135 "interval", 1136 "into", 1137 "is", 1138 "join", 1139 "lateral", 1140 "left", 1141 "like", 1142 "limit", 1143 "lookup", 1144 "merge", 1145 "natural", 1146 "new", 1147 "no", 1148 "not", 1149 "null", 1150 "nulls", 1151 "of", 1152 "on", 1153 "or", 1154 "order", 1155 "outer", 1156 "over", 1157 "partition", 1158 "preceding", 1159 "proto", 1160 "qualify", 1161 "range", 1162 "recursive", 1163 "respect", 1164 "right", 1165 "rollup", 1166 "rows", 1167 "select", 1168 "set", 1169 "some", 1170 "struct", 1171 "tablesample", 1172 "then", 1173 "to", 1174 "treat", 1175 "true", 1176 "unbounded", 1177 "union", 1178 "unnest", 1179 "using", 1180 "when", 1181 "where", 1182 "window", 1183 "with", 1184 "within", 1185 } 1186 1187 def mod_sql(self, expression: exp.Mod) -> str: 1188 this = expression.this 1189 expr = expression.expression 1190 return self.func( 1191 "MOD", 1192 this.unnest() if isinstance(this, exp.Paren) else this, 1193 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1194 ) 1195 1196 def column_parts(self, expression: exp.Column) -> str: 1197 if expression.meta.get("quoted_column"): 1198 # If a column reference is of the form `dataset.table`.name, we need 1199 # to preserve the quoted table path, otherwise the reference breaks 1200 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1201 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1202 return f"{table_path}.{self.sql(expression, 'this')}" 1203 1204 return super().column_parts(expression) 1205 1206 def table_parts(self, expression: exp.Table) -> str: 1207 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1208 # we need to make sure the correct quoting is used in each case. 
1209 # 1210 # For example, if there is a CTE x that clashes with a schema name, then the former will 1211 # return the table y in that schema, whereas the latter will return the CTE's y column: 1212 # 1213 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1214 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1215 if expression.meta.get("quoted_table"): 1216 table_parts = ".".join(p.name for p in expression.parts) 1217 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1218 1219 return super().table_parts(expression) 1220 1221 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1222 this = expression.this 1223 if isinstance(this, exp.TsOrDsToDatetime): 1224 func_name = "FORMAT_DATETIME" 1225 elif isinstance(this, exp.TsOrDsToTimestamp): 1226 func_name = "FORMAT_TIMESTAMP" 1227 else: 1228 func_name = "FORMAT_DATE" 1229 1230 time_expr = ( 1231 this 1232 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1233 else expression 1234 ) 1235 return self.func( 1236 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1237 ) 1238 1239 def eq_sql(self, expression: exp.EQ) -> str: 1240 # Operands of = cannot be NULL in BigQuery 1241 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1242 if not isinstance(expression.parent, exp.Update): 1243 return "NULL" 1244 1245 return self.binary(expression, "=") 1246 1247 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1248 parent = expression.parent 1249 1250 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1251 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1252 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1253 return self.func( 1254 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1255 ) 1256 1257 return super().attimezone_sql(expression) 1258 1259 def trycast_sql(self, expression: exp.TryCast) -> str: 1260 return self.cast_sql(expression, safe_prefix="SAFE_") 1261 1262 def bracket_sql(self, expression: exp.Bracket) -> str: 1263 this = expression.this 1264 expressions = expression.expressions 1265 1266 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1267 arg = expressions[0] 1268 if arg.type is None: 1269 from sqlglot.optimizer.annotate_types import annotate_types 1270 1271 arg = annotate_types(arg, dialect=self.dialect) 1272 1273 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1274 # BQ doesn't support bracket syntax with string values for structs 1275 return f"{self.sql(this)}.{arg.name}" 1276 1277 expressions_sql = self.expressions(expression, flat=True) 1278 offset = expression.args.get("offset") 1279 1280 if offset == 0: 1281 expressions_sql = f"OFFSET({expressions_sql})" 1282 elif offset == 1: 1283 expressions_sql = f"ORDINAL({expressions_sql})" 1284 elif offset is not None: 1285 self.unsupported(f"Unsupported array offset: {offset}") 1286 1287 if expression.args.get("safe"): 1288 expressions_sql = f"SAFE_{expressions_sql}" 1289 1290 return f"{self.sql(this)}[{expressions_sql}]" 1291 1292 def in_unnest_op(self, expression: exp.Unnest) -> str: 1293 return self.sql(expression) 1294 1295 def version_sql(self, expression: exp.Version) -> str: 1296 if expression.name == "TIMESTAMP": 1297 expression.set("this", "SYSTEM_TIME") 1298 return super().version_sql(expression) 1299 1300 def contains_sql(self, 
expression: exp.Contains) -> str: 1301 this = expression.this 1302 expr = expression.expression 1303 1304 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1305 this = this.this 1306 expr = expr.this 1307 1308 return self.func("CONTAINS_SUBSTR", this, expr) 1309 1310 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1311 this = expression.this 1312 1313 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1314 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1315 # because they aren't literals and so the above syntax is invalid BigQuery. 1316 if isinstance(this, exp.Array): 1317 elem = seq_get(this.expressions, 0) 1318 if not (elem and elem.find(exp.Query)): 1319 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1320 1321 return super().cast_sql(expression, safe_prefix=safe_prefix)
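A second sketch exercising the Generator side, under the same assumptions as the example above; the rendered strings are indicative rather than asserted, and the column and table names are made up:

import sqlglot

# TRY_CAST from other dialects is expected to map to BigQuery's SAFE_CAST
# (see trycast_sql), with INT widened to INT64 via TYPE_MAPPING.
print(sqlglot.transpile("SELECT TRY_CAST(amount AS INT) FROM t", read="duckdb", write="bigquery")[0])

# BigQuery array subscripts should round-trip through the OFFSET/ORDINAL/
# SAFE_OFFSET handling in BRACKET_OFFSETS and bracket_sql.
tree = sqlglot.parse_one("SELECT items[SAFE_OFFSET(0)] FROM `dataset.orders`", read="bigquery")
print(tree.sql(dialect="bigquery"))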
date_add_interval_sql("DATE", "SUB"), 954 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 955 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 956 exp.DateTrunc: lambda self, e: self.func( 957 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 958 ), 959 exp.FromTimeZone: lambda self, e: self.func( 960 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 961 ), 962 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 963 exp.GroupConcat: lambda self, e: groupconcat_sql( 964 self, e, func_name="STRING_AGG", within_group=False 965 ), 966 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 967 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 968 exp.If: if_sql(false_value="NULL"), 969 exp.ILike: no_ilike_sql, 970 exp.IntDiv: rename_func("DIV"), 971 exp.Int64: rename_func("INT64"), 972 exp.JSONExtract: _json_extract_sql, 973 exp.JSONExtractArray: _json_extract_sql, 974 exp.JSONExtractScalar: _json_extract_sql, 975 exp.JSONFormat: rename_func("TO_JSON_STRING"), 976 exp.Levenshtein: _levenshtein_sql, 977 exp.Max: max_or_greatest, 978 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 979 exp.MD5Digest: rename_func("MD5"), 980 exp.Min: min_or_least, 981 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 982 exp.RegexpExtract: lambda self, e: self.func( 983 "REGEXP_EXTRACT", 984 e.this, 985 e.expression, 986 e.args.get("position"), 987 e.args.get("occurrence"), 988 ), 989 exp.RegexpExtractAll: lambda self, e: self.func( 990 "REGEXP_EXTRACT_ALL", e.this, e.expression 991 ), 992 exp.RegexpReplace: regexp_replace_sql, 993 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 994 exp.ReturnsProperty: _returnsproperty_sql, 995 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 996 exp.Select: transforms.preprocess( 997 [ 998 transforms.explode_projection_to_unnest(), 999 transforms.unqualify_unnest, 1000 transforms.eliminate_distinct_on, 1001 _alias_ordered_group, 1002 transforms.eliminate_semi_and_anti_joins, 1003 ] 1004 ), 1005 exp.SHA: rename_func("SHA1"), 1006 exp.SHA2: sha256_sql, 1007 exp.StabilityProperty: lambda self, e: ( 1008 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1009 ), 1010 exp.String: rename_func("STRING"), 1011 exp.StrPosition: lambda self, e: ( 1012 strposition_sql( 1013 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1014 ) 1015 ), 1016 exp.StrToDate: _str_to_datetime_sql, 1017 exp.StrToTime: _str_to_datetime_sql, 1018 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1019 exp.TimeFromParts: rename_func("TIME"), 1020 exp.TimestampFromParts: rename_func("DATETIME"), 1021 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1022 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1023 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1024 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1025 exp.TimeStrToTime: timestrtotime_sql, 1026 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1027 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1028 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1029 exp.TsOrDsToTime: rename_func("TIME"), 1030 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1031 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1032 exp.Unhex: rename_func("FROM_HEX"), 1033 exp.UnixDate: rename_func("UNIX_DATE"), 1034 exp.UnixToTime: _unix_to_time_sql, 1035 exp.Uuid: lambda *_: "GENERATE_UUID()", 1036 exp.Values: _derived_table_values_to_unnest, 1037 exp.VariancePop: 
rename_func("VAR_POP"), 1038 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1039 } 1040 1041 SUPPORTED_JSON_PATH_PARTS = { 1042 exp.JSONPathKey, 1043 exp.JSONPathRoot, 1044 exp.JSONPathSubscript, 1045 } 1046 1047 TYPE_MAPPING = { 1048 **generator.Generator.TYPE_MAPPING, 1049 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1050 exp.DataType.Type.BIGINT: "INT64", 1051 exp.DataType.Type.BINARY: "BYTES", 1052 exp.DataType.Type.BLOB: "BYTES", 1053 exp.DataType.Type.BOOLEAN: "BOOL", 1054 exp.DataType.Type.CHAR: "STRING", 1055 exp.DataType.Type.DECIMAL: "NUMERIC", 1056 exp.DataType.Type.DOUBLE: "FLOAT64", 1057 exp.DataType.Type.FLOAT: "FLOAT64", 1058 exp.DataType.Type.INT: "INT64", 1059 exp.DataType.Type.NCHAR: "STRING", 1060 exp.DataType.Type.NVARCHAR: "STRING", 1061 exp.DataType.Type.SMALLINT: "INT64", 1062 exp.DataType.Type.TEXT: "STRING", 1063 exp.DataType.Type.TIMESTAMP: "DATETIME", 1064 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1065 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1066 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1067 exp.DataType.Type.TINYINT: "INT64", 1068 exp.DataType.Type.ROWVERSION: "BYTES", 1069 exp.DataType.Type.UUID: "STRING", 1070 exp.DataType.Type.VARBINARY: "BYTES", 1071 exp.DataType.Type.VARCHAR: "STRING", 1072 exp.DataType.Type.VARIANT: "ANY TYPE", 1073 } 1074 1075 PROPERTIES_LOCATION = { 1076 **generator.Generator.PROPERTIES_LOCATION, 1077 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1078 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1079 } 1080 1081 # WINDOW comes after QUALIFY 1082 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1083 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1084 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1085 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1086 } 1087 1088 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1089 RESERVED_KEYWORDS = { 1090 "all", 1091 "and", 1092 "any", 1093 "array", 1094 "as", 1095 "asc", 1096 "assert_rows_modified", 1097 "at", 1098 "between", 1099 "by", 1100 "case", 1101 "cast", 1102 "collate", 1103 "contains", 1104 "create", 1105 "cross", 1106 "cube", 1107 "current", 1108 "default", 1109 "define", 1110 "desc", 1111 "distinct", 1112 "else", 1113 "end", 1114 "enum", 1115 "escape", 1116 "except", 1117 "exclude", 1118 "exists", 1119 "extract", 1120 "false", 1121 "fetch", 1122 "following", 1123 "for", 1124 "from", 1125 "full", 1126 "group", 1127 "grouping", 1128 "groups", 1129 "hash", 1130 "having", 1131 "if", 1132 "ignore", 1133 "in", 1134 "inner", 1135 "intersect", 1136 "interval", 1137 "into", 1138 "is", 1139 "join", 1140 "lateral", 1141 "left", 1142 "like", 1143 "limit", 1144 "lookup", 1145 "merge", 1146 "natural", 1147 "new", 1148 "no", 1149 "not", 1150 "null", 1151 "nulls", 1152 "of", 1153 "on", 1154 "or", 1155 "order", 1156 "outer", 1157 "over", 1158 "partition", 1159 "preceding", 1160 "proto", 1161 "qualify", 1162 "range", 1163 "recursive", 1164 "respect", 1165 "right", 1166 "rollup", 1167 "rows", 1168 "select", 1169 "set", 1170 "some", 1171 "struct", 1172 "tablesample", 1173 "then", 1174 "to", 1175 "treat", 1176 "true", 1177 "unbounded", 1178 "union", 1179 "unnest", 1180 "using", 1181 "when", 1182 "where", 1183 "window", 1184 "with", 1185 "within", 1186 } 1187 1188 def mod_sql(self, expression: exp.Mod) -> str: 1189 this = expression.this 1190 expr = expression.expression 1191 return self.func( 1192 "MOD", 1193 this.unnest() if 
isinstance(this, exp.Paren) else this, 1194 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1195 ) 1196 1197 def column_parts(self, expression: exp.Column) -> str: 1198 if expression.meta.get("quoted_column"): 1199 # If a column reference is of the form `dataset.table`.name, we need 1200 # to preserve the quoted table path, otherwise the reference breaks 1201 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1202 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1203 return f"{table_path}.{self.sql(expression, 'this')}" 1204 1205 return super().column_parts(expression) 1206 1207 def table_parts(self, expression: exp.Table) -> str: 1208 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1209 # we need to make sure the correct quoting is used in each case. 1210 # 1211 # For example, if there is a CTE x that clashes with a schema name, then the former will 1212 # return the table y in that schema, whereas the latter will return the CTE's y column: 1213 # 1214 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1215 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1216 if expression.meta.get("quoted_table"): 1217 table_parts = ".".join(p.name for p in expression.parts) 1218 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1219 1220 return super().table_parts(expression) 1221 1222 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1223 this = expression.this 1224 if isinstance(this, exp.TsOrDsToDatetime): 1225 func_name = "FORMAT_DATETIME" 1226 elif isinstance(this, exp.TsOrDsToTimestamp): 1227 func_name = "FORMAT_TIMESTAMP" 1228 else: 1229 func_name = "FORMAT_DATE" 1230 1231 time_expr = ( 1232 this 1233 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1234 else expression 1235 ) 1236 return self.func( 1237 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1238 ) 1239 1240 def eq_sql(self, expression: exp.EQ) -> str: 1241 # Operands of = cannot be NULL in BigQuery 1242 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1243 if not isinstance(expression.parent, exp.Update): 1244 return "NULL" 1245 1246 return self.binary(expression, "=") 1247 1248 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1249 parent = expression.parent 1250 1251 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1252 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
1253 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1254 return self.func( 1255 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1256 ) 1257 1258 return super().attimezone_sql(expression) 1259 1260 def trycast_sql(self, expression: exp.TryCast) -> str: 1261 return self.cast_sql(expression, safe_prefix="SAFE_") 1262 1263 def bracket_sql(self, expression: exp.Bracket) -> str: 1264 this = expression.this 1265 expressions = expression.expressions 1266 1267 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1268 arg = expressions[0] 1269 if arg.type is None: 1270 from sqlglot.optimizer.annotate_types import annotate_types 1271 1272 arg = annotate_types(arg, dialect=self.dialect) 1273 1274 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1275 # BQ doesn't support bracket syntax with string values for structs 1276 return f"{self.sql(this)}.{arg.name}" 1277 1278 expressions_sql = self.expressions(expression, flat=True) 1279 offset = expression.args.get("offset") 1280 1281 if offset == 0: 1282 expressions_sql = f"OFFSET({expressions_sql})" 1283 elif offset == 1: 1284 expressions_sql = f"ORDINAL({expressions_sql})" 1285 elif offset is not None: 1286 self.unsupported(f"Unsupported array offset: {offset}") 1287 1288 if expression.args.get("safe"): 1289 expressions_sql = f"SAFE_{expressions_sql}" 1290 1291 return f"{self.sql(this)}[{expressions_sql}]" 1292 1293 def in_unnest_op(self, expression: exp.Unnest) -> str: 1294 return self.sql(expression) 1295 1296 def version_sql(self, expression: exp.Version) -> str: 1297 if expression.name == "TIMESTAMP": 1298 expression.set("this", "SYSTEM_TIME") 1299 return super().version_sql(expression) 1300 1301 def contains_sql(self, expression: exp.Contains) -> str: 1302 this = expression.this 1303 expr = expression.expression 1304 1305 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1306 this = this.this 1307 expr = expr.this 1308 1309 return self.func("CONTAINS_SUBSTR", this, expr) 1310 1311 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1312 this = expression.this 1313 1314 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1315 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1316 # because they aren't literals and so the above syntax is invalid BigQuery. 1317 if isinstance(this, exp.Array): 1318 elem = seq_get(this.expressions, 0) 1319 if not (elem and elem.find(exp.Query)): 1320 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1321 1322 return super().cast_sql(expression, safe_prefix=safe_prefix)
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
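A hedged sketch of where this flag matters: BigQuery's LOG(X, Y) computes the logarithm of X to base Y, i.e. the base comes second, while some dialects put the base first, so transpiling may reorder the arguments. The output below is only printed, not asserted.

# Hedged sketch (assumed behavior): the base position differs across dialects, and
# sqlglot uses LOG_BASE_FIRST to decide which LOG argument is the base.
import sqlglot

print(sqlglot.transpile("SELECT LOG(x, 2) FROM t", read="bigquery", write="postgres")[0])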
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
See the sketch after this list for the BigQuery resolution.
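A hedged sketch of the BigQuery behavior described above, using sqlglot's public qualify helper; the qualified output is printed rather than asserted.

# Hedged sketch: qualify the example query under the BigQuery dialect. Per the note
# above, the projection alias is forwarded into GROUP BY/HAVING, while WHERE still
# refers to the source column data.my_id.
import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
"""

print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))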
Whether the name of the function should be preserved inside the node's metadata. This can be useful for roundtripping deprecated vs. new functions that share an AST node, e.g. JSON_VALUE vs. JSON_EXTRACT_SCALAR in BigQuery.
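A hedged roundtrip sketch for the JSON_VALUE vs. JSON_EXTRACT_SCALAR example above; the outputs are expectations, not verified results.

# Hedged sketch: both spellings parse to the same AST node (exp.JSONExtractScalar,
# per the Parser's FUNCTIONS mapping on this page), but with the original name kept
# in metadata a BigQuery -> BigQuery roundtrip is expected to preserve whichever
# spelling was used in the input.
import sqlglot

for fn in ("JSON_VALUE", "JSON_EXTRACT_SCALAR"):
    print(sqlglot.transpile(f"SELECT {fn}(payload, '$.id') FROM t", read="bigquery", write="bigquery")[0])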
Whether hex strings such as x'CC' evaluate to integer or binary/blob type
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
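A hedged sketch of TIME_MAPPING in action: a BigQuery %-style format string is parsed into a dialect-agnostic form and re-rendered with the target dialect's tokens; the Hive output is printed for illustration only.

# Hedged sketch: FORMAT_DATE's format string is translated through TIME_MAPPING when
# transpiling, so the %-tokens are rewritten into the target dialect's equivalents.
import sqlglot

print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d) FROM t", read="bigquery", write="hive")[0])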
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
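A hedged sketch: BigQuery requires an explicit DISTINCT or ALL on set operations, so a bare UNION coming from another dialect is expected to be spelled out when generating BigQuery SQL; the expected output is noted in a comment rather than asserted.

# Hedged sketch: a bare UNION (implicitly DISTINCT in most dialects) is expected to be
# rendered with an explicit quantifier for BigQuery, e.g. "SELECT 1 UNION DISTINCT SELECT 2".
import sqlglot

print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="duckdb", write="bigquery")[0])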
453 def normalize_identifier(self, expression: E) -> E: 454 if ( 455 isinstance(expression, exp.Identifier) 456 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 457 ): 458 parent = expression.parent 459 while isinstance(parent, exp.Dot): 460 parent = parent.parent 461 462 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 463 # by default. The following check uses a heuristic to detect tables based on whether 464 # they are qualified. This should generally be correct, because tables in BigQuery 465 # must be qualified with at least a dataset, unless @@dataset_id is set. 466 case_sensitive = ( 467 isinstance(parent, exp.UserDefinedFunction) 468 or ( 469 isinstance(parent, exp.Table) 470 and parent.db 471 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 472 ) 473 or expression.meta.get("is_table") 474 ) 475 if not case_sensitive: 476 expression.set("this", expression.this.lower()) 477 478 return t.cast(E, expression) 479 480 return super().normalize_identifier(expression)
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
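A hedged sketch of the BigQuery heuristic above: a bare, unquoted identifier (e.g. a CTE name) is treated as case-insensitive and lowercased, while one carrying the "is_table" hint is left untouched; the expected values are inferred from the code above, not verified output.

# Hedged sketch of the heuristic above.
from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

cte_name = exp.to_identifier("MyCTE")
print(dialect.normalize_identifier(cte_name).this)    # expected: mycte

table_name = exp.to_identifier("MyTable")
table_name.meta["is_table"] = True                    # the "is_table" hint checked above
print(dialect.normalize_identifier(table_name).this)  # expected: MyTable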
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline).
482 class Tokenizer(tokens.Tokenizer): 483 QUOTES = ["'", '"', '"""', "'''"] 484 COMMENTS = ["--", "#", ("/*", "*/")] 485 IDENTIFIERS = ["`"] 486 STRING_ESCAPES = ["\\"] 487 488 HEX_STRINGS = [("0x", ""), ("0X", "")] 489 490 BYTE_STRINGS = [ 491 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 492 ] 493 494 RAW_STRINGS = [ 495 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 496 ] 497 498 NESTED_COMMENTS = False 499 500 KEYWORDS = { 501 **tokens.Tokenizer.KEYWORDS, 502 "ANY TYPE": TokenType.VARIANT, 503 "BEGIN": TokenType.COMMAND, 504 "BEGIN TRANSACTION": TokenType.BEGIN, 505 "BYTEINT": TokenType.INT, 506 "BYTES": TokenType.BINARY, 507 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 508 "DATETIME": TokenType.TIMESTAMP, 509 "DECLARE": TokenType.COMMAND, 510 "ELSEIF": TokenType.COMMAND, 511 "EXCEPTION": TokenType.COMMAND, 512 "EXPORT": TokenType.EXPORT, 513 "FLOAT64": TokenType.DOUBLE, 514 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 515 "MODEL": TokenType.MODEL, 516 "NOT DETERMINISTIC": TokenType.VOLATILE, 517 "RECORD": TokenType.STRUCT, 518 "TIMESTAMP": TokenType.TIMESTAMPTZ, 519 } 520 KEYWORDS.pop("DIV") 521 KEYWORDS.pop("VALUES") 522 KEYWORDS.pop("/*+")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
524 class Parser(parser.Parser): 525 PREFIXED_PIVOT_COLUMNS = True 526 LOG_DEFAULTS_TO_LN = True 527 SUPPORTS_IMPLICIT_UNNEST = True 528 529 FUNCTIONS = { 530 **parser.Parser.FUNCTIONS, 531 "CONTAINS_SUBSTR": _build_contains_substring, 532 "DATE": _build_date, 533 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 534 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 535 "DATE_TRUNC": lambda args: exp.DateTrunc( 536 unit=exp.Literal.string(str(seq_get(args, 1))), 537 this=seq_get(args, 0), 538 zone=seq_get(args, 2), 539 ), 540 "DATETIME": _build_datetime, 541 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 542 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 543 "DIV": binary_from_function(exp.IntDiv), 544 "EDIT_DISTANCE": _build_levenshtein, 545 "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate), 546 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 547 "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar), 548 "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 549 "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract), 550 "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray), 551 "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar), 552 "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray), 553 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 554 "MD5": exp.MD5Digest.from_arg_list, 555 "TO_HEX": _build_to_hex, 556 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 557 [seq_get(args, 1), seq_get(args, 0)] 558 ), 559 "PARSE_TIMESTAMP": _build_parse_timestamp, 560 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 561 "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract), 562 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 563 "REGEXP_EXTRACT_ALL": _build_regexp_extract( 564 exp.RegexpExtractAll, default_group=exp.Literal.number(0) 565 ), 566 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 567 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 568 "SPLIT": lambda args: exp.Split( 569 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 570 this=seq_get(args, 0), 571 expression=seq_get(args, 1) or exp.Literal.string(","), 572 ), 573 "STRPOS": exp.StrPosition.from_arg_list, 574 "TIME": _build_time, 575 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 576 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 577 "TIMESTAMP": _build_timestamp, 578 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 579 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 580 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 581 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 582 ), 583 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 584 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 585 ), 586 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 587 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 588 "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime), 589 "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp), 590 } 591 592 FUNCTION_PARSERS = { 593 **parser.Parser.FUNCTION_PARSERS, 594 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 595 "MAKE_INTERVAL": lambda self: self._parse_make_interval(), 596 "FEATURES_AT_TIME": lambda 
self: self._parse_features_at_time(), 597 } 598 FUNCTION_PARSERS.pop("TRIM") 599 600 NO_PAREN_FUNCTIONS = { 601 **parser.Parser.NO_PAREN_FUNCTIONS, 602 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 603 } 604 605 NESTED_TYPE_TOKENS = { 606 *parser.Parser.NESTED_TYPE_TOKENS, 607 TokenType.TABLE, 608 } 609 610 PROPERTY_PARSERS = { 611 **parser.Parser.PROPERTY_PARSERS, 612 "NOT DETERMINISTIC": lambda self: self.expression( 613 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 614 ), 615 "OPTIONS": lambda self: self._parse_with_property(), 616 } 617 618 CONSTRAINT_PARSERS = { 619 **parser.Parser.CONSTRAINT_PARSERS, 620 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 621 } 622 623 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 624 RANGE_PARSERS.pop(TokenType.OVERLAPS) 625 626 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 627 628 DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN} 629 630 STATEMENT_PARSERS = { 631 **parser.Parser.STATEMENT_PARSERS, 632 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 633 TokenType.END: lambda self: self._parse_as_command(self._prev), 634 TokenType.FOR: lambda self: self._parse_for_in(), 635 TokenType.EXPORT: lambda self: self._parse_export_data(), 636 } 637 638 BRACKET_OFFSETS = { 639 "OFFSET": (0, False), 640 "ORDINAL": (1, False), 641 "SAFE_OFFSET": (0, True), 642 "SAFE_ORDINAL": (1, True), 643 } 644 645 def _parse_for_in(self) -> exp.ForIn: 646 this = self._parse_range() 647 self._match_text_seq("DO") 648 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 649 650 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 651 this = super()._parse_table_part(schema=schema) or self._parse_number() 652 653 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 654 if isinstance(this, exp.Identifier): 655 table_name = this.name 656 while self._match(TokenType.DASH, advance=False) and self._next: 657 start = self._curr 658 while self._is_connected() and not self._match_set( 659 self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False 660 ): 661 self._advance() 662 663 if start == self._curr: 664 break 665 666 table_name += self._find_sql(start, self._prev) 667 668 this = exp.Identifier( 669 this=table_name, quoted=this.args.get("quoted") 670 ).update_positions(this) 671 elif isinstance(this, exp.Literal): 672 table_name = this.name 673 674 if self._is_connected() and self._parse_var(any_token=True): 675 table_name += self._prev.text 676 677 this = exp.Identifier(this=table_name, quoted=True).update_positions(this) 678 679 return this 680 681 def _parse_table_parts( 682 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 683 ) -> exp.Table: 684 table = super()._parse_table_parts( 685 schema=schema, is_db_reference=is_db_reference, wildcard=True 686 ) 687 688 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 689 if not table.catalog: 690 if table.db: 691 previous_db = table.args["db"] 692 parts = table.db.split(".") 693 if len(parts) == 2 and not table.args["db"].quoted: 694 table.set( 695 "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db) 696 ) 697 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 698 else: 699 previous_this = table.this 700 parts = table.name.split(".") 701 if len(parts) == 2 and not table.this.quoted: 702 table.set( 703 "db", 
exp.Identifier(this=parts[0]).update_positions(previous_this) 704 ) 705 table.set( 706 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 707 ) 708 709 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 710 alias = table.this 711 catalog, db, this, *rest = ( 712 exp.to_identifier(p, quoted=True) 713 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 714 ) 715 716 for part in (catalog, db, this): 717 if part: 718 part.update_positions(table.this) 719 720 if rest and this: 721 this = exp.Dot.build([this, *rest]) # type: ignore 722 723 table = exp.Table( 724 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 725 ) 726 table.meta["quoted_table"] = True 727 else: 728 alias = None 729 730 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 731 # dataset, so if the project identifier is omitted we need to fix the ast so that 732 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 733 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 734 # views, because it would seem like the "catalog" part is set, when it'd actually 735 # be the region/dataset. Merging the two identifiers into a single one is done to 736 # avoid producing a 4-part Table reference, which would cause issues in the schema 737 # module, when there are 3-part table names mixed with information schema views. 738 # 739 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 740 table_parts = table.parts 741 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 742 # We need to alias the table here to avoid breaking existing qualified columns. 743 # This is expected to be safe, because if there's an actual alias coming up in 744 # the token stream, it will overwrite this one. If there isn't one, we are only 745 # exposing the name that can be used to reference the view explicitly (a no-op). 746 exp.alias_( 747 table, 748 t.cast(exp.Identifier, alias or table_parts[-1]), 749 table=True, 750 copy=False, 751 ) 752 753 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 754 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 755 line=table_parts[-2].meta.get("line"), 756 col=table_parts[-1].meta.get("col"), 757 start=table_parts[-2].meta.get("start"), 758 end=table_parts[-1].meta.get("end"), 759 ) 760 table.set("this", new_this) 761 table.set("db", seq_get(table_parts, -3)) 762 table.set("catalog", seq_get(table_parts, -4)) 763 764 return table 765 766 def _parse_column(self) -> t.Optional[exp.Expression]: 767 column = super()._parse_column() 768 if isinstance(column, exp.Column): 769 parts = column.parts 770 if any("." in p.name for p in parts): 771 catalog, db, table, this, *rest = ( 772 exp.to_identifier(p, quoted=True) 773 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 774 ) 775 776 if rest and this: 777 this = exp.Dot.build([this, *rest]) # type: ignore 778 779 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 780 column.meta["quoted_column"] = True 781 782 return column 783 784 @t.overload 785 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 786 787 @t.overload 788 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
789 790 def _parse_json_object(self, agg=False): 791 json_object = super()._parse_json_object() 792 array_kv_pair = seq_get(json_object.expressions, 0) 793 794 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 795 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 796 if ( 797 array_kv_pair 798 and isinstance(array_kv_pair.this, exp.Array) 799 and isinstance(array_kv_pair.expression, exp.Array) 800 ): 801 keys = array_kv_pair.this.expressions 802 values = array_kv_pair.expression.expressions 803 804 json_object.set( 805 "expressions", 806 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 807 ) 808 809 return json_object 810 811 def _parse_bracket( 812 self, this: t.Optional[exp.Expression] = None 813 ) -> t.Optional[exp.Expression]: 814 bracket = super()._parse_bracket(this) 815 816 if this is bracket: 817 return bracket 818 819 if isinstance(bracket, exp.Bracket): 820 for expression in bracket.expressions: 821 name = expression.name.upper() 822 823 if name not in self.BRACKET_OFFSETS: 824 break 825 826 offset, safe = self.BRACKET_OFFSETS[name] 827 bracket.set("offset", offset) 828 bracket.set("safe", safe) 829 expression.replace(expression.expressions[0]) 830 831 return bracket 832 833 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 834 unnest = super()._parse_unnest(with_alias=with_alias) 835 836 if not unnest: 837 return None 838 839 unnest_expr = seq_get(unnest.expressions, 0) 840 if unnest_expr: 841 from sqlglot.optimizer.annotate_types import annotate_types 842 843 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 844 845 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 846 # in contrast to other dialects such as DuckDB which flattens only the array by default 847 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 848 array_elem.is_type(exp.DataType.Type.STRUCT) 849 for array_elem in unnest_expr._type.expressions 850 ): 851 unnest.set("explode_array", True) 852 853 return unnest 854 855 def _parse_make_interval(self) -> exp.MakeInterval: 856 expr = exp.MakeInterval() 857 858 for arg_key in expr.arg_types: 859 value = self._parse_lambda() 860 861 if not value: 862 break 863 864 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 865 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 866 if isinstance(value, exp.Kwarg): 867 arg_key = value.this.name 868 869 expr.set(arg_key, value) 870 871 self._match(TokenType.COMMA) 872 873 return expr 874 875 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 876 expr = self.expression( 877 exp.FeaturesAtTime, 878 this=(self._match(TokenType.TABLE) and self._parse_table()) 879 or self._parse_select(nested=True), 880 ) 881 882 while self._match(TokenType.COMMA): 883 arg = self._parse_lambda() 884 885 # Get the LHS of the Kwarg and set the arg to that value, e.g 886 # "num_rows => 1" sets the expr's `num_rows` arg 887 if arg: 888 expr.set(arg.this.name, arg) 889 890 return expr 891 892 def _parse_export_data(self) -> exp.Export: 893 self._match_text_seq("DATA") 894 895 return self.expression( 896 exp.Export, 897 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 898 options=self._parse_properties(), 899 this=self._match_text_seq("AS") and self._parse_select(), 900 )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
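A hedged usage sketch: this Parser is normally driven through sqlglot's top-level helpers rather than instantiated directly; the DuckDB rendering below is printed for illustration, not asserted.

# Hedged sketch: parse BigQuery SQL via the public entrypoint, which runs this dialect's
# Tokenizer and Parser. Per the FUNCTIONS mapping above, TIMESTAMP_MILLIS is parsed into
# exp.UnixToTime, so the expression can be re-rendered for other dialects.
import sqlglot

ast = sqlglot.parse_one("SELECT TIMESTAMP_MILLIS(1672531200000) AS ts", read="bigquery")
print(ast.find(sqlglot.exp.UnixToTime))
print(ast.sql(dialect="duckdb"))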
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- errors
- sql
902 class Generator(generator.Generator): 903 INTERVAL_ALLOWS_PLURAL_FORM = False 904 JOIN_HINTS = False 905 QUERY_HINTS = False 906 TABLE_HINTS = False 907 LIMIT_FETCH = "LIMIT" 908 RENAME_TABLE_WITH_DB = False 909 NVL2_SUPPORTED = False 910 UNNEST_WITH_ORDINALITY = False 911 COLLATE_IS_FUNC = True 912 LIMIT_ONLY_LITERALS = True 913 SUPPORTS_TABLE_ALIAS_COLUMNS = False 914 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 915 JSON_KEY_VALUE_PAIR_SEP = "," 916 NULL_ORDERING_SUPPORTED = False 917 IGNORE_NULLS_IN_FUNC = True 918 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 919 CAN_IMPLEMENT_ARRAY_ANY = True 920 SUPPORTS_TO_NUMBER = False 921 NAMED_PLACEHOLDER_TOKEN = "@" 922 HEX_FUNC = "TO_HEX" 923 WITH_PROPERTIES_PREFIX = "OPTIONS" 924 SUPPORTS_EXPLODING_PROJECTIONS = False 925 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 926 SUPPORTS_UNIX_SECONDS = True 927 928 TRANSFORMS = { 929 **generator.Generator.TRANSFORMS, 930 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 931 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 932 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 933 exp.Array: inline_array_unless_query, 934 exp.ArrayContains: _array_contains_sql, 935 exp.ArrayFilter: filter_array_using_unnest, 936 exp.ArrayRemove: filter_array_using_unnest, 937 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 938 exp.CollateProperty: lambda self, e: ( 939 f"DEFAULT COLLATE {self.sql(e, 'this')}" 940 if e.args.get("default") 941 else f"COLLATE {self.sql(e, 'this')}" 942 ), 943 exp.Commit: lambda *_: "COMMIT TRANSACTION", 944 exp.CountIf: rename_func("COUNTIF"), 945 exp.Create: _create_sql, 946 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 947 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 948 exp.DateDiff: lambda self, e: self.func( 949 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 950 ), 951 exp.DateFromParts: rename_func("DATE"), 952 exp.DateStrToDate: datestrtodate_sql, 953 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 954 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 955 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 956 exp.DateTrunc: lambda self, e: self.func( 957 "DATE_TRUNC", e.this, e.text("unit"), e.args.get("zone") 958 ), 959 exp.FromTimeZone: lambda self, e: self.func( 960 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 961 ), 962 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 963 exp.GroupConcat: lambda self, e: groupconcat_sql( 964 self, e, func_name="STRING_AGG", within_group=False 965 ), 966 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 967 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 968 exp.If: if_sql(false_value="NULL"), 969 exp.ILike: no_ilike_sql, 970 exp.IntDiv: rename_func("DIV"), 971 exp.Int64: rename_func("INT64"), 972 exp.JSONExtract: _json_extract_sql, 973 exp.JSONExtractArray: _json_extract_sql, 974 exp.JSONExtractScalar: _json_extract_sql, 975 exp.JSONFormat: rename_func("TO_JSON_STRING"), 976 exp.Levenshtein: _levenshtein_sql, 977 exp.Max: max_or_greatest, 978 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 979 exp.MD5Digest: rename_func("MD5"), 980 exp.Min: min_or_least, 981 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 982 exp.RegexpExtract: lambda self, e: self.func( 983 "REGEXP_EXTRACT", 984 e.this, 985 e.expression, 986 e.args.get("position"), 987 e.args.get("occurrence"), 988 ), 989 exp.RegexpExtractAll: lambda self, e: 
self.func( 990 "REGEXP_EXTRACT_ALL", e.this, e.expression 991 ), 992 exp.RegexpReplace: regexp_replace_sql, 993 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 994 exp.ReturnsProperty: _returnsproperty_sql, 995 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 996 exp.Select: transforms.preprocess( 997 [ 998 transforms.explode_projection_to_unnest(), 999 transforms.unqualify_unnest, 1000 transforms.eliminate_distinct_on, 1001 _alias_ordered_group, 1002 transforms.eliminate_semi_and_anti_joins, 1003 ] 1004 ), 1005 exp.SHA: rename_func("SHA1"), 1006 exp.SHA2: sha256_sql, 1007 exp.StabilityProperty: lambda self, e: ( 1008 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1009 ), 1010 exp.String: rename_func("STRING"), 1011 exp.StrPosition: lambda self, e: ( 1012 strposition_sql( 1013 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1014 ) 1015 ), 1016 exp.StrToDate: _str_to_datetime_sql, 1017 exp.StrToTime: _str_to_datetime_sql, 1018 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1019 exp.TimeFromParts: rename_func("TIME"), 1020 exp.TimestampFromParts: rename_func("DATETIME"), 1021 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1022 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1023 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1024 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1025 exp.TimeStrToTime: timestrtotime_sql, 1026 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1027 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1028 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1029 exp.TsOrDsToTime: rename_func("TIME"), 1030 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1031 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1032 exp.Unhex: rename_func("FROM_HEX"), 1033 exp.UnixDate: rename_func("UNIX_DATE"), 1034 exp.UnixToTime: _unix_to_time_sql, 1035 exp.Uuid: lambda *_: "GENERATE_UUID()", 1036 exp.Values: _derived_table_values_to_unnest, 1037 exp.VariancePop: rename_func("VAR_POP"), 1038 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1039 } 1040 1041 SUPPORTED_JSON_PATH_PARTS = { 1042 exp.JSONPathKey, 1043 exp.JSONPathRoot, 1044 exp.JSONPathSubscript, 1045 } 1046 1047 TYPE_MAPPING = { 1048 **generator.Generator.TYPE_MAPPING, 1049 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1050 exp.DataType.Type.BIGINT: "INT64", 1051 exp.DataType.Type.BINARY: "BYTES", 1052 exp.DataType.Type.BLOB: "BYTES", 1053 exp.DataType.Type.BOOLEAN: "BOOL", 1054 exp.DataType.Type.CHAR: "STRING", 1055 exp.DataType.Type.DECIMAL: "NUMERIC", 1056 exp.DataType.Type.DOUBLE: "FLOAT64", 1057 exp.DataType.Type.FLOAT: "FLOAT64", 1058 exp.DataType.Type.INT: "INT64", 1059 exp.DataType.Type.NCHAR: "STRING", 1060 exp.DataType.Type.NVARCHAR: "STRING", 1061 exp.DataType.Type.SMALLINT: "INT64", 1062 exp.DataType.Type.TEXT: "STRING", 1063 exp.DataType.Type.TIMESTAMP: "DATETIME", 1064 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1065 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1066 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1067 exp.DataType.Type.TINYINT: "INT64", 1068 exp.DataType.Type.ROWVERSION: "BYTES", 1069 exp.DataType.Type.UUID: "STRING", 1070 exp.DataType.Type.VARBINARY: "BYTES", 1071 exp.DataType.Type.VARCHAR: "STRING", 1072 exp.DataType.Type.VARIANT: "ANY TYPE", 1073 } 1074 1075 PROPERTIES_LOCATION = { 1076 **generator.Generator.PROPERTIES_LOCATION, 1077 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1078 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1079 } 1080 1081 # WINDOW comes after QUALIFY 1082 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1083 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1084 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1085 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1086 } 1087 1088 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1089 RESERVED_KEYWORDS = { 1090 "all", 1091 "and", 1092 "any", 1093 "array", 1094 "as", 1095 "asc", 1096 "assert_rows_modified", 1097 "at", 1098 "between", 1099 "by", 1100 "case", 1101 "cast", 1102 "collate", 1103 "contains", 1104 "create", 1105 "cross", 1106 "cube", 1107 "current", 1108 "default", 1109 "define", 1110 "desc", 1111 "distinct", 1112 "else", 1113 "end", 1114 "enum", 1115 "escape", 1116 "except", 1117 "exclude", 1118 "exists", 1119 "extract", 1120 "false", 1121 "fetch", 1122 "following", 1123 "for", 1124 "from", 1125 "full", 1126 "group", 1127 "grouping", 1128 "groups", 1129 "hash", 1130 "having", 1131 "if", 1132 "ignore", 1133 "in", 1134 "inner", 1135 "intersect", 1136 "interval", 1137 "into", 1138 "is", 1139 "join", 1140 "lateral", 1141 "left", 1142 "like", 1143 "limit", 1144 "lookup", 1145 "merge", 1146 "natural", 1147 "new", 1148 "no", 1149 "not", 1150 "null", 1151 "nulls", 1152 "of", 1153 "on", 1154 "or", 1155 "order", 1156 "outer", 1157 "over", 1158 "partition", 1159 "preceding", 1160 "proto", 1161 "qualify", 1162 "range", 1163 "recursive", 1164 "respect", 1165 "right", 1166 "rollup", 1167 "rows", 1168 "select", 1169 "set", 1170 "some", 1171 "struct", 1172 "tablesample", 1173 "then", 1174 "to", 1175 "treat", 1176 "true", 1177 "unbounded", 1178 "union", 1179 "unnest", 1180 "using", 1181 "when", 1182 "where", 1183 "window", 1184 "with", 1185 "within", 1186 } 1187 1188 def mod_sql(self, expression: exp.Mod) -> str: 1189 this = expression.this 1190 expr = expression.expression 1191 return self.func( 1192 "MOD", 1193 this.unnest() if isinstance(this, exp.Paren) else this, 1194 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1195 ) 1196 1197 def column_parts(self, expression: exp.Column) -> str: 1198 if expression.meta.get("quoted_column"): 1199 # If a column reference is of the form `dataset.table`.name, we need 1200 # to preserve the quoted table path, otherwise the reference breaks 1201 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1202 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1203 return f"{table_path}.{self.sql(expression, 'this')}" 1204 1205 return super().column_parts(expression) 1206 1207 def table_parts(self, expression: exp.Table) -> str: 1208 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1209 # we need to make sure the correct quoting is used in each case. 
1210 # 1211 # For example, if there is a CTE x that clashes with a schema name, then the former will 1212 # return the table y in that schema, whereas the latter will return the CTE's y column: 1213 # 1214 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1215 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1216 if expression.meta.get("quoted_table"): 1217 table_parts = ".".join(p.name for p in expression.parts) 1218 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1219 1220 return super().table_parts(expression) 1221 1222 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1223 this = expression.this 1224 if isinstance(this, exp.TsOrDsToDatetime): 1225 func_name = "FORMAT_DATETIME" 1226 elif isinstance(this, exp.TsOrDsToTimestamp): 1227 func_name = "FORMAT_TIMESTAMP" 1228 else: 1229 func_name = "FORMAT_DATE" 1230 1231 time_expr = ( 1232 this 1233 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1234 else expression 1235 ) 1236 return self.func( 1237 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1238 ) 1239 1240 def eq_sql(self, expression: exp.EQ) -> str: 1241 # Operands of = cannot be NULL in BigQuery 1242 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1243 if not isinstance(expression.parent, exp.Update): 1244 return "NULL" 1245 1246 return self.binary(expression, "=") 1247 1248 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1249 parent = expression.parent 1250 1251 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1252 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1253 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1254 return self.func( 1255 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1256 ) 1257 1258 return super().attimezone_sql(expression) 1259 1260 def trycast_sql(self, expression: exp.TryCast) -> str: 1261 return self.cast_sql(expression, safe_prefix="SAFE_") 1262 1263 def bracket_sql(self, expression: exp.Bracket) -> str: 1264 this = expression.this 1265 expressions = expression.expressions 1266 1267 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1268 arg = expressions[0] 1269 if arg.type is None: 1270 from sqlglot.optimizer.annotate_types import annotate_types 1271 1272 arg = annotate_types(arg, dialect=self.dialect) 1273 1274 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1275 # BQ doesn't support bracket syntax with string values for structs 1276 return f"{self.sql(this)}.{arg.name}" 1277 1278 expressions_sql = self.expressions(expression, flat=True) 1279 offset = expression.args.get("offset") 1280 1281 if offset == 0: 1282 expressions_sql = f"OFFSET({expressions_sql})" 1283 elif offset == 1: 1284 expressions_sql = f"ORDINAL({expressions_sql})" 1285 elif offset is not None: 1286 self.unsupported(f"Unsupported array offset: {offset}") 1287 1288 if expression.args.get("safe"): 1289 expressions_sql = f"SAFE_{expressions_sql}" 1290 1291 return f"{self.sql(this)}[{expressions_sql}]" 1292 1293 def in_unnest_op(self, expression: exp.Unnest) -> str: 1294 return self.sql(expression) 1295 1296 def version_sql(self, expression: exp.Version) -> str: 1297 if expression.name == "TIMESTAMP": 1298 expression.set("this", "SYSTEM_TIME") 1299 return super().version_sql(expression) 1300 1301 def contains_sql(self, 
expression: exp.Contains) -> str: 1302 this = expression.this 1303 expr = expression.expression 1304 1305 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1306 this = this.this 1307 expr = expr.this 1308 1309 return self.func("CONTAINS_SUBSTR", this, expr) 1310 1311 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1312 this = expression.this 1313 1314 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1315 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1316 # because they aren't literals and so the above syntax is invalid BigQuery. 1317 if isinstance(this, exp.Array): 1318 elem = seq_get(this.expressions, 0) 1319 if not (elem and elem.find(exp.Query)): 1320 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1321 1322 return super().cast_sql(expression, safe_prefix=safe_prefix)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
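These options are typically passed as keyword arguments to sqlglot.transpile (or directly to the Generator); a minimal, non-authoritative sketch assuming a recent sqlglot release:

import sqlglot

sql = "select col_a from project.dataset.tbl where col_a > 1"

# pretty and identify are forwarded to this module's Generator.
print(sqlglot.transpile(sql, read="bigquery", write="bigquery", pretty=True, identify=True)[0])
# Approximate output:
# SELECT
#   `col_a`
# FROM `project`.`dataset`.`tbl`
# WHERE
#   `col_a` > 1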
def column_parts(self, expression: exp.Column) -> str:
    if expression.meta.get("quoted_column"):
        # If a column reference is of the form `dataset.table`.name, we need
        # to preserve the quoted table path, otherwise the reference breaks
        table_parts = ".".join(p.name for p in expression.parts[:-1])
        table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
        return f"{table_path}.{self.sql(expression, 'this')}"

    return super().column_parts(expression)
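A hedged round-trip sketch of the case this method protects: a column qualified by a fully quoted table path should keep that path as a single backtick-quoted identifier (exact output may vary):

import sqlglot

sql = "SELECT `my-project.dataset.tbl`.col FROM `my-project.dataset.tbl`"

# The quoted table path in front of .col is preserved rather than split into parts.
print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])
# Expected to round-trip approximately unchanged.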
def table_parts(self, expression: exp.Table) -> str:
    # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
    # we need to make sure the correct quoting is used in each case.
    #
    # For example, if there is a CTE x that clashes with a schema name, then the former will
    # return the table y in that schema, whereas the latter will return the CTE's y column:
    #
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`    -> cross join
    # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`  -> implicit unnest
    if expression.meta.get("quoted_table"):
        table_parts = ".".join(p.name for p in expression.parts)
        return self.sql(exp.Identifier(this=table_parts, quoted=True))

    return super().table_parts(expression)
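The two queries from the comment above can be used to sanity-check this behavior; a hedged sketch (outputs are approximate):

import sqlglot

# `x.y` is a single quoted identifier (a table literally named "x.y"), so it must not be split.
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`", read="bigquery", write="bigquery")[0])

# `x`.`y` is two separate parts and resolves to the CTE's y column (implicit unnest).
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`", read="bigquery", write="bigquery")[0])
# Both statements are expected to round-trip with their original quoting preserved.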
def timetostr_sql(self, expression: exp.TimeToStr) -> str:
    this = expression.this
    if isinstance(this, exp.TsOrDsToDatetime):
        func_name = "FORMAT_DATETIME"
    elif isinstance(this, exp.TsOrDsToTimestamp):
        func_name = "FORMAT_TIMESTAMP"
    else:
        func_name = "FORMAT_DATE"

    time_expr = (
        this
        if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
        else expression
    )
    return self.func(
        func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
    )
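A small, non-authoritative sketch of the round-trip behavior, assuming FORMAT_DATE is parsed into a TimeToStr node as described above:

import sqlglot

# The wrapped expression type decides between FORMAT_DATE / FORMAT_DATETIME / FORMAT_TIMESTAMP.
print(sqlglot.transpile("SELECT FORMAT_DATE('%Y', DATE '2023-12-25')", read="bigquery", write="bigquery")[0])
# Approximate output: SELECT FORMAT_DATE('%Y', DATE '2023-12-25')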
def eq_sql(self, expression: exp.EQ) -> str:
    # Operands of = cannot be NULL in BigQuery
    if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
        if not isinstance(expression.parent, exp.Update):
            return "NULL"

    return self.binary(expression, "=")
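A hedged sketch of the NULL folding (approximate output); note that the UPDATE exception keeps SET col = NULL assignments intact:

import sqlglot

# A comparison against a NULL literal can never be TRUE, so the whole equality becomes NULL.
print(sqlglot.transpile("SELECT a = NULL", read="bigquery", write="bigquery")[0])
# Approximate output: SELECT NULL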
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
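A hedged sketch of the rewrite applied when AT TIME ZONE appears outside of a CAST ... FORMAT expression (approximate output):

import sqlglot

# AT TIME ZONE has no standalone BigQuery form, so it is rewritten via DATETIME + TIMESTAMP.
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# Approximate output: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))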
def bracket_sql(self, expression: exp.Bracket) -> str:
    this = expression.this
    expressions = expression.expressions

    if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
        arg = expressions[0]
        if arg.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg, dialect=self.dialect)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values for structs
            return f"{self.sql(this)}.{arg.name}"

    expressions_sql = self.expressions(expression, flat=True)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{self.sql(this)}[{expressions_sql}]"
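A hedged round-trip sketch of the OFFSET/ORDINAL and SAFE_ handling (approximate output):

import sqlglot

# Positional array access keeps its OFFSET/ORDINAL wrappers, and the SAFE_ prefix is preserved.
print(sqlglot.transpile("SELECT arr[OFFSET(0)], arr[SAFE_ORDINAL(1)]", read="bigquery", write="bigquery")[0])
# Approximate output: SELECT arr[OFFSET(0)], arr[SAFE_ORDINAL(1)]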
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    this = expression.this

    # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
    # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
    # because they aren't literals and so the above syntax is invalid BigQuery.
    if isinstance(this, exp.Array):
        elem = seq_get(this.expressions, 0)
        if not (elem and elem.find(exp.Query)):
            return f"{self.sql(expression, 'to')}{self.sql(this)}"

    return super().cast_sql(expression, safe_prefix=safe_prefix)
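A hedged sketch of the inline typed array literal round-trip described in the comment above (approximate output):

import sqlglot

# The typed array literal keeps its inline ARRAY<INT64> annotation instead of becoming a CAST.
print(sqlglot.transpile("SELECT ARRAY<INT64>[1, 2, 3]", read="bigquery", write="bigquery")[0])
# Approximate output: SELECT ARRAY<INT64>[1, 2, 3]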
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql