sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E*S": "%S.%f",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(this=this, db=db, catalog=catalog)
                table.meta["quoted_table"] = True

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.Mod: rename_func("MOD"),
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Query):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
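The dialect above plugs into sqlglot's regular parse/transpile entry points. Below is a minimal sketch (table and column names are hypothetical) of two BigQuery-specific behaviors implemented in this module, the VALUES-to-UNNEST rewrite and the TO_HEX(MD5(..)) simplification:

import sqlglot
from sqlglot import exp

# A VALUES derived table is not valid BigQuery SQL; the generator rewrites it
# to UNNEST over an array of STRUCTs (_derived_table_values_to_unnest).
print(
    sqlglot.transpile(
        "SELECT a, b FROM (VALUES (1, 'x'), (2, 'y')) AS t(a, b)",
        read="duckdb",
        write="bigquery",
    )[0]
)

# TO_HEX(MD5(..)) is collapsed into a single exp.MD5 node at parse time
# (_build_to_hex), which simplifies transpilation to other dialects.
ast = sqlglot.parse_one("SELECT TO_HEX(MD5(col))", read="bigquery")
print(repr(ast.find(exp.MD5)))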
class BigQuery(Dialect):
WEEK_OFFSET = -1

First day of the week in DATE_TRUNC(WEEK). Defaults to 0 (Monday); -1 means Sunday.
LOG_BASE_FIRST = False

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
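A hedged illustration of what this flag controls: for two-argument LOG calls, LOG_BASE_FIRST decides which argument is interpreted as the base, so transpiling between dialects that disagree should reorder the arguments (exact output depends on the sqlglot version):

import sqlglot

# BigQuery reads LOG(x, base) (base second), while Hive/Spark read LOG(base, x),
# so the arguments are expected to be swapped here, e.g. LOG(10, 100).
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="spark")[0])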
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False

Determines how function names are going to be normalized. Possible values:

- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disables function name normalization.
TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
FORMAT_MAPPING

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
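A sketch of both mappings in action (outputs are illustrative, not guaranteed): format strings in functions like FORMAT_DATE travel through TIME_MAPPING, while FORMAT_MAPPING drives the CAST(.. FORMAT '..') syntax:

import sqlglot

# '%D' is BigQuery shorthand for '%m/%d/%y' (TIME_MAPPING), so targets without
# that shorthand should receive the expanded strftime format.
print(sqlglot.transpile("SELECT FORMAT_DATE('%D', d)", read="bigquery", write="duckdb")[0])

# FORMAT_MAPPING translates 'YYYY-MM-DD' style format elements when parsing
# CAST(.. AS .. FORMAT '..').
print(sqlglot.transpile("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="bigquery", write="duckdb")[0])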
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
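A sketch of where this matters: when the optimizer qualifies a query and expands SELECT *, pseudo-columns are skipped. The schema below is hypothetical:

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# _PARTITIONTIME is present in the schema but, being a pseudo-column, it should
# not appear in the expanded star projection.
qualified = qualify(
    parse_one("SELECT * FROM tbl", read="bigquery"),
    schema={"tbl": {"col_a": "STRING", "_PARTITIONTIME": "TIMESTAMP"}},
    dialect="bigquery",
)
print(qualified.sql(dialect="bigquery"))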
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
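A minimal sketch of the BigQuery rules implemented above (identifier names are made up):

from sqlglot import exp
from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()

# A bare identifier (e.g. a CTE name) is case-insensitive and gets lowered.
print(dialect.normalize_identifier(exp.to_identifier("MyCte")).name)  # mycte

# An identifier inside a dataset-qualified table is heuristically treated as
# case-sensitive, so it is left untouched.
table = exp.Table(this=exp.to_identifier("MyTable"), db=exp.to_identifier("my_dataset"))
print(dialect.normalize_identifier(table.this).name)  # MyTable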
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
295 class Tokenizer(tokens.Tokenizer): 296 QUOTES = ["'", '"', '"""', "'''"] 297 COMMENTS = ["--", "#", ("/*", "*/")] 298 IDENTIFIERS = ["`"] 299 STRING_ESCAPES = ["\\"] 300 301 HEX_STRINGS = [("0x", ""), ("0X", "")] 302 303 BYTE_STRINGS = [ 304 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 305 ] 306 307 RAW_STRINGS = [ 308 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 309 ] 310 311 KEYWORDS = { 312 **tokens.Tokenizer.KEYWORDS, 313 "ANY TYPE": TokenType.VARIANT, 314 "BEGIN": TokenType.COMMAND, 315 "BEGIN TRANSACTION": TokenType.BEGIN, 316 "BYTES": TokenType.BINARY, 317 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 318 "DATETIME": TokenType.TIMESTAMP, 319 "DECLARE": TokenType.COMMAND, 320 "ELSEIF": TokenType.COMMAND, 321 "EXCEPTION": TokenType.COMMAND, 322 "FLOAT64": TokenType.DOUBLE, 323 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 324 "MODEL": TokenType.MODEL, 325 "NOT DETERMINISTIC": TokenType.VOLATILE, 326 "RECORD": TokenType.STRUCT, 327 "TIMESTAMP": TokenType.TIMESTAMPTZ, 328 } 329 KEYWORDS.pop("DIV") 330 KEYWORDS.pop("VALUES")
class Parser(parser.Parser):
    PREFIXED_PIVOT_COLUMNS = True
    LOG_DEFAULTS_TO_LN = True
    SUPPORTS_IMPLICIT_UNNEST = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _build_date,
        "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _build_to_hex,
        "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _build_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _build_time,
        "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _build_timestamp,
        "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            while self._match(TokenType.DASH, advance=False) and self._next:
                text = ""
                while self._curr and self._curr.token_type != TokenType.DOT:
                    self._advance()
                    text += self._prev.text
                table_name += text

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        table = super()._parse_table_parts(
            schema=schema, is_db_reference=is_db_reference, wildcard=True
        )

        # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
        if not table.catalog:
            if table.db:
                parts = table.db.split(".")
                if len(parts) == 2 and not table.args["db"].quoted:
                    table.set("catalog", exp.Identifier(this=parts[0]))
                    table.set("db", exp.Identifier(this=parts[1]))
            else:
                parts = table.name.split(".")
                if len(parts) == 2 and not table.this.quoted:
                    table.set("db", exp.Identifier(this=parts[0]))
                    table.set("this", exp.Identifier(this=parts[1]))

        if any("." in p.name for p in table.parts):
            catalog, db, this, *rest = (
                exp.to_identifier(p, quoted=True)
                for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
            )

            if rest and this:
                this = exp.Dot.build([this, *rest])  # type: ignore

            table = exp.Table(this=this, db=db, catalog=catalog)
            table.meta["quoted_table"] = True

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        bracket = super()._parse_bracket(this)

        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                expression.replace(expression.expressions[0])

        return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
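For example, the BRACKET_OFFSETS table above is what folds BigQuery's OFFSET/ORDINAL (and SAFE_) array accessors into the bracket expression; a minimal sketch, assuming current sqlglot behavior:

import sqlglot
from sqlglot import exp

# arr[SAFE_OFFSET(0)] is parsed into an exp.Bracket whose `offset` and `safe`
# args come from the BRACKET_OFFSETS lookup in _parse_bracket.
expr = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery")
bracket = expr.find(exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True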
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    SUPPORTS_TO_NUMBER = False
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, unit_to_var(e)
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.Mod: rename_func("MOD"),
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Query):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
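To make the tables above concrete, here is a small hedged sketch of the Generator in action via the public transpile API (expected outputs, shown as comments, assume current sqlglot behavior):

import sqlglot

# TYPE_MAPPING: sqlglot's TIMESTAMP maps to BigQuery's DATETIME, since BigQuery's
# TIMESTAMP type is timezone-aware (modeled as TIMESTAMPTZ).
print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", write="bigquery")[0])
# SELECT CAST(x AS DATETIME)

# TRANSFORMS: exp.CountIf is rendered with BigQuery's COUNTIF spelling.
print(sqlglot.transpile("SELECT COUNT_IF(y > 0) FROM t", write="bigquery")[0])
# SELECT COUNTIF(y > 0) FROM t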
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql