sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
class BigQuery(Dialect):
WEEK_OFFSET = -1

First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
LOG_BASE_FIRST = False

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
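A brief sketch of what this flag controls (Spark is used as an example of a base-first dialect; the output shown in the comment is indicative):

import sqlglot

# BigQuery reads LOG(x, y) as "log of x to base y" (base second), while
# Spark's LOG(base, x) takes the base first, so the arguments get swapped.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="spark")[0])
# expected along the lines of: SELECT LOG(10, 100)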
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False

Determines how function names are going to be normalized. Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
TIME_MAPPING = {'%D': '%m/%d/%y', '%E6S': '%S.%f'}

Associates this dialect's time formats with their equivalent Python strftime formats.
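For instance, %E6S (seconds with six fractional digits) normalizes to %S.%f, which the target dialect's own mapping can then re-render; a sketch (output indicative):

import sqlglot

# PARSE_TIMESTAMP's format string is translated through TIME_MAPPING.
sql = "SELECT PARSE_TIMESTAMP('%Y-%m-%d %H:%M:%E6S', s)"
print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
# expected along the lines of: SELECT STRPTIME(s, '%Y-%m-%d %H:%M:%S.%f')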
FORMAT_MAPPING = {'DD': '%d', 'MM': '%m', 'MON': '%b', 'MONTH': '%B', 'YYYY': '%Y', 'YY': '%y', 'HH': '%I', 'HH12': '%I', 'HH24': '%H', 'MI': '%M', 'SS': '%S', 'SSSSS': '%f', 'TZH': '%z'}

Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
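A hedged sketch of the CAST ... FORMAT syntax this mapping supports (the round trip shown is an assumption about how the parsed cast is re-expressed):

import sqlglot

# The FORMAT elements (YYYY, MM, DD) are rewritten via FORMAT_MAPPING into
# strftime-style tokens while parsing, so the cast can be re-expressed for
# dialects without CAST ... FORMAT support.
ast = sqlglot.parse_one("SELECT CAST('2024-01-01' AS DATE FORMAT 'YYYY-MM-DD')", read="bigquery")
print(ast.sql(dialect="duckdb"))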
PSEUDOCOLUMNS = {'_PARTITIONTIME', '_PARTITIONDATE'}

Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
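A sketch of how this can surface in the optimizer (the schema is made up, and the exclusion behavior is assumed from the qualifier's handling of pseudo-columns):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

# _PARTITIONTIME is present in the hypothetical schema, but as a pseudo-column
# it should be skipped when SELECT * is expanded.
schema = {"dataset": {"tbl": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}}
ast = qualify(parse_one("SELECT * FROM dataset.tbl", read="bigquery"), schema=schema, dialect="bigquery")
print(ast.sql(dialect="bigquery"))
# expected along the lines of (modulo identifier quoting): SELECT tbl.x AS x FROM dataset.tbl AS tbl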
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
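A concrete sketch of the behavior described above, using the normalize_identifiers optimizer rule; the expected outputs in comments follow from the rules, but are not verified here:

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = parse_one("SELECT FoO FROM tbl")
print(normalize_identifiers(expr.copy(), dialect="postgres").sql())   # expected: SELECT foo FROM tbl
print(normalize_identifiers(expr.copy(), dialect="snowflake").sql())  # expected: SELECT FOO FROM tbl

# Quoted identifiers are case-sensitive in Postgres, so they are left intact.
print(normalize_identifiers(parse_one('SELECT "FoO" FROM tbl'), dialect="postgres").sql())

# BigQuery: CTE, alias and column names are case-insensitive per the heuristic
# above, so they are lowercased; qualified table names would be preserved.
bq = parse_one("WITH Cte AS (SELECT 1 AS Col) SELECT Col FROM Cte", read="bigquery")
print(normalize_identifiers(bq, dialect="bigquery").sql(dialect="bigquery"))
# expected: WITH cte AS (SELECT 1 AS col) SELECT col FROM cte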
Mapping of an escaped sequence (such as \n) to its unescaped version (the corresponding literal character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
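A quick sketch of these tokenizer settings in action (raw-string prefixes, backticked identifiers, and # line comments), assuming sqlglot's top-level tokenize helper:

import sqlglot

tokens = sqlglot.tokenize(
    "SELECT r'\\d+' AS pattern FROM `proj.dataset.tbl`  # trailing comment",
    read="bigquery",
)
for token in tokens:
    # Comments are attached to tokens rather than emitted as tokens themselves.
    print(token.token_type, repr(token.text))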
Inherited Members
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket
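A usage sketch exercising two of the BigQuery-specific parsing rules above (dash-separated project names and the OFFSET/ORDINAL bracket wrappers); the round-tripped SQL in the comment is the expected shape, not a verified result:

from sqlglot import parse_one

# Dash-separated project names are valid unquoted in BigQuery, and array
# subscripts use OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL wrappers.
ast = parse_one("SELECT arr[SAFE_OFFSET(0)] FROM my-project.dataset.tbl", read="bigquery")
print(ast.sql(dialect="bigquery"))
# expected: SELECT arr[SAFE_OFFSET(0)] FROM my-project.dataset.tbl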
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
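As a brief usage sketch, these arguments can be forwarded through the top-level parsing helpers (assuming the error classes in sqlglot.errors):

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as e:
    # Each recorded error is a dict with fields such as "description".
    print(e.errors[0]["description"])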
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTER
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
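A short transpilation sketch exercising the TYPE_MAPPING table and the COUNTIF rename above; the expected output follows from those tables but is not verified here:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT CAST(a AS VARCHAR(10)), COUNT_IF(b) FROM t",
        write="bigquery",
    )[0]
)
# expected: SELECT CAST(a AS STRING), COUNTIF(b) FROM t
# (the precision parameter is dropped by remove_precision_parameterized_types)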
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
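For instance, a minimal generation sketch with pretty mode enabled (the output shape is approximate):

import sqlglot

print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 1", write="bigquery", pretty=True)[0])
# expected, roughly:
# SELECT
#   a,
#   b
# FROM t
# WHERE
#   a > 1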
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql