sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
    }

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._is_connected() and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
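The dialect is typically exercised through sqlglot's top-level API rather than instantiated directly. A minimal sketch (exact output strings may vary between sqlglot versions):

import sqlglot

# TO_HEX(MD5(..)) is parsed into a single MD5 node (see _build_to_hex above),
# simplifying transpilation to dialects whose MD5 already returns hex
ast = sqlglot.parse_one("SELECT TO_HEX(MD5('abc'))", read="bigquery")
print(ast.sql(dialect="bigquery"))  # round-trips to the TO_HEX(MD5(..)) form
print(ast.sql(dialect="duckdb"))    # expected shape (illustrative): SELECT MD5('abc')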
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
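A minimal sketch for inspecting how a dialect's LOG argument order shows up in the parsed tree, assuming the public parse_one and exp.Log APIs (the printed repr is illustrative):

    import sqlglot
    from sqlglot import exp

    # In dialects where the base comes first, LOG(2, 8) parses with 2 as the
    # base; printing the node makes the chosen argument order visible.
    expr = sqlglot.parse_one("SELECT LOG(2, 8)", read="bigquery")
    print(repr(expr.find(exp.Log)))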
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which will forward the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
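One way to observe this difference is to qualify the example query under each dialect and compare the results; a sketch using the optimizer's qualify helper (the outputs depend on the sqlglot version):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data
    WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """
    for dialect in ("bigquery", "clickhouse", "duckdb"):
        qualified = qualify(sqlglot.parse_one(sql, read=dialect), dialect=dialect)
        print(dialect, "->", qualified.sql(dialect=dialect))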
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
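A sketch of TIME_MAPPING at work: transpiling a BigQuery %-style format string into a dialect with a different notation (the exact output string is version-dependent):

    import sqlglot

    # BigQuery's '%Y-%m-%d' should be rewritten into the target dialect's
    # time-format notation (e.g. Hive's 'yyyy-MM-dd').
    sql = "SELECT PARSE_TIMESTAMP('%Y-%m-%d', '2020-01-01')"
    print(sqlglot.transpile(sql, read="bigquery", write="hive")[0])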
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
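Because BigQuery requires the set-operation quantifier to be explicit, transpiling a bare UNION into BigQuery should surface the default; a sketch (output not asserted):

    import sqlglot

    # Expected to produce something along the lines of
    # "SELECT 1 UNION DISTINCT SELECT 2".
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", read="postgres", write="bigquery")[0])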
302 def normalize_identifier(self, expression: E) -> E: 303 if ( 304 isinstance(expression, exp.Identifier) 305 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 306 ): 307 parent = expression.parent 308 while isinstance(parent, exp.Dot): 309 parent = parent.parent 310 311 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 312 # by default. The following check uses a heuristic to detect tables based on whether 313 # they are qualified. This should generally be correct, because tables in BigQuery 314 # must be qualified with at least a dataset, unless @@dataset_id is set. 315 case_sensitive = ( 316 isinstance(parent, exp.UserDefinedFunction) 317 or ( 318 isinstance(parent, exp.Table) 319 and parent.db 320 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 321 ) 322 or expression.meta.get("is_table") 323 ) 324 if not case_sensitive: 325 expression.set("this", expression.this.lower()) 326 327 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those of the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
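A sketch comparing the resolution behavior described above across a few dialects, using the public Dialect.get_or_raise and exp.to_identifier helpers:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    for name in ("postgres", "snowflake", "bigquery"):
        # Unquoted identifiers are fair game for normalization.
        ident = exp.to_identifier("FoO")
        print(name, "->", Dialect.get_or_raise(name).normalize_identifier(ident).name)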
Mapping of an escaped sequence (e.g. the two characters "\n") to its unescaped version (an actual newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
329 class Tokenizer(tokens.Tokenizer): 330 QUOTES = ["'", '"', '"""', "'''"] 331 COMMENTS = ["--", "#", ("/*", "*/")] 332 IDENTIFIERS = ["`"] 333 STRING_ESCAPES = ["\\"] 334 335 HEX_STRINGS = [("0x", ""), ("0X", "")] 336 337 BYTE_STRINGS = [ 338 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 339 ] 340 341 RAW_STRINGS = [ 342 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 343 ] 344 345 KEYWORDS = { 346 **tokens.Tokenizer.KEYWORDS, 347 "ANY TYPE": TokenType.VARIANT, 348 "BEGIN": TokenType.COMMAND, 349 "BEGIN TRANSACTION": TokenType.BEGIN, 350 "BYTEINT": TokenType.INT, 351 "BYTES": TokenType.BINARY, 352 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 353 "DATETIME": TokenType.TIMESTAMP, 354 "DECLARE": TokenType.COMMAND, 355 "ELSEIF": TokenType.COMMAND, 356 "EXCEPTION": TokenType.COMMAND, 357 "FLOAT64": TokenType.DOUBLE, 358 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 359 "MODEL": TokenType.MODEL, 360 "NOT DETERMINISTIC": TokenType.VOLATILE, 361 "RECORD": TokenType.STRUCT, 362 "TIMESTAMP": TokenType.TIMESTAMPTZ, 363 } 364 KEYWORDS.pop("DIV") 365 KEYWORDS.pop("VALUES") 366 KEYWORDS.pop("/*+")
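A sketch exercising the BigQuery-specific lexical features configured above (raw strings, hex literals, backtick identifiers); the Token attributes token_type and text are part of the public tokenizer output:

    import sqlglot

    sql = r"SELECT r'\d+' AS pattern, 0xFF AS h FROM `tbl`"
    for token in sqlglot.tokenize(sql, read="bigquery"):
        print(token.token_type, token.text)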
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
368 class Parser(parser.Parser): 369 PREFIXED_PIVOT_COLUMNS = True 370 LOG_DEFAULTS_TO_LN = True 371 SUPPORTS_IMPLICIT_UNNEST = True 372 373 FUNCTIONS = { 374 **parser.Parser.FUNCTIONS, 375 "DATE": _build_date, 376 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 377 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 378 "DATE_TRUNC": lambda args: exp.DateTrunc( 379 unit=exp.Literal.string(str(seq_get(args, 1))), 380 this=seq_get(args, 0), 381 ), 382 "DATETIME": _build_datetime, 383 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 384 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 385 "DIV": binary_from_function(exp.IntDiv), 386 "FORMAT_DATE": lambda args: exp.TimeToStr( 387 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 388 ), 389 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 390 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 391 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 392 ), 393 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 394 "MD5": exp.MD5Digest.from_arg_list, 395 "TO_HEX": _build_to_hex, 396 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 397 [seq_get(args, 1), seq_get(args, 0)] 398 ), 399 "PARSE_TIMESTAMP": _build_parse_timestamp, 400 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 401 "REGEXP_EXTRACT": _build_regexp_extract, 402 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 403 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 404 "SPLIT": lambda args: exp.Split( 405 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 406 this=seq_get(args, 0), 407 expression=seq_get(args, 1) or exp.Literal.string(","), 408 ), 409 "TIME": _build_time, 410 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 411 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 412 "TIMESTAMP": _build_timestamp, 413 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 414 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 415 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 416 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 417 ), 418 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 419 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 420 ), 421 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 422 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 423 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 424 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 425 ), 426 } 427 428 FUNCTION_PARSERS = { 429 **parser.Parser.FUNCTION_PARSERS, 430 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 431 } 432 FUNCTION_PARSERS.pop("TRIM") 433 434 NO_PAREN_FUNCTIONS = { 435 **parser.Parser.NO_PAREN_FUNCTIONS, 436 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 437 } 438 439 NESTED_TYPE_TOKENS = { 440 *parser.Parser.NESTED_TYPE_TOKENS, 441 TokenType.TABLE, 442 } 443 444 PROPERTY_PARSERS = { 445 **parser.Parser.PROPERTY_PARSERS, 446 "NOT DETERMINISTIC": lambda self: self.expression( 447 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 448 ), 449 "OPTIONS": lambda self: self._parse_with_property(), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 455 } 456 457 RANGE_PARSERS = 
parser.Parser.RANGE_PARSERS.copy() 458 RANGE_PARSERS.pop(TokenType.OVERLAPS) 459 460 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 461 462 STATEMENT_PARSERS = { 463 **parser.Parser.STATEMENT_PARSERS, 464 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 465 TokenType.END: lambda self: self._parse_as_command(self._prev), 466 TokenType.FOR: lambda self: self._parse_for_in(), 467 } 468 469 BRACKET_OFFSETS = { 470 "OFFSET": (0, False), 471 "ORDINAL": (1, False), 472 "SAFE_OFFSET": (0, True), 473 "SAFE_ORDINAL": (1, True), 474 } 475 476 def _parse_for_in(self) -> exp.ForIn: 477 this = self._parse_range() 478 self._match_text_seq("DO") 479 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 480 481 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 482 this = super()._parse_table_part(schema=schema) or self._parse_number() 483 484 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 485 if isinstance(this, exp.Identifier): 486 table_name = this.name 487 while self._match(TokenType.DASH, advance=False) and self._next: 488 text = "" 489 while self._is_connected() and self._curr.token_type != TokenType.DOT: 490 self._advance() 491 text += self._prev.text 492 table_name += text 493 494 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 495 elif isinstance(this, exp.Literal): 496 table_name = this.name 497 498 if self._is_connected() and self._parse_var(any_token=True): 499 table_name += self._prev.text 500 501 this = exp.Identifier(this=table_name, quoted=True) 502 503 return this 504 505 def _parse_table_parts( 506 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 507 ) -> exp.Table: 508 table = super()._parse_table_parts( 509 schema=schema, is_db_reference=is_db_reference, wildcard=True 510 ) 511 512 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 513 if not table.catalog: 514 if table.db: 515 parts = table.db.split(".") 516 if len(parts) == 2 and not table.args["db"].quoted: 517 table.set("catalog", exp.Identifier(this=parts[0])) 518 table.set("db", exp.Identifier(this=parts[1])) 519 else: 520 parts = table.name.split(".") 521 if len(parts) == 2 and not table.this.quoted: 522 table.set("db", exp.Identifier(this=parts[0])) 523 table.set("this", exp.Identifier(this=parts[1])) 524 525 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 526 catalog, db, this, *rest = ( 527 exp.to_identifier(p, quoted=True) 528 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 529 ) 530 531 if rest and this: 532 this = exp.Dot.build([this, *rest]) # type: ignore 533 534 table = exp.Table( 535 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 536 ) 537 table.meta["quoted_table"] = True 538 539 return table 540 541 def _parse_column(self) -> t.Optional[exp.Expression]: 542 column = super()._parse_column() 543 if isinstance(column, exp.Column): 544 parts = column.parts 545 if any("." 
in p.name for p in parts): 546 catalog, db, table, this, *rest = ( 547 exp.to_identifier(p, quoted=True) 548 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 549 ) 550 551 if rest and this: 552 this = exp.Dot.build([this, *rest]) # type: ignore 553 554 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 555 column.meta["quoted_column"] = True 556 557 return column 558 559 @t.overload 560 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 561 562 @t.overload 563 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 564 565 def _parse_json_object(self, agg=False): 566 json_object = super()._parse_json_object() 567 array_kv_pair = seq_get(json_object.expressions, 0) 568 569 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 570 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 571 if ( 572 array_kv_pair 573 and isinstance(array_kv_pair.this, exp.Array) 574 and isinstance(array_kv_pair.expression, exp.Array) 575 ): 576 keys = array_kv_pair.this.expressions 577 values = array_kv_pair.expression.expressions 578 579 json_object.set( 580 "expressions", 581 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 582 ) 583 584 return json_object 585 586 def _parse_bracket( 587 self, this: t.Optional[exp.Expression] = None 588 ) -> t.Optional[exp.Expression]: 589 bracket = super()._parse_bracket(this) 590 591 if this is bracket: 592 return bracket 593 594 if isinstance(bracket, exp.Bracket): 595 for expression in bracket.expressions: 596 name = expression.name.upper() 597 598 if name not in self.BRACKET_OFFSETS: 599 break 600 601 offset, safe = self.BRACKET_OFFSETS[name] 602 bracket.set("offset", offset) 603 bracket.set("safe", safe) 604 expression.replace(expression.expressions[0]) 605 606 return bracket 607 608 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 609 unnest = super()._parse_unnest(with_alias=with_alias) 610 611 if not unnest: 612 return None 613 614 unnest_expr = seq_get(unnest.expressions, 0) 615 if unnest_expr: 616 from sqlglot.optimizer.annotate_types import annotate_types 617 618 unnest_expr = annotate_types(unnest_expr) 619 620 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 621 # in contrast to other dialects such as DuckDB which flattens only the array by default 622 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 623 array_elem.is_type(exp.DataType.Type.STRUCT) 624 for array_elem in unnest_expr._type.expressions 625 ): 626 unnest.set("explode_array", True) 627 628 return unnest
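A sketch exercising two of the parser overrides above: BRACKET_OFFSETS folding SAFE_OFFSET into the Bracket node, and _parse_table_part accepting dash-separated project names (the printed values are illustrative):

    import sqlglot
    from sqlglot import exp

    e = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(1)] FROM my-project.ds.t", read="bigquery")
    bracket = e.find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))
    print(e.sql(dialect="bigquery"))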
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
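Parse-time options such as error_level are forwarded from the top-level helpers to the Parser; a sketch, assuming that forwarding and the documented ErrorLevel values:

    import sqlglot
    from sqlglot import ErrorLevel, ParseError

    try:
        sqlglot.parse_one("SELECT * FROM", read="bigquery", error_level=ErrorLevel.RAISE)
    except ParseError as err:
        print(err)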
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
630 class Generator(generator.Generator): 631 INTERVAL_ALLOWS_PLURAL_FORM = False 632 JOIN_HINTS = False 633 QUERY_HINTS = False 634 TABLE_HINTS = False 635 LIMIT_FETCH = "LIMIT" 636 RENAME_TABLE_WITH_DB = False 637 NVL2_SUPPORTED = False 638 UNNEST_WITH_ORDINALITY = False 639 COLLATE_IS_FUNC = True 640 LIMIT_ONLY_LITERALS = True 641 SUPPORTS_TABLE_ALIAS_COLUMNS = False 642 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 643 JSON_KEY_VALUE_PAIR_SEP = "," 644 NULL_ORDERING_SUPPORTED = False 645 IGNORE_NULLS_IN_FUNC = True 646 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 647 CAN_IMPLEMENT_ARRAY_ANY = True 648 SUPPORTS_TO_NUMBER = False 649 NAMED_PLACEHOLDER_TOKEN = "@" 650 HEX_FUNC = "TO_HEX" 651 WITH_PROPERTIES_PREFIX = "OPTIONS" 652 SUPPORTS_EXPLODING_PROJECTIONS = False 653 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 654 655 TRANSFORMS = { 656 **generator.Generator.TRANSFORMS, 657 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 658 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 659 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 660 exp.Array: inline_array_unless_query, 661 exp.ArrayContains: _array_contains_sql, 662 exp.ArrayFilter: filter_array_using_unnest, 663 exp.ArraySize: rename_func("ARRAY_LENGTH"), 664 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 665 exp.CollateProperty: lambda self, e: ( 666 f"DEFAULT COLLATE {self.sql(e, 'this')}" 667 if e.args.get("default") 668 else f"COLLATE {self.sql(e, 'this')}" 669 ), 670 exp.Commit: lambda *_: "COMMIT TRANSACTION", 671 exp.CountIf: rename_func("COUNTIF"), 672 exp.Create: _create_sql, 673 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 674 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 675 exp.DateDiff: lambda self, e: self.func( 676 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 677 ), 678 exp.DateFromParts: rename_func("DATE"), 679 exp.DateStrToDate: datestrtodate_sql, 680 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 681 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 682 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 683 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 684 exp.FromTimeZone: lambda self, e: self.func( 685 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 686 ), 687 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 688 exp.GroupConcat: rename_func("STRING_AGG"), 689 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 690 exp.If: if_sql(false_value="NULL"), 691 exp.ILike: no_ilike_sql, 692 exp.IntDiv: rename_func("DIV"), 693 exp.JSONFormat: rename_func("TO_JSON_STRING"), 694 exp.Max: max_or_greatest, 695 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 696 exp.MD5Digest: rename_func("MD5"), 697 exp.Min: min_or_least, 698 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 699 exp.RegexpExtract: lambda self, e: self.func( 700 "REGEXP_EXTRACT", 701 e.this, 702 e.expression, 703 e.args.get("position"), 704 e.args.get("occurrence"), 705 ), 706 exp.RegexpReplace: regexp_replace_sql, 707 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 708 exp.ReturnsProperty: _returnsproperty_sql, 709 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 710 exp.Select: transforms.preprocess( 711 [ 712 transforms.explode_to_unnest(), 713 transforms.unqualify_unnest, 714 transforms.eliminate_distinct_on, 715 _alias_ordered_group, 716 transforms.eliminate_semi_and_anti_joins, 717 ] 718 ), 719 exp.SHA: rename_func("SHA1"), 720 
exp.SHA2: sha256_sql, 721 exp.StabilityProperty: lambda self, e: ( 722 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 723 ), 724 exp.StrToDate: _str_to_datetime_sql, 725 exp.StrToTime: _str_to_datetime_sql, 726 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 727 exp.TimeFromParts: rename_func("TIME"), 728 exp.TimestampFromParts: rename_func("DATETIME"), 729 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 730 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 731 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 732 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 733 exp.TimeStrToTime: timestrtotime_sql, 734 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 735 exp.TsOrDsAdd: _ts_or_ds_add_sql, 736 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 737 exp.TsOrDsToTime: rename_func("TIME"), 738 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 739 exp.Unhex: rename_func("FROM_HEX"), 740 exp.UnixDate: rename_func("UNIX_DATE"), 741 exp.UnixToTime: _unix_to_time_sql, 742 exp.Uuid: lambda *_: "GENERATE_UUID()", 743 exp.Values: _derived_table_values_to_unnest, 744 exp.VariancePop: rename_func("VAR_POP"), 745 } 746 747 SUPPORTED_JSON_PATH_PARTS = { 748 exp.JSONPathKey, 749 exp.JSONPathRoot, 750 exp.JSONPathSubscript, 751 } 752 753 TYPE_MAPPING = { 754 **generator.Generator.TYPE_MAPPING, 755 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 756 exp.DataType.Type.BIGINT: "INT64", 757 exp.DataType.Type.BINARY: "BYTES", 758 exp.DataType.Type.BOOLEAN: "BOOL", 759 exp.DataType.Type.CHAR: "STRING", 760 exp.DataType.Type.DECIMAL: "NUMERIC", 761 exp.DataType.Type.DOUBLE: "FLOAT64", 762 exp.DataType.Type.FLOAT: "FLOAT64", 763 exp.DataType.Type.INT: "INT64", 764 exp.DataType.Type.NCHAR: "STRING", 765 exp.DataType.Type.NVARCHAR: "STRING", 766 exp.DataType.Type.SMALLINT: "INT64", 767 exp.DataType.Type.TEXT: "STRING", 768 exp.DataType.Type.TIMESTAMP: "DATETIME", 769 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 770 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 771 exp.DataType.Type.TINYINT: "INT64", 772 exp.DataType.Type.ROWVERSION: "BYTES", 773 exp.DataType.Type.UUID: "STRING", 774 exp.DataType.Type.VARBINARY: "BYTES", 775 exp.DataType.Type.VARCHAR: "STRING", 776 exp.DataType.Type.VARIANT: "ANY TYPE", 777 } 778 779 PROPERTIES_LOCATION = { 780 **generator.Generator.PROPERTIES_LOCATION, 781 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 782 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 783 } 784 785 # WINDOW comes after QUALIFY 786 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 787 AFTER_HAVING_MODIFIER_TRANSFORMS = { 788 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 789 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 790 } 791 792 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 793 RESERVED_KEYWORDS = { 794 "all", 795 "and", 796 "any", 797 "array", 798 "as", 799 "asc", 800 "assert_rows_modified", 801 "at", 802 "between", 803 "by", 804 "case", 805 "cast", 806 "collate", 807 "contains", 808 "create", 809 "cross", 810 "cube", 811 "current", 812 "default", 813 "define", 814 "desc", 815 "distinct", 816 "else", 817 "end", 818 "enum", 819 "escape", 820 "except", 821 "exclude", 822 "exists", 823 "extract", 824 "false", 825 "fetch", 826 "following", 827 "for", 828 "from", 829 "full", 830 "group", 831 "grouping", 832 "groups", 833 "hash", 834 "having", 835 "if", 836 "ignore", 837 "in", 838 "inner", 839 "intersect", 840 
"interval", 841 "into", 842 "is", 843 "join", 844 "lateral", 845 "left", 846 "like", 847 "limit", 848 "lookup", 849 "merge", 850 "natural", 851 "new", 852 "no", 853 "not", 854 "null", 855 "nulls", 856 "of", 857 "on", 858 "or", 859 "order", 860 "outer", 861 "over", 862 "partition", 863 "preceding", 864 "proto", 865 "qualify", 866 "range", 867 "recursive", 868 "respect", 869 "right", 870 "rollup", 871 "rows", 872 "select", 873 "set", 874 "some", 875 "struct", 876 "tablesample", 877 "then", 878 "to", 879 "treat", 880 "true", 881 "unbounded", 882 "union", 883 "unnest", 884 "using", 885 "when", 886 "where", 887 "window", 888 "with", 889 "within", 890 } 891 892 def mod_sql(self, expression: exp.Mod) -> str: 893 this = expression.this 894 expr = expression.expression 895 return self.func( 896 "MOD", 897 this.unnest() if isinstance(this, exp.Paren) else this, 898 expr.unnest() if isinstance(expr, exp.Paren) else expr, 899 ) 900 901 def column_parts(self, expression: exp.Column) -> str: 902 if expression.meta.get("quoted_column"): 903 # If a column reference is of the form `dataset.table`.name, we need 904 # to preserve the quoted table path, otherwise the reference breaks 905 table_parts = ".".join(p.name for p in expression.parts[:-1]) 906 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 907 return f"{table_path}.{self.sql(expression, 'this')}" 908 909 return super().column_parts(expression) 910 911 def table_parts(self, expression: exp.Table) -> str: 912 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 913 # we need to make sure the correct quoting is used in each case. 914 # 915 # For example, if there is a CTE x that clashes with a schema name, then the former will 916 # return the table y in that schema, whereas the latter will return the CTE's y column: 917 # 918 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 919 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 920 if expression.meta.get("quoted_table"): 921 table_parts = ".".join(p.name for p in expression.parts) 922 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 923 924 return super().table_parts(expression) 925 926 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 927 if isinstance(expression.this, exp.TsOrDsToTimestamp): 928 func_name = "FORMAT_DATETIME" 929 else: 930 func_name = "FORMAT_DATE" 931 this = ( 932 expression.this 933 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 934 else expression 935 ) 936 return self.func(func_name, self.format_time(expression), this.this) 937 938 def eq_sql(self, expression: exp.EQ) -> str: 939 # Operands of = cannot be NULL in BigQuery 940 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 941 if not isinstance(expression.parent, exp.Update): 942 return "NULL" 943 944 return self.binary(expression, "=") 945 946 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 947 parent = expression.parent 948 949 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 950 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
951 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 952 return self.func( 953 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 954 ) 955 956 return super().attimezone_sql(expression) 957 958 def trycast_sql(self, expression: exp.TryCast) -> str: 959 return self.cast_sql(expression, safe_prefix="SAFE_") 960 961 def bracket_sql(self, expression: exp.Bracket) -> str: 962 this = expression.this 963 expressions = expression.expressions 964 965 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 966 arg = expressions[0] 967 if arg.type is None: 968 from sqlglot.optimizer.annotate_types import annotate_types 969 970 arg = annotate_types(arg) 971 972 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 973 # BQ doesn't support bracket syntax with string values for structs 974 return f"{self.sql(this)}.{arg.name}" 975 976 expressions_sql = self.expressions(expression, flat=True) 977 offset = expression.args.get("offset") 978 979 if offset == 0: 980 expressions_sql = f"OFFSET({expressions_sql})" 981 elif offset == 1: 982 expressions_sql = f"ORDINAL({expressions_sql})" 983 elif offset is not None: 984 self.unsupported(f"Unsupported array offset: {offset}") 985 986 if expression.args.get("safe"): 987 expressions_sql = f"SAFE_{expressions_sql}" 988 989 return f"{self.sql(this)}[{expressions_sql}]" 990 991 def in_unnest_op(self, expression: exp.Unnest) -> str: 992 return self.sql(expression) 993 994 def version_sql(self, expression: exp.Version) -> str: 995 if expression.name == "TIMESTAMP": 996 expression.set("this", "SYSTEM_TIME") 997 return super().version_sql(expression)
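A sketch showing the generator's TYPE_MAPPING and TRANSFORMS in action when targeting BigQuery (the precise output depends on the sqlglot version):

    import sqlglot

    # VARCHAR should map to STRING via TYPE_MAPPING, and ILIKE (unsupported
    # in BigQuery) should be rewritten by the exp.ILike transform.
    sql = "SELECT CAST(a AS VARCHAR) FROM t WHERE b ILIKE '%x%'"
    print(sqlglot.transpile(sql, read="duckdb", write="bigquery")[0])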
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
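These options are forwarded by the top-level transpile helper; a small sketch of pretty-printing:

    import sqlglot

    sql = "SELECT a, b FROM t WHERE a > 1 AND b < 2"
    print(sqlglot.transpile(sql, write="bigquery", pretty=True)[0])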
901 def column_parts(self, expression: exp.Column) -> str: 902 if expression.meta.get("quoted_column"): 903 # If a column reference is of the form `dataset.table`.name, we need 904 # to preserve the quoted table path, otherwise the reference breaks 905 table_parts = ".".join(p.name for p in expression.parts[:-1]) 906 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 907 return f"{table_path}.{self.sql(expression, 'this')}" 908 909 return super().column_parts(expression)
911 def table_parts(self, expression: exp.Table) -> str: 912 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 913 # we need to make sure the correct quoting is used in each case. 914 # 915 # For example, if there is a CTE x that clashes with a schema name, then the former will 916 # return the table y in that schema, whereas the latter will return the CTE's y column: 917 # 918 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 919 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 920 if expression.meta.get("quoted_table"): 921 table_parts = ".".join(p.name for p in expression.parts) 922 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 923 924 return super().table_parts(expression)
926 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 927 if isinstance(expression.this, exp.TsOrDsToTimestamp): 928 func_name = "FORMAT_DATETIME" 929 else: 930 func_name = "FORMAT_DATE" 931 this = ( 932 expression.this 933 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 934 else expression 935 ) 936 return self.func(func_name, self.format_time(expression), this.this)
946 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 947 parent = expression.parent 948 949 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 950 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 951 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 952 return self.func( 953 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 954 ) 955 956 return super().attimezone_sql(expression)
961 def bracket_sql(self, expression: exp.Bracket) -> str: 962 this = expression.this 963 expressions = expression.expressions 964 965 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 966 arg = expressions[0] 967 if arg.type is None: 968 from sqlglot.optimizer.annotate_types import annotate_types 969 970 arg = annotate_types(arg) 971 972 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 973 # BQ doesn't support bracket syntax with string values for structs 974 return f"{self.sql(this)}.{arg.name}" 975 976 expressions_sql = self.expressions(expression, flat=True) 977 offset = expression.args.get("offset") 978 979 if offset == 0: 980 expressions_sql = f"OFFSET({expressions_sql})" 981 elif offset == 1: 982 expressions_sql = f"ORDINAL({expressions_sql})" 983 elif offset is not None: 984 self.unsupported(f"Unsupported array offset: {offset}") 985 986 if expression.args.get("safe"): 987 expressions_sql = f"SAFE_{expressions_sql}" 988 989 return f"{self.sql(this)}[{expressions_sql}]"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql