sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
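As a quick usage sketch of the dialect defined above (the output shown is indicative, not a verified run; note that _build_to_hex parses TO_HEX(MD5(..)) into a single MD5 node, which DuckDB's hex-returning MD5 can express directly):

    import sqlglot

    # Read BigQuery, write DuckDB: TO_HEX(MD5(..)) collapses into MD5, and the
    # backtick-quoted table path is split into quoted catalog/db/table parts.
    sql = "SELECT TO_HEX(MD5(name)) AS h FROM `proj.dataset.users`"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])
    # roughly: SELECT MD5(name) AS h FROM "proj"."dataset"."users"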
class BigQuery(Dialect):
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which will forward the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which will forward the alias across the query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
A sketch of the BigQuery behavior follows.
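A minimal sketch of this difference, run through sqlglot's qualifier. The query is the one from the example above; the exact output formatting may vary by version:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
    """

    # Under BigQuery, my_id in GROUP BY/HAVING resolves back to the projected
    # alias (i.e. id), while WHERE still sees data.my_id.
    print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))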
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized (illustrated below).
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
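A small sketch of the contract described above; the dialect classes are re-exported from sqlglot.dialects, and the expected results are shown as comments:

    from sqlglot import exp
    from sqlglot.dialects import Postgres, Snowflake

    unquoted = exp.to_identifier("FoO", quoted=False)
    print(Postgres().normalize_identifier(unquoted.copy()).name)   # foo
    print(Snowflake().normalize_identifier(unquoted.copy()).name)  # FOO

    # Quoting makes the identifier case-sensitive, so it is left untouched
    quoted = exp.to_identifier("FoO", quoted=True)
    print(Postgres().normalize_identifier(quoted.copy()).name)     # FoO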
Mapping of an escaped sequence (e.g. the two characters "\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")
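As a quick illustration of these settings, tokenizing a query that mixes a raw string, a byte string and a backquoted identifier yields one token per literal; the exact token names printed may vary by version:

    from sqlglot.dialects import BigQuery

    for token in BigQuery().tokenize("SELECT r'a\\b', b'bytes', `col`"):
        print(token.token_type, token.text)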
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest
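For example, _parse_table_part glues dash-separated table names back into a single identifier, so BigQuery's unquoted project names round-trip; the outputs below are indicative:

    import sqlglot
    from sqlglot import exp

    e = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery")
    table = e.find(exp.Table)
    print(table.catalog, table.db, table.name)  # my-project dataset tbl
    print(e.sql("bigquery"))                    # SELECT * FROM my-project.dataset.tbl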
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
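For instance, with the default error_level (ErrorLevel.IMMEDIATE) a malformed query raises as soon as parsing fails, and the ParseError message carries the offending position:

    import sqlglot
    from sqlglot.errors import ParseError

    try:
        sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t", read="bigquery")
    except ParseError as err:
        print(err)  # e.g. Expecting ). Line 1, Col: 34.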
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
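A couple of the mappings above in action; the expected outputs are shown as comments and may vary slightly by version:

    import sqlglot

    # TYPE_MAPPING: generic types are rewritten to BigQuery names
    print(sqlglot.transpile("CREATE TABLE t (a TEXT, b DECIMAL, c DOUBLE)", write="bigquery")[0])
    # e.g. CREATE TABLE t (a STRING, b NUMERIC, c FLOAT64)

    # TRANSFORMS: exp.ILike has no BigQuery equivalent and is lowered via LOWER()
    print(sqlglot.transpile("SELECT a ILIKE '%x%'", read="postgres", write="bigquery")[0])
    # e.g. SELECT LOWER(a) LIKE '%x%'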
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
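For instance, combining a few of these options; the pretty-printed shape below is indicative:

    import sqlglot

    print(sqlglot.transpile("SELECT a, b FROM t WHERE a > 0", write="bigquery", pretty=True)[0])
    # SELECT
    #   a,
    #   b
    # FROM t
    # WHERE
    #   a > 0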
        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)
        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)
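Both spellings from the comment should survive a BigQuery-to-BigQuery round trip, since the parser records which parts were quoted together; a sketch (exact output may vary by version):

    import sqlglot

    for sql in (
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
    ):
        print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])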
        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)
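Round-tripping shows the FORMAT_DATE/FORMAT_DATETIME split, which mirrors how the parser wraps the value in TsOrDsToDate or TsOrDsToTimestamp; outputs are indicative:

    import sqlglot

    print(sqlglot.transpile("SELECT FORMAT_DATE('%Y-%m-%d', d)", read="bigquery", write="bigquery")[0])
    # e.g. SELECT FORMAT_DATE('%Y-%m-%d', d)
    print(sqlglot.transpile("SELECT FORMAT_DATETIME('%Y-%m-%d %H:%M:%S', ts)", read="bigquery", write="bigquery")[0])
    # e.g. SELECT FORMAT_DATETIME('%Y-%m-%d %H:%M:%S', ts)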
        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)
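Outside of the CAST(... FORMAT ... AT TIME ZONE ...) special case, AT TIME ZONE is rewritten into nested DATETIME/TIMESTAMP calls; the output below is indicative:

    import sqlglot

    print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
    # e.g. SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))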
        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"
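The offset/safe args consumed here are the ones set by the parser's BRACKET_OFFSETS table, so the special subscript forms round-trip; expected values are shown as comments:

    import sqlglot
    from sqlglot import exp

    e = sqlglot.parse_one("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery")
    bracket = e.find(exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 1 True
    print(e.sql("bigquery"))  # SELECT arr[SAFE_ORDINAL(1)] FROM t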
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql