sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        EXPLICIT_SET_OP = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded", "union",
            "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
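A minimal usage sketch (not part of the module source; the duckdb target is just an illustrative assumption): the dialect is registered under the name "bigquery", so it can be exercised through sqlglot's top-level API.

import sqlglot

# TO_HEX(MD5(..)) parses into a single exp.MD5 node (see _build_to_hex above), and
# the backtick-quoted multi-part table name round-trips via the quoted_table meta flag.
expression = sqlglot.parse_one(
    "SELECT TO_HEX(MD5(col)) FROM `proj.dataset.tbl`", read="bigquery"
)
print(expression.sql(dialect="bigquery"))

# DATETIME is tokenized as a timezone-less TIMESTAMP internally (see the Tokenizer's
# KEYWORDS), so other dialects should render their own plain timestamp type for it.
print(sqlglot.transpile("SELECT CAST(x AS DATETIME) FROM t", read="bigquery", write="duckdb")[0])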
236class BigQuery(Dialect): 237 WEEK_OFFSET = -1 238 UNNEST_COLUMN_ONLY = True 239 SUPPORTS_USER_DEFINED_TYPES = False 240 SUPPORTS_SEMI_ANTI_JOIN = False 241 LOG_BASE_FIRST = False 242 HEX_LOWERCASE = True 243 FORCE_EARLY_ALIAS_REF_EXPANSION = True 244 EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True 245 246 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 247 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 248 249 # bigquery udfs are case sensitive 250 NORMALIZE_FUNCTIONS = False 251 252 # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time 253 TIME_MAPPING = { 254 "%D": "%m/%d/%y", 255 "%E6S": "%S.%f", 256 "%e": "%-d", 257 } 258 259 FORMAT_MAPPING = { 260 "DD": "%d", 261 "MM": "%m", 262 "MON": "%b", 263 "MONTH": "%B", 264 "YYYY": "%Y", 265 "YY": "%y", 266 "HH": "%I", 267 "HH12": "%I", 268 "HH24": "%H", 269 "MI": "%M", 270 "SS": "%S", 271 "SSSSS": "%f", 272 "TZH": "%z", 273 } 274 275 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 276 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 277 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 278 279 def normalize_identifier(self, expression: E) -> E: 280 if ( 281 isinstance(expression, exp.Identifier) 282 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 283 ): 284 parent = expression.parent 285 while isinstance(parent, exp.Dot): 286 parent = parent.parent 287 288 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 289 # by default. The following check uses a heuristic to detect tables based on whether 290 # they are qualified. This should generally be correct, because tables in BigQuery 291 # must be qualified with at least a dataset, unless @@dataset_id is set. 
292 case_sensitive = ( 293 isinstance(parent, exp.UserDefinedFunction) 294 or ( 295 isinstance(parent, exp.Table) 296 and parent.db 297 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 298 ) 299 or expression.meta.get("is_table") 300 ) 301 if not case_sensitive: 302 expression.set("this", expression.this.lower()) 303 304 return expression 305 306 class Tokenizer(tokens.Tokenizer): 307 QUOTES = ["'", '"', '"""', "'''"] 308 COMMENTS = ["--", "#", ("/*", "*/")] 309 IDENTIFIERS = ["`"] 310 STRING_ESCAPES = ["\\"] 311 312 HEX_STRINGS = [("0x", ""), ("0X", "")] 313 314 BYTE_STRINGS = [ 315 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 316 ] 317 318 RAW_STRINGS = [ 319 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 320 ] 321 322 KEYWORDS = { 323 **tokens.Tokenizer.KEYWORDS, 324 "ANY TYPE": TokenType.VARIANT, 325 "BEGIN": TokenType.COMMAND, 326 "BEGIN TRANSACTION": TokenType.BEGIN, 327 "BYTEINT": TokenType.INT, 328 "BYTES": TokenType.BINARY, 329 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 330 "DATETIME": TokenType.TIMESTAMP, 331 "DECLARE": TokenType.COMMAND, 332 "ELSEIF": TokenType.COMMAND, 333 "EXCEPTION": TokenType.COMMAND, 334 "FLOAT64": TokenType.DOUBLE, 335 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 336 "MODEL": TokenType.MODEL, 337 "NOT DETERMINISTIC": TokenType.VOLATILE, 338 "RECORD": TokenType.STRUCT, 339 "TIMESTAMP": TokenType.TIMESTAMPTZ, 340 } 341 KEYWORDS.pop("DIV") 342 KEYWORDS.pop("VALUES") 343 KEYWORDS.pop("/*+") 344 345 class Parser(parser.Parser): 346 PREFIXED_PIVOT_COLUMNS = True 347 LOG_DEFAULTS_TO_LN = True 348 SUPPORTS_IMPLICIT_UNNEST = True 349 350 FUNCTIONS = { 351 **parser.Parser.FUNCTIONS, 352 "DATE": _build_date, 353 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 354 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 355 "DATE_TRUNC": lambda args: exp.DateTrunc( 356 unit=exp.Literal.string(str(seq_get(args, 1))), 357 this=seq_get(args, 0), 358 ), 359 "DATETIME": _build_datetime, 360 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 361 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 362 "DIV": binary_from_function(exp.IntDiv), 363 "FORMAT_DATE": lambda args: exp.TimeToStr( 364 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 365 ), 366 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 367 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 368 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 369 ), 370 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 371 "MD5": exp.MD5Digest.from_arg_list, 372 "TO_HEX": _build_to_hex, 373 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 374 [seq_get(args, 1), seq_get(args, 0)] 375 ), 376 "PARSE_TIMESTAMP": _build_parse_timestamp, 377 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 378 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 379 this=seq_get(args, 0), 380 expression=seq_get(args, 1), 381 position=seq_get(args, 2), 382 occurrence=seq_get(args, 3), 383 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 384 ), 385 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 386 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 387 "SPLIT": lambda args: exp.Split( 388 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 389 this=seq_get(args, 0), 390 
expression=seq_get(args, 1) or exp.Literal.string(","), 391 ), 392 "TIME": _build_time, 393 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 394 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 395 "TIMESTAMP": _build_timestamp, 396 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 397 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 398 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 399 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 400 ), 401 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 402 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 403 ), 404 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 405 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 406 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 407 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 408 ), 409 } 410 411 FUNCTION_PARSERS = { 412 **parser.Parser.FUNCTION_PARSERS, 413 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 414 } 415 FUNCTION_PARSERS.pop("TRIM") 416 417 NO_PAREN_FUNCTIONS = { 418 **parser.Parser.NO_PAREN_FUNCTIONS, 419 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 420 } 421 422 NESTED_TYPE_TOKENS = { 423 *parser.Parser.NESTED_TYPE_TOKENS, 424 TokenType.TABLE, 425 } 426 427 PROPERTY_PARSERS = { 428 **parser.Parser.PROPERTY_PARSERS, 429 "NOT DETERMINISTIC": lambda self: self.expression( 430 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 431 ), 432 "OPTIONS": lambda self: self._parse_with_property(), 433 } 434 435 CONSTRAINT_PARSERS = { 436 **parser.Parser.CONSTRAINT_PARSERS, 437 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 438 } 439 440 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 441 RANGE_PARSERS.pop(TokenType.OVERLAPS) 442 443 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 444 445 STATEMENT_PARSERS = { 446 **parser.Parser.STATEMENT_PARSERS, 447 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 448 TokenType.END: lambda self: self._parse_as_command(self._prev), 449 TokenType.FOR: lambda self: self._parse_for_in(), 450 } 451 452 BRACKET_OFFSETS = { 453 "OFFSET": (0, False), 454 "ORDINAL": (1, False), 455 "SAFE_OFFSET": (0, True), 456 "SAFE_ORDINAL": (1, True), 457 } 458 459 def _parse_for_in(self) -> exp.ForIn: 460 this = self._parse_range() 461 self._match_text_seq("DO") 462 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 463 464 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 465 this = super()._parse_table_part(schema=schema) or self._parse_number() 466 467 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 468 if isinstance(this, exp.Identifier): 469 table_name = this.name 470 while self._match(TokenType.DASH, advance=False) and self._next: 471 text = "" 472 while self._curr and self._curr.token_type != TokenType.DOT: 473 self._advance() 474 text += self._prev.text 475 table_name += text 476 477 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 478 elif isinstance(this, exp.Literal): 479 table_name = this.name 480 481 if self._is_connected() and self._parse_var(any_token=True): 482 table_name += self._prev.text 483 484 this = exp.Identifier(this=table_name, quoted=True) 485 486 return this 487 488 def _parse_table_parts( 489 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 490 ) -> exp.Table: 491 table = super()._parse_table_parts( 492 
schema=schema, is_db_reference=is_db_reference, wildcard=True 493 ) 494 495 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 496 if not table.catalog: 497 if table.db: 498 parts = table.db.split(".") 499 if len(parts) == 2 and not table.args["db"].quoted: 500 table.set("catalog", exp.Identifier(this=parts[0])) 501 table.set("db", exp.Identifier(this=parts[1])) 502 else: 503 parts = table.name.split(".") 504 if len(parts) == 2 and not table.this.quoted: 505 table.set("db", exp.Identifier(this=parts[0])) 506 table.set("this", exp.Identifier(this=parts[1])) 507 508 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 509 catalog, db, this, *rest = ( 510 exp.to_identifier(p, quoted=True) 511 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 512 ) 513 514 if rest and this: 515 this = exp.Dot.build([this, *rest]) # type: ignore 516 517 table = exp.Table( 518 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 519 ) 520 table.meta["quoted_table"] = True 521 522 return table 523 524 def _parse_column(self) -> t.Optional[exp.Expression]: 525 column = super()._parse_column() 526 if isinstance(column, exp.Column): 527 parts = column.parts 528 if any("." in p.name for p in parts): 529 catalog, db, table, this, *rest = ( 530 exp.to_identifier(p, quoted=True) 531 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 532 ) 533 534 if rest and this: 535 this = exp.Dot.build([this, *rest]) # type: ignore 536 537 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 538 column.meta["quoted_column"] = True 539 540 return column 541 542 @t.overload 543 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 544 545 @t.overload 546 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 
547 548 def _parse_json_object(self, agg=False): 549 json_object = super()._parse_json_object() 550 array_kv_pair = seq_get(json_object.expressions, 0) 551 552 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 553 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 554 if ( 555 array_kv_pair 556 and isinstance(array_kv_pair.this, exp.Array) 557 and isinstance(array_kv_pair.expression, exp.Array) 558 ): 559 keys = array_kv_pair.this.expressions 560 values = array_kv_pair.expression.expressions 561 562 json_object.set( 563 "expressions", 564 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 565 ) 566 567 return json_object 568 569 def _parse_bracket( 570 self, this: t.Optional[exp.Expression] = None 571 ) -> t.Optional[exp.Expression]: 572 bracket = super()._parse_bracket(this) 573 574 if this is bracket: 575 return bracket 576 577 if isinstance(bracket, exp.Bracket): 578 for expression in bracket.expressions: 579 name = expression.name.upper() 580 581 if name not in self.BRACKET_OFFSETS: 582 break 583 584 offset, safe = self.BRACKET_OFFSETS[name] 585 bracket.set("offset", offset) 586 bracket.set("safe", safe) 587 expression.replace(expression.expressions[0]) 588 589 return bracket 590 591 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 592 unnest = super()._parse_unnest(with_alias=with_alias) 593 594 if not unnest: 595 return None 596 597 unnest_expr = seq_get(unnest.expressions, 0) 598 if unnest_expr: 599 from sqlglot.optimizer.annotate_types import annotate_types 600 601 unnest_expr = annotate_types(unnest_expr) 602 603 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 604 # in contrast to other dialects such as DuckDB which flattens only the array by default 605 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 606 array_elem.is_type(exp.DataType.Type.STRUCT) 607 for array_elem in unnest_expr._type.expressions 608 ): 609 unnest.set("explode_array", True) 610 611 return unnest 612 613 class Generator(generator.Generator): 614 EXPLICIT_SET_OP = True 615 INTERVAL_ALLOWS_PLURAL_FORM = False 616 JOIN_HINTS = False 617 QUERY_HINTS = False 618 TABLE_HINTS = False 619 LIMIT_FETCH = "LIMIT" 620 RENAME_TABLE_WITH_DB = False 621 NVL2_SUPPORTED = False 622 UNNEST_WITH_ORDINALITY = False 623 COLLATE_IS_FUNC = True 624 LIMIT_ONLY_LITERALS = True 625 SUPPORTS_TABLE_ALIAS_COLUMNS = False 626 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 627 JSON_KEY_VALUE_PAIR_SEP = "," 628 NULL_ORDERING_SUPPORTED = False 629 IGNORE_NULLS_IN_FUNC = True 630 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 631 CAN_IMPLEMENT_ARRAY_ANY = True 632 SUPPORTS_TO_NUMBER = False 633 NAMED_PLACEHOLDER_TOKEN = "@" 634 HEX_FUNC = "TO_HEX" 635 WITH_PROPERTIES_PREFIX = "OPTIONS" 636 SUPPORTS_EXPLODING_PROJECTIONS = False 637 638 TRANSFORMS = { 639 **generator.Generator.TRANSFORMS, 640 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 641 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 642 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 643 exp.Array: inline_array_unless_query, 644 exp.ArrayContains: _array_contains_sql, 645 exp.ArrayFilter: filter_array_using_unnest, 646 exp.ArraySize: rename_func("ARRAY_LENGTH"), 647 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 648 exp.CollateProperty: lambda self, e: ( 649 f"DEFAULT COLLATE {self.sql(e, 'this')}" 650 if e.args.get("default") 651 else f"COLLATE {self.sql(e, 'this')}" 652 ), 653 
exp.Commit: lambda *_: "COMMIT TRANSACTION", 654 exp.CountIf: rename_func("COUNTIF"), 655 exp.Create: _create_sql, 656 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 657 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 658 exp.DateDiff: lambda self, e: self.func( 659 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 660 ), 661 exp.DateFromParts: rename_func("DATE"), 662 exp.DateStrToDate: datestrtodate_sql, 663 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 664 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 665 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 666 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 667 exp.FromTimeZone: lambda self, e: self.func( 668 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 669 ), 670 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 671 exp.GroupConcat: rename_func("STRING_AGG"), 672 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 673 exp.If: if_sql(false_value="NULL"), 674 exp.ILike: no_ilike_sql, 675 exp.IntDiv: rename_func("DIV"), 676 exp.JSONFormat: rename_func("TO_JSON_STRING"), 677 exp.Max: max_or_greatest, 678 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 679 exp.MD5Digest: rename_func("MD5"), 680 exp.Min: min_or_least, 681 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 682 exp.RegexpExtract: lambda self, e: self.func( 683 "REGEXP_EXTRACT", 684 e.this, 685 e.expression, 686 e.args.get("position"), 687 e.args.get("occurrence"), 688 ), 689 exp.RegexpReplace: regexp_replace_sql, 690 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 691 exp.ReturnsProperty: _returnsproperty_sql, 692 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 693 exp.Select: transforms.preprocess( 694 [ 695 transforms.explode_to_unnest(), 696 transforms.unqualify_unnest, 697 transforms.eliminate_distinct_on, 698 _alias_ordered_group, 699 transforms.eliminate_semi_and_anti_joins, 700 ] 701 ), 702 exp.SHA: rename_func("SHA1"), 703 exp.SHA2: sha256_sql, 704 exp.StabilityProperty: lambda self, e: ( 705 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 706 ), 707 exp.StrToDate: _str_to_datetime_sql, 708 exp.StrToTime: _str_to_datetime_sql, 709 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 710 exp.TimeFromParts: rename_func("TIME"), 711 exp.TimestampFromParts: rename_func("DATETIME"), 712 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 713 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 714 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 715 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 716 exp.TimeStrToTime: timestrtotime_sql, 717 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 718 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 719 exp.TsOrDsAdd: _ts_or_ds_add_sql, 720 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 721 exp.TsOrDsToTime: rename_func("TIME"), 722 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 723 exp.Unhex: rename_func("FROM_HEX"), 724 exp.UnixDate: rename_func("UNIX_DATE"), 725 exp.UnixToTime: _unix_to_time_sql, 726 exp.Values: _derived_table_values_to_unnest, 727 exp.VariancePop: rename_func("VAR_POP"), 728 } 729 730 SUPPORTED_JSON_PATH_PARTS = { 731 exp.JSONPathKey, 732 exp.JSONPathRoot, 733 exp.JSONPathSubscript, 734 } 735 736 TYPE_MAPPING = { 737 **generator.Generator.TYPE_MAPPING, 738 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 739 exp.DataType.Type.BIGINT: "INT64", 740 exp.DataType.Type.BINARY: "BYTES", 
741 exp.DataType.Type.BOOLEAN: "BOOL", 742 exp.DataType.Type.CHAR: "STRING", 743 exp.DataType.Type.DECIMAL: "NUMERIC", 744 exp.DataType.Type.DOUBLE: "FLOAT64", 745 exp.DataType.Type.FLOAT: "FLOAT64", 746 exp.DataType.Type.INT: "INT64", 747 exp.DataType.Type.NCHAR: "STRING", 748 exp.DataType.Type.NVARCHAR: "STRING", 749 exp.DataType.Type.SMALLINT: "INT64", 750 exp.DataType.Type.TEXT: "STRING", 751 exp.DataType.Type.TIMESTAMP: "DATETIME", 752 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 753 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 754 exp.DataType.Type.TINYINT: "INT64", 755 exp.DataType.Type.VARBINARY: "BYTES", 756 exp.DataType.Type.ROWVERSION: "BYTES", 757 exp.DataType.Type.VARCHAR: "STRING", 758 exp.DataType.Type.VARIANT: "ANY TYPE", 759 } 760 761 PROPERTIES_LOCATION = { 762 **generator.Generator.PROPERTIES_LOCATION, 763 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 764 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 765 } 766 767 # WINDOW comes after QUALIFY 768 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 769 AFTER_HAVING_MODIFIER_TRANSFORMS = { 770 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 771 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 772 } 773 774 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 775 RESERVED_KEYWORDS = { 776 "all", 777 "and", 778 "any", 779 "array", 780 "as", 781 "asc", 782 "assert_rows_modified", 783 "at", 784 "between", 785 "by", 786 "case", 787 "cast", 788 "collate", 789 "contains", 790 "create", 791 "cross", 792 "cube", 793 "current", 794 "default", 795 "define", 796 "desc", 797 "distinct", 798 "else", 799 "end", 800 "enum", 801 "escape", 802 "except", 803 "exclude", 804 "exists", 805 "extract", 806 "false", 807 "fetch", 808 "following", 809 "for", 810 "from", 811 "full", 812 "group", 813 "grouping", 814 "groups", 815 "hash", 816 "having", 817 "if", 818 "ignore", 819 "in", 820 "inner", 821 "intersect", 822 "interval", 823 "into", 824 "is", 825 "join", 826 "lateral", 827 "left", 828 "like", 829 "limit", 830 "lookup", 831 "merge", 832 "natural", 833 "new", 834 "no", 835 "not", 836 "null", 837 "nulls", 838 "of", 839 "on", 840 "or", 841 "order", 842 "outer", 843 "over", 844 "partition", 845 "preceding", 846 "proto", 847 "qualify", 848 "range", 849 "recursive", 850 "respect", 851 "right", 852 "rollup", 853 "rows", 854 "select", 855 "set", 856 "some", 857 "struct", 858 "tablesample", 859 "then", 860 "to", 861 "treat", 862 "true", 863 "unbounded", 864 "union", 865 "unnest", 866 "using", 867 "when", 868 "where", 869 "window", 870 "with", 871 "within", 872 } 873 874 def mod_sql(self, expression: exp.Mod) -> str: 875 this = expression.this 876 expr = expression.expression 877 return self.func( 878 "MOD", 879 this.unnest() if isinstance(this, exp.Paren) else this, 880 expr.unnest() if isinstance(expr, exp.Paren) else expr, 881 ) 882 883 def column_parts(self, expression: exp.Column) -> str: 884 if expression.meta.get("quoted_column"): 885 # If a column reference is of the form `dataset.table`.name, we need 886 # to preserve the quoted table path, otherwise the reference breaks 887 table_parts = ".".join(p.name for p in expression.parts[:-1]) 888 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 889 return f"{table_path}.{self.sql(expression, 'this')}" 890 891 return super().column_parts(expression) 892 893 def table_parts(self, expression: exp.Table) -> str: 894 # 
Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 895 # we need to make sure the correct quoting is used in each case. 896 # 897 # For example, if there is a CTE x that clashes with a schema name, then the former will 898 # return the table y in that schema, whereas the latter will return the CTE's y column: 899 # 900 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 901 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 902 if expression.meta.get("quoted_table"): 903 table_parts = ".".join(p.name for p in expression.parts) 904 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 905 906 return super().table_parts(expression) 907 908 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 909 if isinstance(expression.this, exp.TsOrDsToTimestamp): 910 func_name = "FORMAT_DATETIME" 911 else: 912 func_name = "FORMAT_DATE" 913 this = ( 914 expression.this 915 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 916 else expression 917 ) 918 return self.func(func_name, self.format_time(expression), this.this) 919 920 def eq_sql(self, expression: exp.EQ) -> str: 921 # Operands of = cannot be NULL in BigQuery 922 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 923 if not isinstance(expression.parent, exp.Update): 924 return "NULL" 925 926 return self.binary(expression, "=") 927 928 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 929 parent = expression.parent 930 931 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 932 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 933 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 934 return self.func( 935 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 936 ) 937 938 return super().attimezone_sql(expression) 939 940 def trycast_sql(self, expression: exp.TryCast) -> str: 941 return self.cast_sql(expression, safe_prefix="SAFE_") 942 943 def bracket_sql(self, expression: exp.Bracket) -> str: 944 this = expression.this 945 expressions = expression.expressions 946 947 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 948 arg = expressions[0] 949 if arg.type is None: 950 from sqlglot.optimizer.annotate_types import annotate_types 951 952 arg = annotate_types(arg) 953 954 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 955 # BQ doesn't support bracket syntax with string values for structs 956 return f"{self.sql(this)}.{arg.name}" 957 958 expressions_sql = self.expressions(expression, flat=True) 959 offset = expression.args.get("offset") 960 961 if offset == 0: 962 expressions_sql = f"OFFSET({expressions_sql})" 963 elif offset == 1: 964 expressions_sql = f"ORDINAL({expressions_sql})" 965 elif offset is not None: 966 self.unsupported(f"Unsupported array offset: {offset}") 967 968 if expression.args.get("safe"): 969 expressions_sql = f"SAFE_{expressions_sql}" 970 971 return f"{self.sql(this)}[{expressions_sql}]" 972 973 def in_unnest_op(self, expression: exp.Unnest) -> str: 974 return self.sql(expression) 975 976 def except_op(self, expression: exp.Except) -> str: 977 if not expression.args.get("distinct"): 978 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 979 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 980 981 def intersect_op(self, expression: exp.Intersect) -> str: 982 if 
not expression.args.get("distinct"): 983 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 984 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 985 986 def version_sql(self, expression: exp.Version) -> str: 987 if expression.name == "TIMESTAMP": 988 expression.set("this", "SYSTEM_TIME") 989 return super().version_sql(expression)
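The quoted-table handling in table_parts above is easiest to see end to end. A minimal sketch using sqlglot's public transpile API (exact output may vary by version), showing that the two quoting styles from the comment are preserved rather than conflated:

import sqlglot

# A minimal sketch: `x.y` and `x`.`y` resolve differently in BigQuery, so the
# generator preserves whichever quoting style was parsed.
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`", read="bigquery", write="bigquery")[0])
print(sqlglot.transpile("WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`", read="bigquery", write="bigquery")[0])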
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
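Assuming the attribute documented here is Dialect.WEEK_OFFSET, a minimal sketch of how it can be inspected; the BigQuery value in the comment is an expectation, not a guarantee across versions:

import sqlglot
from sqlglot.dialects.dialect import Dialect

# Assumed attribute name: WEEK_OFFSET. BigQuery weeks start on Sunday, so a
# value of -1 is expected here, versus the default of 0 (Monday).
print(Dialect.get_or_raise("bigquery").WEEK_OFFSET)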
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
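A sketch of the effect, assuming this documents the dialect-level LOG_BASE_FIRST flag: BigQuery's LOG(value, base) takes the base second, while Spark's LOG(base, value) takes it first, so transpiling should reorder the arguments. The expected shape is noted in the comment and may differ by version:

import sqlglot

# BigQuery: LOG(value, base); Spark: LOG(base, value). sqlglot is expected to
# reorder the arguments when transpiling between the two.
print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="spark")[0])
# expected shape: SELECT LOG(10, 100)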
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" (which is what _qualify_columns() resolves) across the whole query, except in:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
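A sketch of how this difference surfaces through sqlglot's optimizer; qualify() is the public entry point, and the exact qualified output may differ across versions:

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""
# Under BigQuery, the my_id references in GROUP BY/HAVING are expected to be
# expanded back to the aliased expression (id) before columns are qualified.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql(dialect="bigquery"))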
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized. Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
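A sketch of the time-format mapping at work: BigQuery's %-style format string is translated for the target dialect. DuckDB's STRPTIME is the assumed target here, and the exact output may vary by version:

import sqlglot

# The format literal is carried through the dialect's time-format mapping so
# the target dialect receives an equivalent format string.
print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%Y-%m-%d', '2024-01-01')", read="bigquery", write="duckdb")[0])
# expected shape: SELECT STRPTIME('2024-01-01', '%Y-%m-%d')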
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
279 def normalize_identifier(self, expression: E) -> E: 280 if ( 281 isinstance(expression, exp.Identifier) 282 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 283 ): 284 parent = expression.parent 285 while isinstance(parent, exp.Dot): 286 parent = parent.parent 287 288 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 289 # by default. The following check uses a heuristic to detect tables based on whether 290 # they are qualified. This should generally be correct, because tables in BigQuery 291 # must be qualified with at least a dataset, unless @@dataset_id is set. 292 case_sensitive = ( 293 isinstance(parent, exp.UserDefinedFunction) 294 or ( 295 isinstance(parent, exp.Table) 296 and parent.db 297 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 298 ) 299 or expression.meta.get("is_table") 300 ) 301 if not case_sensitive: 302 expression.set("this", expression.this.lower()) 303 304 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
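A minimal sketch of the behaviors described above, using the public Dialect API; the values in the comments are expectations based on each dialect's documented strategy:

import sqlglot
from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

print(Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO")).name)   # expected: foo
print(Dialect.get_or_raise("snowflake").normalize_identifier(exp.to_identifier("FoO")).name)  # expected: FOO
print(Dialect.get_or_raise("bigquery").normalize_identifier(exp.to_identifier("FoO")).name)   # expected: foo (not a table/UDF name)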
Mapping of an escaped sequence (e.g. the two-character \n) to its unescaped version (the literal newline character it denotes).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
306 class Tokenizer(tokens.Tokenizer): 307 QUOTES = ["'", '"', '"""', "'''"] 308 COMMENTS = ["--", "#", ("/*", "*/")] 309 IDENTIFIERS = ["`"] 310 STRING_ESCAPES = ["\\"] 311 312 HEX_STRINGS = [("0x", ""), ("0X", "")] 313 314 BYTE_STRINGS = [ 315 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 316 ] 317 318 RAW_STRINGS = [ 319 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 320 ] 321 322 KEYWORDS = { 323 **tokens.Tokenizer.KEYWORDS, 324 "ANY TYPE": TokenType.VARIANT, 325 "BEGIN": TokenType.COMMAND, 326 "BEGIN TRANSACTION": TokenType.BEGIN, 327 "BYTEINT": TokenType.INT, 328 "BYTES": TokenType.BINARY, 329 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 330 "DATETIME": TokenType.TIMESTAMP, 331 "DECLARE": TokenType.COMMAND, 332 "ELSEIF": TokenType.COMMAND, 333 "EXCEPTION": TokenType.COMMAND, 334 "FLOAT64": TokenType.DOUBLE, 335 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 336 "MODEL": TokenType.MODEL, 337 "NOT DETERMINISTIC": TokenType.VOLATILE, 338 "RECORD": TokenType.STRUCT, 339 "TIMESTAMP": TokenType.TIMESTAMPTZ, 340 } 341 KEYWORDS.pop("DIV") 342 KEYWORDS.pop("VALUES") 343 KEYWORDS.pop("/*+")
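A sketch of the tokenizer settings above in action: backtick identifiers and raw strings are recognized when tokenizing under the BigQuery dialect:

import sqlglot

# tokenize() returns the raw token stream; the raw string and the backticked,
# dash-containing table path should each come through as single tokens.
for token in sqlglot.tokenize(r"SELECT r'\d+' FROM `my-project.ds.t`", read="bigquery"):
    print(token.token_type, token.text)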
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
345 class Parser(parser.Parser): 346 PREFIXED_PIVOT_COLUMNS = True 347 LOG_DEFAULTS_TO_LN = True 348 SUPPORTS_IMPLICIT_UNNEST = True 349 350 FUNCTIONS = { 351 **parser.Parser.FUNCTIONS, 352 "DATE": _build_date, 353 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 354 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 355 "DATE_TRUNC": lambda args: exp.DateTrunc( 356 unit=exp.Literal.string(str(seq_get(args, 1))), 357 this=seq_get(args, 0), 358 ), 359 "DATETIME": _build_datetime, 360 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 361 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 362 "DIV": binary_from_function(exp.IntDiv), 363 "FORMAT_DATE": lambda args: exp.TimeToStr( 364 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 365 ), 366 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 367 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 368 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 369 ), 370 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 371 "MD5": exp.MD5Digest.from_arg_list, 372 "TO_HEX": _build_to_hex, 373 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 374 [seq_get(args, 1), seq_get(args, 0)] 375 ), 376 "PARSE_TIMESTAMP": _build_parse_timestamp, 377 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 378 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 379 this=seq_get(args, 0), 380 expression=seq_get(args, 1), 381 position=seq_get(args, 2), 382 occurrence=seq_get(args, 3), 383 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 384 ), 385 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 386 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 387 "SPLIT": lambda args: exp.Split( 388 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 389 this=seq_get(args, 0), 390 expression=seq_get(args, 1) or exp.Literal.string(","), 391 ), 392 "TIME": _build_time, 393 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 394 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 395 "TIMESTAMP": _build_timestamp, 396 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 397 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 398 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 399 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 400 ), 401 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 402 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 403 ), 404 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 405 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 406 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 407 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 408 ), 409 } 410 411 FUNCTION_PARSERS = { 412 **parser.Parser.FUNCTION_PARSERS, 413 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 414 } 415 FUNCTION_PARSERS.pop("TRIM") 416 417 NO_PAREN_FUNCTIONS = { 418 **parser.Parser.NO_PAREN_FUNCTIONS, 419 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 420 } 421 422 NESTED_TYPE_TOKENS = { 423 *parser.Parser.NESTED_TYPE_TOKENS, 424 TokenType.TABLE, 425 } 426 427 PROPERTY_PARSERS = { 428 **parser.Parser.PROPERTY_PARSERS, 429 "NOT DETERMINISTIC": lambda self: self.expression( 430 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 431 ), 432 "OPTIONS": lambda self: 
self._parse_with_property(), 433 } 434 435 CONSTRAINT_PARSERS = { 436 **parser.Parser.CONSTRAINT_PARSERS, 437 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 438 } 439 440 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 441 RANGE_PARSERS.pop(TokenType.OVERLAPS) 442 443 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 444 445 STATEMENT_PARSERS = { 446 **parser.Parser.STATEMENT_PARSERS, 447 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 448 TokenType.END: lambda self: self._parse_as_command(self._prev), 449 TokenType.FOR: lambda self: self._parse_for_in(), 450 } 451 452 BRACKET_OFFSETS = { 453 "OFFSET": (0, False), 454 "ORDINAL": (1, False), 455 "SAFE_OFFSET": (0, True), 456 "SAFE_ORDINAL": (1, True), 457 } 458 459 def _parse_for_in(self) -> exp.ForIn: 460 this = self._parse_range() 461 self._match_text_seq("DO") 462 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 463 464 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 465 this = super()._parse_table_part(schema=schema) or self._parse_number() 466 467 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 468 if isinstance(this, exp.Identifier): 469 table_name = this.name 470 while self._match(TokenType.DASH, advance=False) and self._next: 471 text = "" 472 while self._curr and self._curr.token_type != TokenType.DOT: 473 self._advance() 474 text += self._prev.text 475 table_name += text 476 477 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 478 elif isinstance(this, exp.Literal): 479 table_name = this.name 480 481 if self._is_connected() and self._parse_var(any_token=True): 482 table_name += self._prev.text 483 484 this = exp.Identifier(this=table_name, quoted=True) 485 486 return this 487 488 def _parse_table_parts( 489 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 490 ) -> exp.Table: 491 table = super()._parse_table_parts( 492 schema=schema, is_db_reference=is_db_reference, wildcard=True 493 ) 494 495 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 496 if not table.catalog: 497 if table.db: 498 parts = table.db.split(".") 499 if len(parts) == 2 and not table.args["db"].quoted: 500 table.set("catalog", exp.Identifier(this=parts[0])) 501 table.set("db", exp.Identifier(this=parts[1])) 502 else: 503 parts = table.name.split(".") 504 if len(parts) == 2 and not table.this.quoted: 505 table.set("db", exp.Identifier(this=parts[0])) 506 table.set("this", exp.Identifier(this=parts[1])) 507 508 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 509 catalog, db, this, *rest = ( 510 exp.to_identifier(p, quoted=True) 511 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 512 ) 513 514 if rest and this: 515 this = exp.Dot.build([this, *rest]) # type: ignore 516 517 table = exp.Table( 518 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 519 ) 520 table.meta["quoted_table"] = True 521 522 return table 523 524 def _parse_column(self) -> t.Optional[exp.Expression]: 525 column = super()._parse_column() 526 if isinstance(column, exp.Column): 527 parts = column.parts 528 if any("." 
in p.name for p in parts): 529 catalog, db, table, this, *rest = ( 530 exp.to_identifier(p, quoted=True) 531 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 532 ) 533 534 if rest and this: 535 this = exp.Dot.build([this, *rest]) # type: ignore 536 537 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 538 column.meta["quoted_column"] = True 539 540 return column 541 542 @t.overload 543 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 544 545 @t.overload 546 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 547 548 def _parse_json_object(self, agg=False): 549 json_object = super()._parse_json_object() 550 array_kv_pair = seq_get(json_object.expressions, 0) 551 552 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 553 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 554 if ( 555 array_kv_pair 556 and isinstance(array_kv_pair.this, exp.Array) 557 and isinstance(array_kv_pair.expression, exp.Array) 558 ): 559 keys = array_kv_pair.this.expressions 560 values = array_kv_pair.expression.expressions 561 562 json_object.set( 563 "expressions", 564 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 565 ) 566 567 return json_object 568 569 def _parse_bracket( 570 self, this: t.Optional[exp.Expression] = None 571 ) -> t.Optional[exp.Expression]: 572 bracket = super()._parse_bracket(this) 573 574 if this is bracket: 575 return bracket 576 577 if isinstance(bracket, exp.Bracket): 578 for expression in bracket.expressions: 579 name = expression.name.upper() 580 581 if name not in self.BRACKET_OFFSETS: 582 break 583 584 offset, safe = self.BRACKET_OFFSETS[name] 585 bracket.set("offset", offset) 586 bracket.set("safe", safe) 587 expression.replace(expression.expressions[0]) 588 589 return bracket 590 591 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 592 unnest = super()._parse_unnest(with_alias=with_alias) 593 594 if not unnest: 595 return None 596 597 unnest_expr = seq_get(unnest.expressions, 0) 598 if unnest_expr: 599 from sqlglot.optimizer.annotate_types import annotate_types 600 601 unnest_expr = annotate_types(unnest_expr) 602 603 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 604 # in contrast to other dialects such as DuckDB which flattens only the array by default 605 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 606 array_elem.is_type(exp.DataType.Type.STRUCT) 607 for array_elem in unnest_expr._type.expressions 608 ): 609 unnest.set("explode_array", True) 610 611 return unnest
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
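A sketch of the unquoted, dash-containing table names handled by _parse_table_part above; the expected part breakdown is noted in the comment but may vary by version:

import sqlglot

# BigQuery allows unquoted dashes in project names; the parser reassembles
# them into a single table reference.
table = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").find(sqlglot.exp.Table)
print(table.catalog, table.db, table.name)  # expected: my-project dataset tbl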
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
613 class Generator(generator.Generator): 614 EXPLICIT_SET_OP = True 615 INTERVAL_ALLOWS_PLURAL_FORM = False 616 JOIN_HINTS = False 617 QUERY_HINTS = False 618 TABLE_HINTS = False 619 LIMIT_FETCH = "LIMIT" 620 RENAME_TABLE_WITH_DB = False 621 NVL2_SUPPORTED = False 622 UNNEST_WITH_ORDINALITY = False 623 COLLATE_IS_FUNC = True 624 LIMIT_ONLY_LITERALS = True 625 SUPPORTS_TABLE_ALIAS_COLUMNS = False 626 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 627 JSON_KEY_VALUE_PAIR_SEP = "," 628 NULL_ORDERING_SUPPORTED = False 629 IGNORE_NULLS_IN_FUNC = True 630 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 631 CAN_IMPLEMENT_ARRAY_ANY = True 632 SUPPORTS_TO_NUMBER = False 633 NAMED_PLACEHOLDER_TOKEN = "@" 634 HEX_FUNC = "TO_HEX" 635 WITH_PROPERTIES_PREFIX = "OPTIONS" 636 SUPPORTS_EXPLODING_PROJECTIONS = False 637 638 TRANSFORMS = { 639 **generator.Generator.TRANSFORMS, 640 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 641 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 642 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 643 exp.Array: inline_array_unless_query, 644 exp.ArrayContains: _array_contains_sql, 645 exp.ArrayFilter: filter_array_using_unnest, 646 exp.ArraySize: rename_func("ARRAY_LENGTH"), 647 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 648 exp.CollateProperty: lambda self, e: ( 649 f"DEFAULT COLLATE {self.sql(e, 'this')}" 650 if e.args.get("default") 651 else f"COLLATE {self.sql(e, 'this')}" 652 ), 653 exp.Commit: lambda *_: "COMMIT TRANSACTION", 654 exp.CountIf: rename_func("COUNTIF"), 655 exp.Create: _create_sql, 656 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 657 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 658 exp.DateDiff: lambda self, e: self.func( 659 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 660 ), 661 exp.DateFromParts: rename_func("DATE"), 662 exp.DateStrToDate: datestrtodate_sql, 663 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 664 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 665 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 666 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 667 exp.FromTimeZone: lambda self, e: self.func( 668 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 669 ), 670 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 671 exp.GroupConcat: rename_func("STRING_AGG"), 672 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 673 exp.If: if_sql(false_value="NULL"), 674 exp.ILike: no_ilike_sql, 675 exp.IntDiv: rename_func("DIV"), 676 exp.JSONFormat: rename_func("TO_JSON_STRING"), 677 exp.Max: max_or_greatest, 678 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 679 exp.MD5Digest: rename_func("MD5"), 680 exp.Min: min_or_least, 681 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 682 exp.RegexpExtract: lambda self, e: self.func( 683 "REGEXP_EXTRACT", 684 e.this, 685 e.expression, 686 e.args.get("position"), 687 e.args.get("occurrence"), 688 ), 689 exp.RegexpReplace: regexp_replace_sql, 690 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 691 exp.ReturnsProperty: _returnsproperty_sql, 692 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 693 exp.Select: transforms.preprocess( 694 [ 695 transforms.explode_to_unnest(), 696 transforms.unqualify_unnest, 697 transforms.eliminate_distinct_on, 698 _alias_ordered_group, 699 transforms.eliminate_semi_and_anti_joins, 700 ] 701 ), 702 exp.SHA: rename_func("SHA1"), 703 exp.SHA2: sha256_sql, 704 
exp.StabilityProperty: lambda self, e: ( 705 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 706 ), 707 exp.StrToDate: _str_to_datetime_sql, 708 exp.StrToTime: _str_to_datetime_sql, 709 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 710 exp.TimeFromParts: rename_func("TIME"), 711 exp.TimestampFromParts: rename_func("DATETIME"), 712 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 713 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 714 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 715 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 716 exp.TimeStrToTime: timestrtotime_sql, 717 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 718 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 719 exp.TsOrDsAdd: _ts_or_ds_add_sql, 720 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 721 exp.TsOrDsToTime: rename_func("TIME"), 722 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 723 exp.Unhex: rename_func("FROM_HEX"), 724 exp.UnixDate: rename_func("UNIX_DATE"), 725 exp.UnixToTime: _unix_to_time_sql, 726 exp.Values: _derived_table_values_to_unnest, 727 exp.VariancePop: rename_func("VAR_POP"), 728 } 729 730 SUPPORTED_JSON_PATH_PARTS = { 731 exp.JSONPathKey, 732 exp.JSONPathRoot, 733 exp.JSONPathSubscript, 734 } 735 736 TYPE_MAPPING = { 737 **generator.Generator.TYPE_MAPPING, 738 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 739 exp.DataType.Type.BIGINT: "INT64", 740 exp.DataType.Type.BINARY: "BYTES", 741 exp.DataType.Type.BOOLEAN: "BOOL", 742 exp.DataType.Type.CHAR: "STRING", 743 exp.DataType.Type.DECIMAL: "NUMERIC", 744 exp.DataType.Type.DOUBLE: "FLOAT64", 745 exp.DataType.Type.FLOAT: "FLOAT64", 746 exp.DataType.Type.INT: "INT64", 747 exp.DataType.Type.NCHAR: "STRING", 748 exp.DataType.Type.NVARCHAR: "STRING", 749 exp.DataType.Type.SMALLINT: "INT64", 750 exp.DataType.Type.TEXT: "STRING", 751 exp.DataType.Type.TIMESTAMP: "DATETIME", 752 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 753 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 754 exp.DataType.Type.TINYINT: "INT64", 755 exp.DataType.Type.VARBINARY: "BYTES", 756 exp.DataType.Type.ROWVERSION: "BYTES", 757 exp.DataType.Type.VARCHAR: "STRING", 758 exp.DataType.Type.VARIANT: "ANY TYPE", 759 } 760 761 PROPERTIES_LOCATION = { 762 **generator.Generator.PROPERTIES_LOCATION, 763 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 764 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 765 } 766 767 # WINDOW comes after QUALIFY 768 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 769 AFTER_HAVING_MODIFIER_TRANSFORMS = { 770 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 771 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 772 } 773 774 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 775 RESERVED_KEYWORDS = { 776 "all", 777 "and", 778 "any", 779 "array", 780 "as", 781 "asc", 782 "assert_rows_modified", 783 "at", 784 "between", 785 "by", 786 "case", 787 "cast", 788 "collate", 789 "contains", 790 "create", 791 "cross", 792 "cube", 793 "current", 794 "default", 795 "define", 796 "desc", 797 "distinct", 798 "else", 799 "end", 800 "enum", 801 "escape", 802 "except", 803 "exclude", 804 "exists", 805 "extract", 806 "false", 807 "fetch", 808 "following", 809 "for", 810 "from", 811 "full", 812 "group", 813 "grouping", 814 "groups", 815 "hash", 816 "having", 817 "if", 818 "ignore", 819 "in", 820 "inner", 821 "intersect", 822 "interval", 823 "into", 824 "is", 825 
"join", 826 "lateral", 827 "left", 828 "like", 829 "limit", 830 "lookup", 831 "merge", 832 "natural", 833 "new", 834 "no", 835 "not", 836 "null", 837 "nulls", 838 "of", 839 "on", 840 "or", 841 "order", 842 "outer", 843 "over", 844 "partition", 845 "preceding", 846 "proto", 847 "qualify", 848 "range", 849 "recursive", 850 "respect", 851 "right", 852 "rollup", 853 "rows", 854 "select", 855 "set", 856 "some", 857 "struct", 858 "tablesample", 859 "then", 860 "to", 861 "treat", 862 "true", 863 "unbounded", 864 "union", 865 "unnest", 866 "using", 867 "when", 868 "where", 869 "window", 870 "with", 871 "within", 872 } 873 874 def mod_sql(self, expression: exp.Mod) -> str: 875 this = expression.this 876 expr = expression.expression 877 return self.func( 878 "MOD", 879 this.unnest() if isinstance(this, exp.Paren) else this, 880 expr.unnest() if isinstance(expr, exp.Paren) else expr, 881 ) 882 883 def column_parts(self, expression: exp.Column) -> str: 884 if expression.meta.get("quoted_column"): 885 # If a column reference is of the form `dataset.table`.name, we need 886 # to preserve the quoted table path, otherwise the reference breaks 887 table_parts = ".".join(p.name for p in expression.parts[:-1]) 888 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 889 return f"{table_path}.{self.sql(expression, 'this')}" 890 891 return super().column_parts(expression) 892 893 def table_parts(self, expression: exp.Table) -> str: 894 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 895 # we need to make sure the correct quoting is used in each case. 896 # 897 # For example, if there is a CTE x that clashes with a schema name, then the former will 898 # return the table y in that schema, whereas the latter will return the CTE's y column: 899 # 900 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 901 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 902 if expression.meta.get("quoted_table"): 903 table_parts = ".".join(p.name for p in expression.parts) 904 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 905 906 return super().table_parts(expression) 907 908 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 909 if isinstance(expression.this, exp.TsOrDsToTimestamp): 910 func_name = "FORMAT_DATETIME" 911 else: 912 func_name = "FORMAT_DATE" 913 this = ( 914 expression.this 915 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 916 else expression 917 ) 918 return self.func(func_name, self.format_time(expression), this.this) 919 920 def eq_sql(self, expression: exp.EQ) -> str: 921 # Operands of = cannot be NULL in BigQuery 922 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 923 if not isinstance(expression.parent, exp.Update): 924 return "NULL" 925 926 return self.binary(expression, "=") 927 928 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 929 parent = expression.parent 930 931 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 932 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
933 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 934 return self.func( 935 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 936 ) 937 938 return super().attimezone_sql(expression) 939 940 def trycast_sql(self, expression: exp.TryCast) -> str: 941 return self.cast_sql(expression, safe_prefix="SAFE_") 942 943 def bracket_sql(self, expression: exp.Bracket) -> str: 944 this = expression.this 945 expressions = expression.expressions 946 947 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 948 arg = expressions[0] 949 if arg.type is None: 950 from sqlglot.optimizer.annotate_types import annotate_types 951 952 arg = annotate_types(arg) 953 954 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 955 # BQ doesn't support bracket syntax with string values for structs 956 return f"{self.sql(this)}.{arg.name}" 957 958 expressions_sql = self.expressions(expression, flat=True) 959 offset = expression.args.get("offset") 960 961 if offset == 0: 962 expressions_sql = f"OFFSET({expressions_sql})" 963 elif offset == 1: 964 expressions_sql = f"ORDINAL({expressions_sql})" 965 elif offset is not None: 966 self.unsupported(f"Unsupported array offset: {offset}") 967 968 if expression.args.get("safe"): 969 expressions_sql = f"SAFE_{expressions_sql}" 970 971 return f"{self.sql(this)}[{expressions_sql}]" 972 973 def in_unnest_op(self, expression: exp.Unnest) -> str: 974 return self.sql(expression) 975 976 def except_op(self, expression: exp.Except) -> str: 977 if not expression.args.get("distinct"): 978 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 979 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 980 981 def intersect_op(self, expression: exp.Intersect) -> str: 982 if not expression.args.get("distinct"): 983 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 984 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 985 986 def version_sql(self, expression: exp.Version) -> str: 987 if expression.name == "TIMESTAMP": 988 expression.set("this", "SYSTEM_TIME") 989 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
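A sketch of how these options are supplied in practice: generation options such as pretty and normalize_functions are forwarded through the public transpile() API:

import sqlglot

# pretty=True formats the output; normalize_functions controls the casing of
# function names in the generated SQL.
print(
    sqlglot.transpile(
        "select count(*) as c from `proj.ds.t` where x > 1",
        read="bigquery",
        write="bigquery",
        pretty=True,
        normalize_functions="upper",
    )[0]
)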
883 def column_parts(self, expression: exp.Column) -> str: 884 if expression.meta.get("quoted_column"): 885 # If a column reference is of the form `dataset.table`.name, we need 886 # to preserve the quoted table path, otherwise the reference breaks 887 table_parts = ".".join(p.name for p in expression.parts[:-1]) 888 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 889 return f"{table_path}.{self.sql(expression, 'this')}" 890 891 return super().column_parts(expression)
893 def table_parts(self, expression: exp.Table) -> str: 894 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 895 # we need to make sure the correct quoting is used in each case. 896 # 897 # For example, if there is a CTE x that clashes with a schema name, then the former will 898 # return the table y in that schema, whereas the latter will return the CTE's y column: 899 # 900 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 901 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 902 if expression.meta.get("quoted_table"): 903 table_parts = ".".join(p.name for p in expression.parts) 904 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 905 906 return super().table_parts(expression)
908 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 909 if isinstance(expression.this, exp.TsOrDsToTimestamp): 910 func_name = "FORMAT_DATETIME" 911 else: 912 func_name = "FORMAT_DATE" 913 this = ( 914 expression.this 915 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 916 else expression 917 ) 918 return self.func(func_name, self.format_time(expression), this.this)
928 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 929 parent = expression.parent 930 931 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 932 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 933 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 934 return self.func( 935 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 936 ) 937 938 return super().attimezone_sql(expression)
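A sketch of attimezone_sql's non-cast branch, reading AT TIME ZONE from a dialect that supports it natively (Postgres is assumed here); the expected shape is noted in the comment and may vary by version:

import sqlglot

# Outside of a CAST ... FORMAT, AT TIME ZONE is expected to be rewritten into
# the TIMESTAMP(DATETIME(...)) form shown in the method above.
print(sqlglot.transpile("SELECT ts AT TIME ZONE 'UTC' FROM t", read="postgres", write="bigquery")[0])
# expected shape: SELECT TIMESTAMP(DATETIME(ts, 'UTC')) FROM t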
943 def bracket_sql(self, expression: exp.Bracket) -> str: 944 this = expression.this 945 expressions = expression.expressions 946 947 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 948 arg = expressions[0] 949 if arg.type is None: 950 from sqlglot.optimizer.annotate_types import annotate_types 951 952 arg = annotate_types(arg) 953 954 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 955 # BQ doesn't support bracket syntax with string values for structs 956 return f"{self.sql(this)}.{arg.name}" 957 958 expressions_sql = self.expressions(expression, flat=True) 959 offset = expression.args.get("offset") 960 961 if offset == 0: 962 expressions_sql = f"OFFSET({expressions_sql})" 963 elif offset == 1: 964 expressions_sql = f"ORDINAL({expressions_sql})" 965 elif offset is not None: 966 self.unsupported(f"Unsupported array offset: {offset}") 967 968 if expression.args.get("safe"): 969 expressions_sql = f"SAFE_{expressions_sql}" 970 971 return f"{self.sql(this)}[{expressions_sql}]"
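A sketch of the OFFSET/ORDINAL handling: the parser records the offset and safety information for these wrappers, and bracket_sql re-emits them, so a BigQuery round trip should be stable:

import sqlglot

# SAFE_OFFSET(0) is parsed into the bracket's offset/safe args and regenerated.
print(sqlglot.transpile("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery", write="bigquery")[0])
# expected: SELECT arr[SAFE_OFFSET(0)] FROM t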
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql