sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    """Rewrite a VALUES clause used as a derived table into UNNEST([STRUCT(...), ...]).

    BigQuery has no `FROM (VALUES ...)` syntax, so when the VALUES node sits under a
    FROM or JOIN it is converted to an UNNEST over an array of STRUCTs, one STRUCT per
    row. Column names come from the table alias when present, otherwise synthetic
    `_c0`, `_c1`, ... names are generated per tuple.
    """
    # A VALUES node that is not a derived table (no From/Join ancestor) can be
    # generated as-is.
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    return self.unnest_sql(
        exp.Unnest(
            expressions=[
                exp.array(
                    *(
                        exp.Struct(
                            expressions=[
                                # Each row value becomes `value AS column_name`
                                # inside its STRUCT.
                                exp.alias_(value, column_name)
                                for value, column_name in zip(
                                    t.expressions,
                                    (
                                        alias.columns
                                        if alias and alias.columns
                                        else (f"_c{i}" for i in range(len(t.expressions)))
                                    ),
                                )
                            ]
                        )
                        # NOTE: the loop variable `t` shadows `typing as t` inside
                        # this comprehension only; each `t` is one VALUES row tuple.
                        for t in expression.find_all(exp.Tuple)
                    ),
                    copy=False,
                )
            ]
        )
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    """Generate a RETURNS clause; a Schema return is rendered as `name <col defs>`."""
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    """Generate CREATE, rewriting table-valued functions to `TABLE FUNCTION`.

    For a CREATE FUNCTION whose ReturnsProperty is flagged `is_table`, the kind is
    changed to "TABLE FUNCTION" and a Subquery/Literal body is unwrapped so the
    generator emits the bare expression.
    """
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            # Unwrap so the function body isn't emitted with extra parentheses/quotes.
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        # Collect aliases of UNNESTs that act as table sources in this scope.
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                # Drop whichever qualifier part matches an unnest alias.
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    """Replace GROUP BY expressions with their select-list aliases when ORDER BY is present."""
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            # Map each aliased projection expression to its alias identifier.
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            # A star projection can't be matched 1:1 to the CTE column list.
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_parse_timestamp(args: t.List) -> exp.StrToTime:
    """Parse PARSE_TIMESTAMP(fmt, value[, zone]) into StrToTime (args swapped to value, fmt)."""
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_timestamp(args: t.List) -> exp.Timestamp:
    """Parse TIMESTAMP(...) into a Timestamp node flagged as timezone-aware."""
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    """Parse DATE(...): three args mean DATE(year, month, day), otherwise a plain DATE cast."""
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    """Generate ARRAY_CONTAINS as EXISTS(SELECT 1 FROM UNNEST(arr) AS _unnest(_col) WHERE _col = value)."""
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    """Generate TsOrDsAdd as DATE_ADD after casting the operand appropriately."""
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    """Generate TsOrDsDiff as DATE_DIFF, casting both operands to TIMESTAMP first."""
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    """Generate UnixToTime using the TIMESTAMP_SECONDS/MILLIS/MICROS family.

    Unrecognized scales fall back to dividing by 10**scale and using TIMESTAMP_SECONDS.
    """
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"

    return f"TIMESTAMP_SECONDS(CAST({timestamp} / POW(10, {scale}) AS INT64))"


def _parse_time(args: t.List) -> exp.Func:
    """Parse TIME(...): one arg is a time cast, three args are TIME(h, m, s), else left anonymous."""
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    """Dialect definition for Google BigQuery (Standard SQL)."""

    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        """Lower-case identifiers that are case-insensitive in BigQuery, leaving
        tables/UDFs (which are case-sensitive) untouched."""
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            # Walk up through Dot chains to find the semantically relevant parent.
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        # b'...' / B'...' byte strings and r'...' / R'...' raw strings for every
        # supported quote style.
        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        # DIV is a function in BigQuery, not an operator keyword.
        KEYWORDS.pop("DIV")
    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            # FORMAT_DATE(fmt, date) -> TimeToStr with swapped args.
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            # PARSE_DATE(fmt, value) -> StrToDate with swapped args.
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                # BigQuery implicitly extracts the single capture group when one
                # exists, so mark group=1 in that case.
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _parse_timestamp,
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            # ARRAY(<subquery>) takes a full statement as its single argument.
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        # BigQuery's TRIM doesn't use the TRIM(<chars> FROM <expr>) special form.
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        # Maps BigQuery's array subscript operators to (index offset, safe flag):
        # OFFSET is 0-based, ORDINAL is 1-based; SAFE_* return NULL out of bounds.
        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            """Parse a `FOR <expr> ... DO <statement>` loop."""
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            """Parse a single table-name part, supporting dashes and leading digits."""
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                # Glue `name-part` sequences back into a single identifier.
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                # A number immediately followed by text (e.g. `123abc`) is a table name.
                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False
        ) -> exp.Table:
            """Parse a table reference, splitting a dotted single identifier
            (e.g. `project.dataset.table` inside one set of backticks) into parts."""
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    # More than three parts: fold the remainder into a Dot chain.
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            """Parse array subscripts, lifting OFFSET/ORDINAL/SAFE_* wrappers into
            the Bracket node's `offset`/`safe` args."""
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    # Replace e.g. OFFSET(i) with the bare index i.
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self,
            e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self,
            e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            """Generate TimeToStr as FORMAT_DATE(fmt, value)."""
            # When wrapped in TsOrDsToDate, format the inner value directly.
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            """Generate STRUCT(...), rendering key/value pairs as `value AS key`."""
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                # `SET x = NULL` in UPDATE is the one place this is legal.
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            """Generate TRY_CAST as SAFE_CAST."""
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            # Column lists survive here only when _pushdown_cte_column_names couldn't
            # push them down (e.g. star projections).
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            """Generate ARRAY, using ARRAY(<subquery>) for a subquery argument."""
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Generate array subscripts, restoring OFFSET/ORDINAL/SAFE_* wrappers."""
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            # `IN UNNEST(arr)` takes the unnest expression bare, with no wrapping.
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            """Render table/column properties with BigQuery's OPTIONS keyword."""
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            # FOR SYSTEM_TIME AS OF rather than FOR TIMESTAMP AS OF.
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
238class BigQuery(Dialect): 239 WEEK_OFFSET = -1 240 UNNEST_COLUMN_ONLY = True 241 SUPPORTS_USER_DEFINED_TYPES = False 242 SUPPORTS_SEMI_ANTI_JOIN = False 243 LOG_BASE_FIRST = False 244 245 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity 246 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 247 248 # bigquery udfs are case sensitive 249 NORMALIZE_FUNCTIONS = False 250 251 TIME_MAPPING = { 252 "%D": "%m/%d/%y", 253 } 254 255 ESCAPE_SEQUENCES = { 256 "\\a": "\a", 257 "\\b": "\b", 258 "\\f": "\f", 259 "\\n": "\n", 260 "\\r": "\r", 261 "\\t": "\t", 262 "\\v": "\v", 263 } 264 265 FORMAT_MAPPING = { 266 "DD": "%d", 267 "MM": "%m", 268 "MON": "%b", 269 "MONTH": "%B", 270 "YYYY": "%Y", 271 "YY": "%y", 272 "HH": "%I", 273 "HH12": "%I", 274 "HH24": "%H", 275 "MI": "%M", 276 "SS": "%S", 277 "SSSSS": "%f", 278 "TZH": "%z", 279 } 280 281 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 282 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 283 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 284 285 def normalize_identifier(self, expression: E) -> E: 286 if isinstance(expression, exp.Identifier): 287 parent = expression.parent 288 while isinstance(parent, exp.Dot): 289 parent = parent.parent 290 291 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 292 # The following check is essentially a heuristic to detect tables based on whether or 293 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 
294 if ( 295 not isinstance(parent, exp.UserDefinedFunction) 296 and not (isinstance(parent, exp.Table) and parent.db) 297 and not expression.meta.get("is_table") 298 ): 299 expression.set("this", expression.this.lower()) 300 301 return expression 302 303 class Tokenizer(tokens.Tokenizer): 304 QUOTES = ["'", '"', '"""', "'''"] 305 COMMENTS = ["--", "#", ("/*", "*/")] 306 IDENTIFIERS = ["`"] 307 STRING_ESCAPES = ["\\"] 308 309 HEX_STRINGS = [("0x", ""), ("0X", "")] 310 311 BYTE_STRINGS = [ 312 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 313 ] 314 315 RAW_STRINGS = [ 316 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 317 ] 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "ANY TYPE": TokenType.VARIANT, 322 "BEGIN": TokenType.COMMAND, 323 "BEGIN TRANSACTION": TokenType.BEGIN, 324 "BYTES": TokenType.BINARY, 325 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 326 "DECLARE": TokenType.COMMAND, 327 "FLOAT64": TokenType.DOUBLE, 328 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 329 "MODEL": TokenType.MODEL, 330 "NOT DETERMINISTIC": TokenType.VOLATILE, 331 "RECORD": TokenType.STRUCT, 332 "TIMESTAMP": TokenType.TIMESTAMPTZ, 333 } 334 KEYWORDS.pop("DIV") 335 336 class Parser(parser.Parser): 337 PREFIXED_PIVOT_COLUMNS = True 338 339 LOG_DEFAULTS_TO_LN = True 340 341 FUNCTIONS = { 342 **parser.Parser.FUNCTIONS, 343 "DATE": _parse_date, 344 "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd), 345 "DATE_SUB": parse_date_delta_with_interval(exp.DateSub), 346 "DATE_TRUNC": lambda args: exp.DateTrunc( 347 unit=exp.Literal.string(str(seq_get(args, 1))), 348 this=seq_get(args, 0), 349 ), 350 "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd), 351 "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub), 352 "DIV": binary_from_function(exp.IntDiv), 353 "FORMAT_DATE": lambda args: exp.TimeToStr( 354 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 355 ), 356 
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 357 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 358 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 359 ), 360 "MD5": exp.MD5Digest.from_arg_list, 361 "TO_HEX": _parse_to_hex, 362 "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")( 363 [seq_get(args, 1), seq_get(args, 0)] 364 ), 365 "PARSE_TIMESTAMP": _parse_parse_timestamp, 366 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 367 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 368 this=seq_get(args, 0), 369 expression=seq_get(args, 1), 370 position=seq_get(args, 2), 371 occurrence=seq_get(args, 3), 372 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 373 ), 374 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 375 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 376 "SPLIT": lambda args: exp.Split( 377 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 378 this=seq_get(args, 0), 379 expression=seq_get(args, 1) or exp.Literal.string(","), 380 ), 381 "TIME": _parse_time, 382 "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd), 383 "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub), 384 "TIMESTAMP": _parse_timestamp, 385 "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd), 386 "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub), 387 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 388 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 389 ), 390 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 391 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 392 ), 393 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 394 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 395 } 396 397 FUNCTION_PARSERS = { 398 **parser.Parser.FUNCTION_PARSERS, 399 "ARRAY": lambda self: self.expression(exp.Array, 
expressions=[self._parse_statement()]), 400 } 401 FUNCTION_PARSERS.pop("TRIM") 402 403 NO_PAREN_FUNCTIONS = { 404 **parser.Parser.NO_PAREN_FUNCTIONS, 405 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 406 } 407 408 NESTED_TYPE_TOKENS = { 409 *parser.Parser.NESTED_TYPE_TOKENS, 410 TokenType.TABLE, 411 } 412 413 ID_VAR_TOKENS = { 414 *parser.Parser.ID_VAR_TOKENS, 415 TokenType.VALUES, 416 } 417 418 PROPERTY_PARSERS = { 419 **parser.Parser.PROPERTY_PARSERS, 420 "NOT DETERMINISTIC": lambda self: self.expression( 421 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 422 ), 423 "OPTIONS": lambda self: self._parse_with_property(), 424 } 425 426 CONSTRAINT_PARSERS = { 427 **parser.Parser.CONSTRAINT_PARSERS, 428 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 429 } 430 431 RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy() 432 RANGE_PARSERS.pop(TokenType.OVERLAPS, None) 433 434 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 435 436 STATEMENT_PARSERS = { 437 **parser.Parser.STATEMENT_PARSERS, 438 TokenType.END: lambda self: self._parse_as_command(self._prev), 439 TokenType.FOR: lambda self: self._parse_for_in(), 440 } 441 442 BRACKET_OFFSETS = { 443 "OFFSET": (0, False), 444 "ORDINAL": (1, False), 445 "SAFE_OFFSET": (0, True), 446 "SAFE_ORDINAL": (1, True), 447 } 448 449 def _parse_for_in(self) -> exp.ForIn: 450 this = self._parse_range() 451 self._match_text_seq("DO") 452 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 453 454 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 455 this = super()._parse_table_part(schema=schema) or self._parse_number() 456 457 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 458 if isinstance(this, exp.Identifier): 459 table_name = this.name 460 while self._match(TokenType.DASH, advance=False) and self._next: 461 self._advance(2) 462 table_name += f"-{self._prev.text}" 463 464 this = 
exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 465 elif isinstance(this, exp.Literal): 466 table_name = this.name 467 468 if self._is_connected() and self._parse_var(any_token=True): 469 table_name += self._prev.text 470 471 this = exp.Identifier(this=table_name, quoted=True) 472 473 return this 474 475 def _parse_table_parts( 476 self, schema: bool = False, is_db_reference: bool = False 477 ) -> exp.Table: 478 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 479 if isinstance(table.this, exp.Identifier) and "." in table.name: 480 catalog, db, this, *rest = ( 481 t.cast(t.Optional[exp.Expression], exp.to_identifier(x)) 482 for x in split_num_words(table.name, ".", 3) 483 ) 484 485 if rest and this: 486 this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest])) 487 488 table = exp.Table(this=this, db=db, catalog=catalog) 489 490 return table 491 492 @t.overload 493 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: 494 ... 495 496 @t.overload 497 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: 498 ... 
499 500 def _parse_json_object(self, agg=False): 501 json_object = super()._parse_json_object() 502 array_kv_pair = seq_get(json_object.expressions, 0) 503 504 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 505 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 506 if ( 507 array_kv_pair 508 and isinstance(array_kv_pair.this, exp.Array) 509 and isinstance(array_kv_pair.expression, exp.Array) 510 ): 511 keys = array_kv_pair.this.expressions 512 values = array_kv_pair.expression.expressions 513 514 json_object.set( 515 "expressions", 516 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 517 ) 518 519 return json_object 520 521 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 522 bracket = super()._parse_bracket(this) 523 524 if this is bracket: 525 return bracket 526 527 if isinstance(bracket, exp.Bracket): 528 for expression in bracket.expressions: 529 name = expression.name.upper() 530 531 if name not in self.BRACKET_OFFSETS: 532 break 533 534 offset, safe = self.BRACKET_OFFSETS[name] 535 bracket.set("offset", offset) 536 bracket.set("safe", safe) 537 expression.replace(expression.expressions[0]) 538 539 return bracket 540 541 class Generator(generator.Generator): 542 EXPLICIT_UNION = True 543 INTERVAL_ALLOWS_PLURAL_FORM = False 544 JOIN_HINTS = False 545 QUERY_HINTS = False 546 TABLE_HINTS = False 547 LIMIT_FETCH = "LIMIT" 548 RENAME_TABLE_WITH_DB = False 549 NVL2_SUPPORTED = False 550 UNNEST_WITH_ORDINALITY = False 551 COLLATE_IS_FUNC = True 552 LIMIT_ONLY_LITERALS = True 553 SUPPORTS_TABLE_ALIAS_COLUMNS = False 554 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 555 JSON_KEY_VALUE_PAIR_SEP = "," 556 NULL_ORDERING_SUPPORTED = False 557 IGNORE_NULLS_IN_FUNC = True 558 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 559 560 TRANSFORMS = { 561 **generator.Generator.TRANSFORMS, 562 exp.ApproxDistinct: 
rename_func("APPROX_COUNT_DISTINCT"), 563 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 564 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 565 exp.ArrayContains: _array_contains_sql, 566 exp.ArraySize: rename_func("ARRAY_LENGTH"), 567 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 568 exp.CollateProperty: lambda self, e: ( 569 f"DEFAULT COLLATE {self.sql(e, 'this')}" 570 if e.args.get("default") 571 else f"COLLATE {self.sql(e, 'this')}" 572 ), 573 exp.CountIf: rename_func("COUNTIF"), 574 exp.Create: _create_sql, 575 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 576 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 577 exp.DateDiff: lambda self, 578 e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})", 579 exp.DateFromParts: rename_func("DATE"), 580 exp.DateStrToDate: datestrtodate_sql, 581 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 582 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 583 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 584 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 585 exp.FromTimeZone: lambda self, e: self.func( 586 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 587 ), 588 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 589 exp.GroupConcat: rename_func("STRING_AGG"), 590 exp.Hex: rename_func("TO_HEX"), 591 exp.If: if_sql(false_value="NULL"), 592 exp.ILike: no_ilike_sql, 593 exp.IntDiv: rename_func("DIV"), 594 exp.JSONFormat: rename_func("TO_JSON_STRING"), 595 exp.Max: max_or_greatest, 596 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 597 exp.MD5Digest: rename_func("MD5"), 598 exp.Min: min_or_least, 599 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 600 exp.RegexpExtract: lambda self, e: self.func( 601 "REGEXP_EXTRACT", 602 e.this, 603 e.expression, 604 e.args.get("position"), 605 
e.args.get("occurrence"), 606 ), 607 exp.RegexpReplace: regexp_replace_sql, 608 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 609 exp.ReturnsProperty: _returnsproperty_sql, 610 exp.Select: transforms.preprocess( 611 [ 612 transforms.explode_to_unnest(), 613 _unqualify_unnest, 614 transforms.eliminate_distinct_on, 615 _alias_ordered_group, 616 transforms.eliminate_semi_and_anti_joins, 617 ] 618 ), 619 exp.SHA2: lambda self, e: self.func( 620 "SHA256" if e.text("length") == "256" else "SHA512", e.this 621 ), 622 exp.StabilityProperty: lambda self, e: ( 623 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 624 ), 625 exp.StrToDate: lambda self, 626 e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})", 627 exp.StrToTime: lambda self, e: self.func( 628 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 629 ), 630 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 631 exp.TimeFromParts: rename_func("TIME"), 632 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 633 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 634 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 635 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 636 exp.TimeStrToTime: timestrtotime_sql, 637 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 638 exp.TsOrDsAdd: _ts_or_ds_add_sql, 639 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 640 exp.TsOrDsToTime: rename_func("TIME"), 641 exp.Unhex: rename_func("FROM_HEX"), 642 exp.UnixDate: rename_func("UNIX_DATE"), 643 exp.UnixToTime: _unix_to_time_sql, 644 exp.Values: _derived_table_values_to_unnest, 645 exp.VariancePop: rename_func("VAR_POP"), 646 } 647 648 SUPPORTED_JSON_PATH_PARTS = { 649 exp.JSONPathKey, 650 exp.JSONPathRoot, 651 exp.JSONPathSubscript, 652 } 653 654 TYPE_MAPPING = { 655 **generator.Generator.TYPE_MAPPING, 656 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 657 exp.DataType.Type.BIGINT: "INT64", 658 exp.DataType.Type.BINARY: "BYTES", 659 exp.DataType.Type.BOOLEAN: "BOOL", 
660 exp.DataType.Type.CHAR: "STRING", 661 exp.DataType.Type.DECIMAL: "NUMERIC", 662 exp.DataType.Type.DOUBLE: "FLOAT64", 663 exp.DataType.Type.FLOAT: "FLOAT64", 664 exp.DataType.Type.INT: "INT64", 665 exp.DataType.Type.NCHAR: "STRING", 666 exp.DataType.Type.NVARCHAR: "STRING", 667 exp.DataType.Type.SMALLINT: "INT64", 668 exp.DataType.Type.TEXT: "STRING", 669 exp.DataType.Type.TIMESTAMP: "DATETIME", 670 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 671 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 672 exp.DataType.Type.TINYINT: "INT64", 673 exp.DataType.Type.VARBINARY: "BYTES", 674 exp.DataType.Type.VARCHAR: "STRING", 675 exp.DataType.Type.VARIANT: "ANY TYPE", 676 } 677 678 PROPERTIES_LOCATION = { 679 **generator.Generator.PROPERTIES_LOCATION, 680 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 681 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 682 } 683 684 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 685 RESERVED_KEYWORDS = { 686 *generator.Generator.RESERVED_KEYWORDS, 687 "all", 688 "and", 689 "any", 690 "array", 691 "as", 692 "asc", 693 "assert_rows_modified", 694 "at", 695 "between", 696 "by", 697 "case", 698 "cast", 699 "collate", 700 "contains", 701 "create", 702 "cross", 703 "cube", 704 "current", 705 "default", 706 "define", 707 "desc", 708 "distinct", 709 "else", 710 "end", 711 "enum", 712 "escape", 713 "except", 714 "exclude", 715 "exists", 716 "extract", 717 "false", 718 "fetch", 719 "following", 720 "for", 721 "from", 722 "full", 723 "group", 724 "grouping", 725 "groups", 726 "hash", 727 "having", 728 "if", 729 "ignore", 730 "in", 731 "inner", 732 "intersect", 733 "interval", 734 "into", 735 "is", 736 "join", 737 "lateral", 738 "left", 739 "like", 740 "limit", 741 "lookup", 742 "merge", 743 "natural", 744 "new", 745 "no", 746 "not", 747 "null", 748 "nulls", 749 "of", 750 "on", 751 "or", 752 "order", 753 "outer", 754 "over", 755 "partition", 756 "preceding", 757 "proto", 758 
"qualify", 759 "range", 760 "recursive", 761 "respect", 762 "right", 763 "rollup", 764 "rows", 765 "select", 766 "set", 767 "some", 768 "struct", 769 "tablesample", 770 "then", 771 "to", 772 "treat", 773 "true", 774 "unbounded", 775 "union", 776 "unnest", 777 "using", 778 "when", 779 "where", 780 "window", 781 "with", 782 "within", 783 } 784 785 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 786 if isinstance(expression.this, exp.TsOrDsToDate): 787 this: exp.Expression = expression.this 788 else: 789 this = expression 790 791 return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})" 792 793 def struct_sql(self, expression: exp.Struct) -> str: 794 args = [] 795 for expr in expression.expressions: 796 if isinstance(expr, self.KEY_VALUE_DEFINITIONS): 797 arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}" 798 else: 799 arg = self.sql(expr) 800 801 args.append(arg) 802 803 return self.func("STRUCT", *args) 804 805 def eq_sql(self, expression: exp.EQ) -> str: 806 # Operands of = cannot be NULL in BigQuery 807 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 808 if not isinstance(expression.parent, exp.Update): 809 return "NULL" 810 811 return self.binary(expression, "=") 812 813 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 814 parent = expression.parent 815 816 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 817 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
818 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 819 return self.func( 820 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 821 ) 822 823 return super().attimezone_sql(expression) 824 825 def trycast_sql(self, expression: exp.TryCast) -> str: 826 return self.cast_sql(expression, safe_prefix="SAFE_") 827 828 def cte_sql(self, expression: exp.CTE) -> str: 829 if expression.alias_column_names: 830 self.unsupported("Column names in CTE definition are not supported.") 831 return super().cte_sql(expression) 832 833 def array_sql(self, expression: exp.Array) -> str: 834 first_arg = seq_get(expression.expressions, 0) 835 if isinstance(first_arg, exp.Subqueryable): 836 return f"ARRAY{self.wrap(self.sql(first_arg))}" 837 838 return inline_array_sql(self, expression) 839 840 def bracket_sql(self, expression: exp.Bracket) -> str: 841 this = self.sql(expression, "this") 842 expressions = expression.expressions 843 844 if len(expressions) == 1: 845 arg = expressions[0] 846 if arg.type is None: 847 from sqlglot.optimizer.annotate_types import annotate_types 848 849 arg = annotate_types(arg) 850 851 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 852 # BQ doesn't support bracket syntax with string values 853 return f"{this}.{arg.name}" 854 855 expressions_sql = ", ".join(self.sql(e) for e in expressions) 856 offset = expression.args.get("offset") 857 858 if offset == 0: 859 expressions_sql = f"OFFSET({expressions_sql})" 860 elif offset == 1: 861 expressions_sql = f"ORDINAL({expressions_sql})" 862 elif offset is not None: 863 self.unsupported(f"Unsupported array offset: {offset}") 864 865 if expression.args.get("safe"): 866 expressions_sql = f"SAFE_{expressions_sql}" 867 868 return f"{this}[{expressions_sql}]" 869 870 def transaction_sql(self, *_) -> str: 871 return "BEGIN TRANSACTION" 872 873 def commit_sql(self, *_) -> str: 874 return "COMMIT TRANSACTION" 875 876 def rollback_sql(self, *_) -> str: 877 return 
"ROLLBACK TRANSACTION" 878 879 def in_unnest_op(self, expression: exp.Unnest) -> str: 880 return self.sql(expression) 881 882 def except_op(self, expression: exp.Except) -> str: 883 if not expression.args.get("distinct", False): 884 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 885 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 886 887 def intersect_op(self, expression: exp.Intersect) -> str: 888 if not expression.args.get("distinct", False): 889 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 890 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 891 892 def with_properties(self, properties: exp.Properties) -> str: 893 return self.properties(properties, prefix=self.seg("OPTIONS")) 894 895 def version_sql(self, expression: exp.Version) -> str: 896 if expression.name == "TIMESTAMP": 897 expression.set("this", "SYSTEM_TIME") 898 return super().version_sql(expression)
Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Determines whether UNNEST table aliases are treated as column aliases.
Determines whether or not user-defined data types are supported.
Specifies the strategy according to which identifiers should be normalized.
Associates this dialect's time formats with their equivalent Python strftime formats.
Mapping of an unescaped escape sequence to the corresponding character.
Helper used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed from TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT *
queries.
285 def normalize_identifier(self, expression: E) -> E: 286 if isinstance(expression, exp.Identifier): 287 parent = expression.parent 288 while isinstance(parent, exp.Dot): 289 parent = parent.parent 290 291 # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least). 292 # The following check is essentially a heuristic to detect tables based on whether or 293 # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive. 294 if ( 295 not isinstance(parent, exp.UserDefinedFunction) 296 and not (isinstance(parent, exp.Table) and parent.db) 297 and not expression.meta.get("is_table") 298 ): 299 expression.set("this", expression.this.lower()) 300 301 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO
would be resolved as foo
in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO
. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
303 class Tokenizer(tokens.Tokenizer): 304 QUOTES = ["'", '"', '"""', "'''"] 305 COMMENTS = ["--", "#", ("/*", "*/")] 306 IDENTIFIERS = ["`"] 307 STRING_ESCAPES = ["\\"] 308 309 HEX_STRINGS = [("0x", ""), ("0X", "")] 310 311 BYTE_STRINGS = [ 312 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 313 ] 314 315 RAW_STRINGS = [ 316 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 317 ] 318 319 KEYWORDS = { 320 **tokens.Tokenizer.KEYWORDS, 321 "ANY TYPE": TokenType.VARIANT, 322 "BEGIN": TokenType.COMMAND, 323 "BEGIN TRANSACTION": TokenType.BEGIN, 324 "BYTES": TokenType.BINARY, 325 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 326 "DECLARE": TokenType.COMMAND, 327 "FLOAT64": TokenType.DOUBLE, 328 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 329 "MODEL": TokenType.MODEL, 330 "NOT DETERMINISTIC": TokenType.VOLATILE, 331 "RECORD": TokenType.STRUCT, 332 "TIMESTAMP": TokenType.TIMESTAMPTZ, 333 } 334 KEYWORDS.pop("DIV")
Inherited Members
class Parser(parser.Parser):
    """Parser for BigQuery's Standard SQL dialect."""

    # PIVOT output columns are prefixed with the aggregation alias.
    PREFIXED_PIVOT_COLUMNS = True

    # A bare LOG(x) is the natural logarithm in BigQuery.
    LOG_DEFAULTS_TO_LN = True

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "DATE": _parse_date,
        "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
        "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
        "DATE_TRUNC": lambda args: exp.DateTrunc(
            unit=exp.Literal.string(str(seq_get(args, 1))),
            this=seq_get(args, 0),
        ),
        "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
        "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
        "DIV": binary_from_function(exp.IntDiv),
        # FORMAT_DATE(fmt, date): argument order is (format, value).
        "FORMAT_DATE": lambda args: exp.TimeToStr(
            this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
        ),
        "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
        # A missing JSON path defaults to the root ("$").
        "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
            this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "TO_HEX": _parse_to_hex,
        # PARSE_DATE(fmt, value): swap args into sqlglot's (value, format) order.
        "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
            [seq_get(args, 1), seq_get(args, 0)]
        ),
        "PARSE_TIMESTAMP": _parse_parse_timestamp,
        "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
        # If the pattern has exactly one capturing group, BigQuery returns that
        # group, so mark group=1 explicitly in the canonical representation.
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
        ),
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        "SPLIT": lambda args: exp.Split(
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
            this=seq_get(args, 0),
            expression=seq_get(args, 1) or exp.Literal.string(","),
        ),
        "TIME": _parse_time,
        "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
        "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
        "TIMESTAMP": _parse_timestamp,
        "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
        "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
        "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
        ),
        "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
            this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
        ),
        "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
        "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        # ARRAY(<subquery>) takes a full statement, not an expression list.
        "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
    }
    # TRIM uses the regular function-call syntax in BigQuery.
    FUNCTION_PARSERS.pop("TRIM")

    NO_PAREN_FUNCTIONS = {
        **parser.Parser.NO_PAREN_FUNCTIONS,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
    }

    NESTED_TYPE_TOKENS = {
        *parser.Parser.NESTED_TYPE_TOKENS,
        TokenType.TABLE,
    }

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.VALUES,
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "NOT DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
        ),
        "OPTIONS": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
    }

    RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
    RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

    NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.END: lambda self: self._parse_as_command(self._prev),
        TokenType.FOR: lambda self: self._parse_for_in(),
    }

    # Maps bracket accessor keywords to (index offset, SAFE_ variant) pairs.
    BRACKET_OFFSETS = {
        "OFFSET": (0, False),
        "ORDINAL": (1, False),
        "SAFE_OFFSET": (0, True),
        "SAFE_ORDINAL": (1, True),
    }

    def _parse_for_in(self) -> exp.ForIn:
        """Parse a procedural `FOR <range> DO <statement>` construct."""
        this = self._parse_range()
        self._match_text_seq("DO")
        return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one part of a table name, supporting BigQuery's dashed names.

        Names such as `my-project.dataset.table` contain dashes and may even
        start with a number, so dash-separated and number-led fragments are
        glued back together into a single identifier.
        """
        this = super()._parse_table_part(schema=schema) or self._parse_number()

        # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
        if isinstance(this, exp.Identifier):
            table_name = this.name
            # Fold `foo-bar-baz` into one identifier by consuming DASH + token pairs.
            while self._match(TokenType.DASH, advance=False) and self._next:
                self._advance(2)
                table_name += f"-{self._prev.text}"

            this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
        elif isinstance(this, exp.Literal):
            table_name = this.name

            # A number immediately followed by a var (e.g. `123abc`) is one name.
            if self._is_connected() and self._parse_var(any_token=True):
                table_name += self._prev.text

            this = exp.Identifier(this=table_name, quoted=True)

        return this

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False
    ) -> exp.Table:
        """Parse a full table reference, splitting quoted `a.b.c` names.

        A single quoted identifier like `` `project.dataset.table` `` is broken
        into catalog/db/table parts; anything beyond three parts becomes a
        nested Dot expression.
        """
        table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        if isinstance(table.this, exp.Identifier) and "." in table.name:
            catalog, db, this, *rest = (
                t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                for x in split_num_words(table.name, ".", 3)
            )

            if rest and this:
                this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

            table = exp.Table(this=this, db=db, catalog=catalog)

        return table

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
        ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
        ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT, canonicalizing the two-array call signature."""
        json_object = super()._parse_json_object()
        array_kv_pair = seq_get(json_object.expressions, 0)

        # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
        if (
            array_kv_pair
            and isinstance(array_kv_pair.this, exp.Array)
            and isinstance(array_kv_pair.expression, exp.Array)
        ):
            keys = array_kv_pair.this.expressions
            values = array_kv_pair.expression.expressions

            json_object.set(
                "expressions",
                [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
            )

        return json_object

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracket access, unwrapping OFFSET/ORDINAL/SAFE_* accessors.

        BigQuery spells array subscripts as e.g. `arr[OFFSET(0)]`; the wrapper
        function is stripped and recorded as `offset`/`safe` args on the
        Bracket node instead.
        """
        bracket = super()._parse_bracket(this)

        # No bracket was consumed; nothing to post-process.
        if this is bracket:
            return bracket

        if isinstance(bracket, exp.Bracket):
            for expression in bracket.expressions:
                name = expression.name.upper()

                if name not in self.BRACKET_OFFSETS:
                    break

                offset, safe = self.BRACKET_OFFSETS[name]
                bracket.set("offset", offset)
                bracket.set("safe", safe)
                # Replace the accessor call with its inner index expression.
                expression.replace(expression.expressions[0])

        return bracket
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates BigQuery (GoogleSQL) strings from a sqlglot expression tree.

    Overrides the base ``generator.Generator`` with BigQuery-specific feature
    flags, expression transforms, type mappings, property locations, reserved
    keywords and method-level SQL renderers.
    """

    # --- dialect feature flags -------------------------------------------
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True

    # Maps sqlglot expression classes to BigQuery SQL renderers. Entries are
    # either module-level helpers, `rename_func` wrappers, or inline lambdas.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        # DATE_DIFF defaults the unit to DAY when none was parsed.
        exp.DateDiff: lambda self,
        e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        # BigQuery's MD5 returns BYTES; wrap in TO_HEX for a hex-string MD5.
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        # SHA2 maps to SHA256 or SHA512 depending on the requested length.
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self,
        e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    # JSON path components BigQuery can express natively.
    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    # sqlglot data types -> BigQuery type names (e.g. BIGINT -> INT64).
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all",
        "and",
        "any",
        "array",
        "as",
        "asc",
        "assert_rows_modified",
        "at",
        "between",
        "by",
        "case",
        "cast",
        "collate",
        "contains",
        "create",
        "cross",
        "cube",
        "current",
        "default",
        "define",
        "desc",
        "distinct",
        "else",
        "end",
        "enum",
        "escape",
        "except",
        "exclude",
        "exists",
        "extract",
        "false",
        "fetch",
        "following",
        "for",
        "from",
        "full",
        "group",
        "grouping",
        "groups",
        "hash",
        "having",
        "if",
        "ignore",
        "in",
        "inner",
        "intersect",
        "interval",
        "into",
        "is",
        "join",
        "lateral",
        "left",
        "like",
        "limit",
        "lookup",
        "merge",
        "natural",
        "new",
        "no",
        "not",
        "null",
        "nulls",
        "of",
        "on",
        "or",
        "order",
        "outer",
        "over",
        "partition",
        "preceding",
        "proto",
        "qualify",
        "range",
        "recursive",
        "respect",
        "right",
        "rollup",
        "rows",
        "select",
        "set",
        "some",
        "struct",
        "tablesample",
        "then",
        "to",
        "treat",
        "true",
        "unbounded",
        "union",
        "unnest",
        "using",
        "when",
        "where",
        "window",
        "with",
        "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        """Render TimeToStr as FORMAT_DATE(<fmt>, <date>).

        When the operand is a TsOrDsToDate wrapper, format its inner value
        directly instead of the wrapper.
        """
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a Struct literal as STRUCT(<expr> AS <name>, ...)."""
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                # Key/value pairs become `value AS key` field definitions.
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            # `SET col = NULL` inside an UPDATE is still legal, so only
            # collapse to NULL outside of UPDATE statements.
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        """Render AT TIME ZONE, converting to TIMESTAMP(DATETIME(...)) unless
        it appears inside a CAST(.. AS STRING FORMAT .. AT TIME ZONE ..)."""
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        # TRY_CAST maps to BigQuery's SAFE_CAST.
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        # BigQuery CTEs cannot declare column aliases; warn and drop them.
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        """Render ARRAY(<subquery>) for subquery operands, otherwise an
        inline array literal."""
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render subscript access, mapping string keys to dot access and
        offsets to OFFSET()/ORDINAL() (optionally SAFE_-prefixed)."""
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                # Lazily annotate types so string subscripts can be detected.
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        # offset 0 -> zero-based OFFSET(), offset 1 -> one-based ORDINAL().
        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        # BigQuery spells BEGIN as BEGIN TRANSACTION.
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        # `x IN UNNEST(arr)` needs no parentheses around the UNNEST.
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        # Table/view properties are emitted in an OPTIONS(...) clause.
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        # FOR TIMESTAMP AS OF -> FOR SYSTEM_TIME AS OF in BigQuery.
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case-insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
def struct_sql(self, expression: exp.Struct) -> str:
    """Render a Struct expression as a STRUCT(...) function call.

    Key/value members (per ``KEY_VALUE_DEFINITIONS``) are emitted as
    ``<value> AS <key>``; all other members are rendered as-is.
    """
    rendered = []
    for member in expression.expressions:
        is_kv = isinstance(member, self.KEY_VALUE_DEFINITIONS)
        rendered.append(
            f"{self.sql(member, 'expression')} AS {member.this.name}"
            if is_kv
            else self.sql(member)
        )

    return self.func("STRUCT", *rendered)
def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
    """Render AT TIME ZONE for BigQuery.

    Outside of a CAST-to-text context, the expression is converted into
    TIMESTAMP(DATETIME(<this>, <zone>)); inside CAST(.. AS STRING FORMAT ..
    AT TIME ZONE ..) the default rendering from the superclass is kept.
    """
    parent = expression.parent

    # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
    # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
    if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
        return self.func(
            "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
        )

    return super().attimezone_sql(expression)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render subscript (bracket) access for BigQuery.

    Single string-typed keys become dot access (``x.key``), since BigQuery
    has no string bracket syntax. Numeric subscripts are wrapped in
    OFFSET(...) (zero-based) or ORDINAL(...) (one-based) according to the
    expression's ``offset`` arg, with a SAFE_ prefix when ``safe`` is set.
    """
    this = self.sql(expression, "this")
    expressions = expression.expressions

    if len(expressions) == 1:
        arg = expressions[0]
        if arg.type is None:
            # Type info may be missing; annotate lazily to classify the key.
            from sqlglot.optimizer.annotate_types import annotate_types

            arg = annotate_types(arg)

        if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
            # BQ doesn't support bracket syntax with string values
            return f"{this}.{arg.name}"

    expressions_sql = ", ".join(self.sql(e) for e in expressions)
    offset = expression.args.get("offset")

    if offset == 0:
        expressions_sql = f"OFFSET({expressions_sql})"
    elif offset == 1:
        expressions_sql = f"ORDINAL({expressions_sql})"
    elif offset is not None:
        # Any other offset base has no BigQuery equivalent.
        self.unsupported(f"Unsupported array offset: {offset}")

    if expression.args.get("safe"):
        expressions_sql = f"SAFE_{expressions_sql}"

    return f"{this}[{expressions_sql}]"
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql