sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    json_keyvalue_comma_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    path_to_jsonpath,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(t.expressions))),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"
    if scale == exp.UnixToTime.NANOS:
        # We need to cast to INT64 because that's what BQ expects
        return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"

    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
    return ""


def _parse_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _parse_time,
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        def _parse_json_object(self) -> exp.JSONObject:
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GetPath: path_to_jsonpath(),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.JSONKeyValue: json_keyvalue_comma_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToDate):
                this: exp.Expression = expression.this
            else:
                this = expression

            return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
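The transforms above are easiest to see end to end. The following is a minimal usage sketch, not part of the module; the queries are invented for illustration and the exact output strings may vary slightly between sqlglot versions:

import sqlglot

# VALUES as a derived table becomes UNNEST of STRUCTs
# (via _derived_table_values_to_unnest):
print(sqlglot.transpile("SELECT * FROM (VALUES (1), (2)) AS t(x)", write="bigquery")[0])
# e.g. SELECT * FROM UNNEST([STRUCT(1 AS x), STRUCT(2 AS x)]) AS t

# TO_HEX(MD5(..)) is parsed into exp.MD5 (via _parse_to_hex), so it can be
# rendered as a single call in dialects whose MD5 already returns hex:
print(sqlglot.transpile("SELECT TO_HEX(MD5('abc'))", read="bigquery", write="duckdb")[0])
# e.g. SELECT MD5('abc')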
class BigQuery(sqlglot.dialects.dialect.Dialect):
Class attributes:

WEEK_OFFSET = -1
    Determines the day of week of DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.

UNNEST_COLUMN_ONLY = True
    Determines whether or not UNNEST table aliases are treated as column aliases.

SUPPORTS_USER_DEFINED_TYPES = False
    Determines whether or not user-defined data types are supported.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
    Specifies the strategy according to which identifiers should be normalized.

TIME_MAPPING = {"%D": "%m/%d/%y"}
    Associates this dialect's time formats with their equivalent Python strftime format.

ESCAPE_SEQUENCES
    Mapping of an unescaped escape sequence to the corresponding character.

FORMAT_MAPPING
    Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
    If empty, the corresponding trie will be constructed off of TIME_MAPPING.

PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
    Columns that are auto-generated by the engine corresponding to this dialect.
    For example, such columns may be excluded from SELECT * queries.
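These settings can be inspected directly on the dialect; a quick, illustrative sanity check:

from sqlglot.dialects.bigquery import BigQuery

bq = BigQuery()
print(bq.WEEK_OFFSET)            # -1, i.e. weeks start on Sunday for DATE_TRUNC
print(bq.UNNEST_COLUMN_ONLY)     # True
print(bq.TIME_MAPPING["%D"])     # '%m/%d/%y'
print(sorted(bq.PSEUDOCOLUMNS))  # ['_PARTITIONDATE', '_PARTITIONTIME']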
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect. For
example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases
all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve
it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any
normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present,
and dialects like MySQL, whose resolution rules match those employed by the underlying
operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags,
like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that
it can analyze queries in the optimizer and successfully capture their semantics.
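For BigQuery specifically, the heuristic in the source above plays out as follows. A small sketch; the identifiers are invented:

from sqlglot import exp, parse_one
from sqlglot.dialects.bigquery import BigQuery

bq = BigQuery()

# An unqualified identifier is lowercased, since it can't be a
# (potentially case-sensitive) qualified table name.
print(bq.normalize_identifier(exp.to_identifier("FoO")).name)  # foo

# A qualified table name is left untouched: its parent is an exp.Table
# that has a db part, so the heuristic skips it.
table = parse_one("SELECT * FROM db.FoO", read="bigquery").find(exp.Table)
print(bq.normalize_identifier(table.this).name)  # FoO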
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class BigQuery.Tokenizer(sqlglot.tokens.Tokenizer):
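A short, illustrative look at what this tokenizer accepts: '#' line comments, backtick-quoted identifiers, and r/b string prefixes. The query text is made up:

from sqlglot.dialects.bigquery import BigQuery

tokens = BigQuery().tokenize("SELECT `col`, r'\\d+' # trailing comment")
print([(token.token_type.name, token.text) for token in tokens])
# Expect an identifier token for col and a raw-string token for the regex
# literal; the '#' comment is attached to a token rather than emitted as one.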
class BigQuery.Parser(sqlglot.parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
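These error-handling arguments are forwarded by the top-level helpers such as sqlglot.parse_one, so a hedged sketch of exercising them against invalid SQL (the exact error payload depends on the sqlglot version) might look like:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    # error_level and max_errors flow through to the Parser
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE, max_errors=1)
except ParseError as e:
    print(e.errors)  # structured details for up to max_errors problems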
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GetPath: path_to_jsonpath(),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.JSONKeyValue: json_keyvalue_comma_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        if isinstance(expression.this, exp.TsOrDsToDate):
            this: exp.Expression = expression.this
        else:
            this = expression

        return f"FORMAT_DATE({self.format_time(expression)}, {self.sql(this, 'this')})"

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
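A couple of hedged round-trip examples showing these transforms through the public API. The outputs in the comments are indicative of what the TRANSFORMS table above produces and may vary slightly between sqlglot versions.

import sqlglot

# exp.ILike has no BigQuery equivalent, so no_ilike_sql rewrites it via LOWER + LIKE.
print(sqlglot.transpile("SELECT x ILIKE '%a%' FROM t", write="bigquery")[0])
# e.g. SELECT LOWER(x) LIKE '%a%' FROM t

# The exp.SHA2 transform picks SHA256 or SHA512 based on the length argument.
print(sqlglot.transpile("SELECT SHA2(x, 256)", write="bigquery")[0])
# e.g. SELECT SHA256(x)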
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where the dialect mandates it.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
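Since these options are forwarded by sqlglot.transpile to the Generator, a small hedged sketch of the pretty and identify knobs:

import sqlglot

sql = sqlglot.transpile(
    "select col_a, count(*) as n from tbl group by col_a",
    write="bigquery",
    identify=True,  # quote every identifier
    pretty=True,    # multi-line, indented output
)[0]
print(sql)  # identifiers come back quoted with BigQuery backticks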
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql