sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    format_time_lambda,
    if_sql,
    inline_array_sql,
    json_keyvalue_comma_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    parse_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    ts_or_ds_to_date_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    alias = expression.args.get("alias")

    structs = [
        exp.Struct(
            expressions=[
                exp.alias_(value, column_name)
                for value, column_name in zip(
                    t.expressions,
                    alias.columns
                    if alias and alias.columns
                    else (f"_c{i}" for i in range(len(t.expressions))),
                )
            ]
        )
        for t in expression.find_all(exp.Tuple)
    ]

    return self.unnest_sql(exp.Unnest(expressions=[exp.Array(expressions=structs)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{this.this} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    kind = expression.args["kind"]
    returns = expression.find(exp.ReturnsProperty)

    if kind.upper() == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

        return self.create_sql(expression)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases, since BigQuery doesn't allow them.

    These are added by the optimizer's qualify_columns step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for a BigQuery bug when grouping by an expression and then ordering:
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY y + 1
# ORDER BY z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for e in group.expressions:
                alias = aliases.get(e)

                if alias:
                    e.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _parse_timestamp(args: t.List) -> exp.StrToTime:
    this = format_time_lambda(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _parse_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _parse_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))

    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return f"TIMESTAMP_SECONDS({timestamp})"
    if scale == exp.UnixToTime.MILLIS:
        return f"TIMESTAMP_MILLIS({timestamp})"
    if scale == exp.UnixToTime.MICROS:
        return f"TIMESTAMP_MICROS({timestamp})"
    if scale == exp.UnixToTime.NANOS:
        # We need to cast to INT64 because that's what BQ expects
        return f"TIMESTAMP_MICROS(CAST({timestamp} / 1000 AS INT64))"

    self.unsupported(f"Unsupported scale for timestamp: {scale}.")
    return ""


class BigQuery(Dialect):
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # BigQuery UDFs are case-sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _parse_date,
            "DATE_ADD": parse_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": parse_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": parse_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": parse_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _parse_to_hex,
            "PARSE_DATE": lambda args: format_time_lambda(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME_ADD": parse_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": parse_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP_ADD": parse_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": parse_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.SECONDS
            ),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.VALUES,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS, None)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if (
                    self._curr
                    and self._prev.end == self._curr.start - 1
                    and self._parse_var(any_token=True)
                ):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(self, schema: bool = False) -> exp.Table:
            table = super()._parse_table_parts(schema=schema)
            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        def _parse_json_object(self) -> exp.JSONObject:
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}",
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.JSONKeyValue: json_keyvalue_comma_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: "DETERMINISTIC"
            if e.name == "IMMUTABLE"
            else "NOT DETERMINISTIC",
            exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def cte_sql(self, expression: exp.CTE) -> str:
            if expression.alias_column_names:
                self.unsupported("Column names in CTE definition are not supported.")
            return super().cte_sql(expression)

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            expressions = expression.expressions
            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            else:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(expression, 'this')}[{expressions_sql}]"

        def transaction_sql(self, *_) -> str:
            return "BEGIN TRANSACTION"

        def commit_sql(self, *_) -> str:
            return "COMMIT TRANSACTION"

        def rollback_sql(self, *_) -> str:
            return "ROLLBACK TRANSACTION"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct", False):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
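Several of these rewrites can be exercised end to end through sqlglot's public API. A minimal sketch, using throwaway names (t, x, y, abc); the outputs in the comments are indicative of the shapes these transforms produce, assuming a sqlglot version matching this source:

import sqlglot

# VALUES used as a derived table becomes UNNEST over an array of STRUCTs
# (_derived_table_values_to_unnest), since BigQuery has no derived-table VALUES.
print(sqlglot.transpile("SELECT * FROM (VALUES (1, 'a')) AS t(x, y)", read="duckdb", write="bigquery")[0])
# roughly: SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)])

# TO_HEX(MD5(..)) is collapsed into a canonical MD5 node on the way in
# (_parse_to_hex), which simplifies transpiling to dialects whose MD5 already
# returns a hex string.
print(sqlglot.transpile("SELECT TO_HEX(MD5('abc'))", read="bigquery", write="duckdb")[0])
# roughly: SELECT MD5('abc')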
BigQuery.normalize_identifier
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
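For BigQuery specifically, the heuristic in the source above lowercases unqualified names while leaving qualified table parts alone. A small sketch using sqlglot's normalize_identifiers optimizer rule, with made-up names; the printed result is indicative:

import sqlglot
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

expr = sqlglot.parse_one("SELECT FoO FROM Project.DataSet.TaBlE", read="bigquery")

# The column FoO is case-insensitive and should be lowered to foo, while the
# qualified table parts are treated as case-sensitive table names and keep
# their casing.
print(normalize_identifiers(expr, dialect="bigquery").sql("bigquery"))
# roughly: SELECT foo FROM Project.DataSet.TaBlE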
BigQuery.Tokenizer
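The overrides in the Tokenizer above give BigQuery its backtick-quoted identifiers, # comments, triple-quoted strings, and b/r string prefixes. A quick sketch of input that therefore tokenizes and parses cleanly (table and alias names are made up):

import sqlglot

# Backticks quote identifiers; raw and triple-quoted strings are both legal.
expr = sqlglot.parse_one(
    "SELECT r'\\d+' AS pattern, '''multi line''' AS s FROM `my-project.dataset.t`",
    read="bigquery",
)
print(expr.sql("bigquery"))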
BigQuery.Parser
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
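On top of this base machinery, the overrides shown in the module source let the parser accept BigQuery-only syntax. A sketch of two such constructs, using throwaway names; the attribute checks in the comments are indicative:

import sqlglot
from sqlglot import exp

# Unquoted dashes are stitched into a single table-name part
# (_parse_table_part), as in project ids like my-project.
table = sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").find(exp.Table)
# table.catalog == "my-project", table.db == "dataset", table.name == "tbl"

# OFFSET/ORDINAL/SAFE_OFFSET/SAFE_ORDINAL wrappers are folded into the
# offset/safe args of the Bracket node (_parse_bracket + BRACKET_OFFSETS).
bracket = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(2)] FROM t", read="bigquery").find(exp.Bracket)
# bracket.args.get("offset") == 0 and bracket.args.get("safe") is True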
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: f"DATE_DIFF({self.sql(e, 'this')}, {self.sql(e, 'expression')}, {self.sql(e.args.get('unit', 'DAY'))})",
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.JSONKeyValue: json_keyvalue_comma_sql,
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: f"PARSE_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToStr: lambda self, e: f"FORMAT_DATE({self.format_time(e)}, {self.sql(e, 'this')})",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: ts_or_ds_to_date_sql("bigquery"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def cte_sql(self, expression: exp.CTE) -> str:
        if expression.alias_column_names:
            self.unsupported("Column names in CTE definition are not supported.")
        return super().cte_sql(expression)

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        expressions = expression.expressions
        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        else:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(expression, 'this')}[{expressions_sql}]"

    def transaction_sql(self, *_) -> str:
        return "BEGIN TRANSACTION"

    def commit_sql(self, *_) -> str:
        return "COMMIT TRANSACTION"

    def rollback_sql(self, *_) -> str:
        return "ROLLBACK TRANSACTION"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct", False):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
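As a quick illustration (this example is not part of the module source), a few of the overrides above can be observed through sqlglot's public transpile API; the expected outputs assume a sqlglot build matching this source:

    import sqlglot

    # EXPLICIT_UNION forces the DISTINCT/ALL qualifier onto set operations.
    print(sqlglot.transpile("SELECT 1 UNION SELECT 2", write="bigquery")[0])
    # SELECT 1 UNION DISTINCT SELECT 2

    # TYPE_MAPPING plus remove_precision_parameterized_types rewrite the cast target.
    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10)) FROM t", write="bigquery")[0])
    # SELECT CAST(x AS STRING) FROM t

    # rename_func maps Presto's APPROX_DISTINCT onto APPROX_COUNT_DISTINCT.
    print(sqlglot.transpile("SELECT APPROX_DISTINCT(x) FROM t", read="presto", write="bigquery")[0])
    # SELECT APPROX_COUNT_DISTINCT(x) FROM t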
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Determines whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The maximum number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether or not to preserve comments in the output SQL code. Default: True.
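For example (an illustrative sketch, not part of the generated documentation), these options can be passed to sqlglot.transpile, which forwards them to the dialect's Generator:

    import sqlglot

    # Pretty-print the output and force-quote all identifiers.
    sql = "select a, b from my_table where a > 1"
    print(sqlglot.transpile(sql, write="bigquery", pretty=True, identify=True)[0])
    # Expected output (roughly):
    # SELECT
    #   `a`,
    #   `b`
    # FROM `my_table`
    # WHERE
    #   `a` > 1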
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SIZE_IS_PERCENT
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- concat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql