# sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Render a LATERAL EXPLODE as a CROSS JOIN UNNEST, which is Presto's equivalent."""
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    # POSEXPLODE also produces a position column -> UNNEST ... WITH ORDINALITY
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP with REGEXP_REPLACE, since Presto has no INITCAP function."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT; descending order needs an explicit comparator."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Generate schema SQL, mapping a property-level schema to an ARRAY of column names."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Fall back to APPROX_PERCENTILE; Presto has no exact quantile function."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time parsing via Presto's DATE_PARSE."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-datestring value to DATE, parsing with DATE_PARSE when a
    non-standard format string is attached."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD(unit, amount, value), casting operands as needed."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF(unit, start, end) over TIMESTAMP-cast operands."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE, which optionally takes a weight and/or accuracy argument."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME, which may carry (hours, minutes) offsets or a zone argument."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
    """Wrap a GENERATE_SERIES table source in UNNEST, since SEQUENCE returns an array."""
    if isinstance(expression, exp.Table):
        if isinstance(expression.this, exp.GenerateSeries):
            unnest = exp.Unnest(expressions=[expression.this])

            if expression.alias:
                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
            return unnest
    return expression


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime, scaling fractional-second epochs down to seconds first."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(expression: exp.Expression) -> exp.Expression:
    """Ensure the expression is integer-typed, casting to BIGINT when it is not."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse TO_CHAR using Teradata's time-format mappings."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


class Presto(Dialect):
    """SQL dialect definition for Presto (and, by inheritance, Trino)."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    TIME_MAPPING = MySQL.TIME_MAPPING
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    class Tokenizer(tokens.Tokenizer):
        # U&'...' / u&'...' Unicode string literals
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }

        # Presto has no QUALIFY clause
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxDistinct: lambda self, e: self.func(
                "APPROX_DISTINCT", e.this, e.args.get("accuracy")
            ),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression),
                e.this,
            ),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: lambda self, e: self.func(
                "DATE_ADD",
                unit_to_str(e),
                _to_int(e.expression * -1),
                e.this,
            ),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.ParseJSON: rename_func("JSON_PARSE"),
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5: lambda self, e: self.func(
                "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this")))
            ),
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                value_as_text,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            # "Safe" (null-returning) subscripts are rendered as ELEMENT_AT, with the
            # index rebased to Presto's 1-based offset.
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a Struct as ROW(...), adding a CAST to a typed ROW when every
            key-value entry has a known type."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            # Presto has no WEEK interval unit, so express it as days.
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            """Render GENERATE_SERIES as SEQUENCE, aligning start/end types when one
            side is explicitly cast to a timestamp."""
            start = expression.args["start"]
            end = expression.args["end"]
            step = expression.args.get("step")

            if isinstance(start, exp.Cast):
                target_type = start.to
            elif isinstance(end, exp.Cast):
                target_type = end.to
            else:
                target_type = None

            if target_type and target_type.is_type("timestamp"):
                if target_type is start.to:
                    end = exp.cast(end, target_type)
                else:
                    start = exp.cast(start, target_type)

            return self.func("SEQUENCE", start, end, step)

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto expects OFFSET before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
197class Presto(Dialect): 198 INDEX_OFFSET = 1 199 NULL_ORDERING = "nulls_are_last" 200 TIME_FORMAT = MySQL.TIME_FORMAT 201 TIME_MAPPING = MySQL.TIME_MAPPING 202 STRICT_STRING_CONCAT = True 203 SUPPORTS_SEMI_ANTI_JOIN = False 204 TYPED_DIVISION = True 205 TABLESAMPLE_SIZE_IS_PERCENT = True 206 LOG_BASE_FIRST: t.Optional[bool] = None 207 208 # https://github.com/trinodb/trino/issues/17 209 # https://github.com/trinodb/trino/issues/12289 210 # https://github.com/prestodb/presto/issues/2863 211 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 212 213 class Tokenizer(tokens.Tokenizer): 214 UNICODE_STRINGS = [ 215 (prefix + q, q) 216 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 217 for prefix in ("U&", "u&") 218 ] 219 220 KEYWORDS = { 221 **tokens.Tokenizer.KEYWORDS, 222 "START": TokenType.BEGIN, 223 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 224 "ROW": TokenType.STRUCT, 225 "IPADDRESS": TokenType.IPADDRESS, 226 "IPPREFIX": TokenType.IPPREFIX, 227 "TDIGEST": TokenType.TDIGEST, 228 "HYPERLOGLOG": TokenType.HLLSKETCH, 229 } 230 231 KEYWORDS.pop("QUALIFY") 232 233 class Parser(parser.Parser): 234 VALUES_FOLLOWED_BY_PAREN = False 235 236 FUNCTIONS = { 237 **parser.Parser.FUNCTIONS, 238 "ARBITRARY": exp.AnyValue.from_arg_list, 239 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 240 "APPROX_PERCENTILE": _build_approx_percentile, 241 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 242 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 243 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 244 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 245 "CARDINALITY": exp.ArraySize.from_arg_list, 246 "CONTAINS": exp.ArrayContains.from_arg_list, 247 "DATE_ADD": lambda args: exp.DateAdd( 248 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 249 ), 250 "DATE_DIFF": lambda args: exp.DateDiff( 251 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 252 ), 253 "DATE_FORMAT": 
build_formatted_time(exp.TimeToStr, "presto"), 254 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 255 "DATE_TRUNC": date_trunc_to_time, 256 "ELEMENT_AT": lambda args: exp.Bracket( 257 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 258 ), 259 "FROM_HEX": exp.Unhex.from_arg_list, 260 "FROM_UNIXTIME": _build_from_unixtime, 261 "FROM_UTF8": lambda args: exp.Decode( 262 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 263 ), 264 "NOW": exp.CurrentTimestamp.from_arg_list, 265 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 266 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 267 ), 268 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 269 this=seq_get(args, 0), 270 expression=seq_get(args, 1), 271 replacement=seq_get(args, 2) or exp.Literal.string(""), 272 ), 273 "ROW": exp.Struct.from_arg_list, 274 "SEQUENCE": exp.GenerateSeries.from_arg_list, 275 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 276 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 277 "STRPOS": lambda args: exp.StrPosition( 278 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 279 ), 280 "TO_CHAR": _build_to_char, 281 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 282 "TO_UTF8": lambda args: exp.Encode( 283 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 284 ), 285 "MD5": exp.MD5Digest.from_arg_list, 286 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 287 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 288 } 289 290 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 291 FUNCTION_PARSERS.pop("TRIM") 292 293 class Generator(generator.Generator): 294 INTERVAL_ALLOWS_PLURAL_FORM = False 295 JOIN_HINTS = False 296 TABLE_HINTS = False 297 QUERY_HINTS = False 298 IS_BOOL_ALLOWED = False 299 TZ_TO_WITH_TIME_ZONE = True 300 NVL2_SUPPORTED = False 301 STRUCT_DELIMITER = ("(", ")") 302 
LIMIT_ONLY_LITERALS = True 303 SUPPORTS_SINGLE_ARG_CONCAT = False 304 LIKE_PROPERTY_INSIDE_SCHEMA = True 305 MULTI_ARG_DISTINCT = False 306 SUPPORTS_TO_NUMBER = False 307 HEX_FUNC = "TO_HEX" 308 309 PROPERTIES_LOCATION = { 310 **generator.Generator.PROPERTIES_LOCATION, 311 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 312 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 313 } 314 315 TYPE_MAPPING = { 316 **generator.Generator.TYPE_MAPPING, 317 exp.DataType.Type.INT: "INTEGER", 318 exp.DataType.Type.FLOAT: "REAL", 319 exp.DataType.Type.BINARY: "VARBINARY", 320 exp.DataType.Type.TEXT: "VARCHAR", 321 exp.DataType.Type.TIMETZ: "TIME", 322 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 323 exp.DataType.Type.STRUCT: "ROW", 324 exp.DataType.Type.DATETIME: "TIMESTAMP", 325 exp.DataType.Type.DATETIME64: "TIMESTAMP", 326 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 327 } 328 329 TRANSFORMS = { 330 **generator.Generator.TRANSFORMS, 331 exp.AnyValue: rename_func("ARBITRARY"), 332 exp.ApproxDistinct: lambda self, e: self.func( 333 "APPROX_DISTINCT", e.this, e.args.get("accuracy") 334 ), 335 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 336 exp.ArgMax: rename_func("MAX_BY"), 337 exp.ArgMin: rename_func("MIN_BY"), 338 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 339 exp.ArrayAny: rename_func("ANY_MATCH"), 340 exp.ArrayConcat: rename_func("CONCAT"), 341 exp.ArrayContains: rename_func("CONTAINS"), 342 exp.ArraySize: rename_func("CARDINALITY"), 343 exp.ArrayToString: rename_func("ARRAY_JOIN"), 344 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 345 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 346 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 347 exp.BitwiseLeftShift: lambda self, e: self.func( 348 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 349 ), 350 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 351 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, 
e.expression), 352 exp.BitwiseRightShift: lambda self, e: self.func( 353 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 354 ), 355 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 356 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 357 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 358 exp.DateAdd: lambda self, e: self.func( 359 "DATE_ADD", 360 unit_to_str(e), 361 _to_int(e.expression), 362 e.this, 363 ), 364 exp.DateDiff: lambda self, e: self.func( 365 "DATE_DIFF", unit_to_str(e), e.expression, e.this 366 ), 367 exp.DateStrToDate: datestrtodate_sql, 368 exp.DateToDi: lambda self, 369 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 370 exp.DateSub: lambda self, e: self.func( 371 "DATE_ADD", 372 unit_to_str(e), 373 _to_int(e.expression * -1), 374 e.this, 375 ), 376 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 377 exp.DiToDate: lambda self, 378 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 379 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 380 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 381 exp.First: _first_last_sql, 382 exp.FirstValue: _first_last_sql, 383 exp.FromTimeZone: lambda self, 384 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 385 exp.Group: transforms.preprocess([transforms.unalias_group]), 386 exp.GroupConcat: lambda self, e: self.func( 387 "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator") 388 ), 389 exp.If: if_sql(), 390 exp.ILike: no_ilike_sql, 391 exp.Initcap: _initcap_sql, 392 exp.ParseJSON: rename_func("JSON_PARSE"), 393 exp.Last: _first_last_sql, 394 exp.LastValue: _first_last_sql, 395 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 396 exp.Lateral: _explode_to_unnest_sql, 397 exp.Left: left_to_substring_sql, 398 exp.Levenshtein: 
rename_func("LEVENSHTEIN_DISTANCE"), 399 exp.LogicalAnd: rename_func("BOOL_AND"), 400 exp.LogicalOr: rename_func("BOOL_OR"), 401 exp.Pivot: no_pivot_sql, 402 exp.Quantile: _quantile_sql, 403 exp.RegexpExtract: regexp_extract_sql, 404 exp.Right: right_to_substring_sql, 405 exp.SafeDivide: no_safe_divide_sql, 406 exp.Schema: _schema_sql, 407 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 408 exp.Select: transforms.preprocess( 409 [ 410 transforms.eliminate_qualify, 411 transforms.eliminate_distinct_on, 412 transforms.explode_to_unnest(1), 413 transforms.eliminate_semi_and_anti_joins, 414 ] 415 ), 416 exp.SortArray: _no_sort_array, 417 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 418 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 419 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 420 exp.StrToTime: _str_to_time_sql, 421 exp.StructExtract: struct_extract_sql, 422 exp.Table: transforms.preprocess([_unnest_sequence]), 423 exp.Timestamp: no_timestamp_sql, 424 exp.TimestampTrunc: timestamptrunc_sql(), 425 exp.TimeStrToDate: timestrtotime_sql, 426 exp.TimeStrToTime: timestrtotime_sql, 427 exp.TimeStrToUnix: lambda self, e: self.func( 428 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 429 ), 430 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 431 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 432 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 433 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 434 exp.TsOrDiToDi: lambda self, 435 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 436 exp.TsOrDsAdd: _ts_or_ds_add_sql, 437 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 438 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 439 exp.Unhex: rename_func("FROM_HEX"), 440 exp.UnixToStr: lambda self, 441 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), 
{self.format_time(e)})", 442 exp.UnixToTime: _unix_to_time_sql, 443 exp.UnixToTimeStr: lambda self, 444 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 445 exp.VariancePop: rename_func("VAR_POP"), 446 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 447 exp.WithinGroup: transforms.preprocess( 448 [transforms.remove_within_group_for_percentiles] 449 ), 450 exp.Xor: bool_xor_sql, 451 exp.MD5: lambda self, e: self.func( 452 "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this"))) 453 ), 454 exp.MD5Digest: rename_func("MD5"), 455 exp.SHA: rename_func("SHA1"), 456 exp.SHA2: lambda self, e: self.func( 457 "SHA256" if e.text("length") == "256" else "SHA512", e.this 458 ), 459 } 460 461 RESERVED_KEYWORDS = { 462 "alter", 463 "and", 464 "as", 465 "between", 466 "by", 467 "case", 468 "cast", 469 "constraint", 470 "create", 471 "cross", 472 "current_time", 473 "current_timestamp", 474 "deallocate", 475 "delete", 476 "describe", 477 "distinct", 478 "drop", 479 "else", 480 "end", 481 "escape", 482 "except", 483 "execute", 484 "exists", 485 "extract", 486 "false", 487 "for", 488 "from", 489 "full", 490 "group", 491 "having", 492 "in", 493 "inner", 494 "insert", 495 "intersect", 496 "into", 497 "is", 498 "join", 499 "left", 500 "like", 501 "natural", 502 "not", 503 "null", 504 "on", 505 "or", 506 "order", 507 "outer", 508 "prepare", 509 "right", 510 "select", 511 "table", 512 "then", 513 "true", 514 "union", 515 "using", 516 "values", 517 "when", 518 "where", 519 "with", 520 } 521 522 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 523 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
524 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 525 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 526 # which seems to be using the same time mapping as Hive, as per: 527 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 528 value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT) 529 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 530 parse_with_tz = self.func( 531 "PARSE_DATETIME", 532 value_as_text, 533 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 534 ) 535 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 536 return self.func("TO_UNIXTIME", coalesced) 537 538 def bracket_sql(self, expression: exp.Bracket) -> str: 539 if expression.args.get("safe"): 540 return self.func( 541 "ELEMENT_AT", 542 expression.this, 543 seq_get( 544 apply_index_offset( 545 expression.this, 546 expression.expressions, 547 1 - expression.args.get("offset", 0), 548 ), 549 0, 550 ), 551 ) 552 return super().bracket_sql(expression) 553 554 def struct_sql(self, expression: exp.Struct) -> str: 555 from sqlglot.optimizer.annotate_types import annotate_types 556 557 expression = annotate_types(expression) 558 values: t.List[str] = [] 559 schema: t.List[str] = [] 560 unknown_type = False 561 562 for e in expression.expressions: 563 if isinstance(e, exp.PropertyEQ): 564 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 565 unknown_type = True 566 else: 567 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 568 values.append(self.sql(e, "expression")) 569 else: 570 values.append(self.sql(e)) 571 572 size = len(expression.expressions) 573 574 if not size or len(schema) != size: 575 if unknown_type: 576 self.unsupported( 577 "Cannot convert untyped key-value definitions (try annotate_types)." 
578 ) 579 return self.func("ROW", *values) 580 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 581 582 def interval_sql(self, expression: exp.Interval) -> str: 583 if expression.this and expression.text("unit").upper().startswith("WEEK"): 584 return f"({expression.this.name} * INTERVAL '7' DAY)" 585 return super().interval_sql(expression) 586 587 def transaction_sql(self, expression: exp.Transaction) -> str: 588 modes = expression.args.get("modes") 589 modes = f" {', '.join(modes)}" if modes else "" 590 return f"START TRANSACTION{modes}" 591 592 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 593 start = expression.args["start"] 594 end = expression.args["end"] 595 step = expression.args.get("step") 596 597 if isinstance(start, exp.Cast): 598 target_type = start.to 599 elif isinstance(end, exp.Cast): 600 target_type = end.to 601 else: 602 target_type = None 603 604 if target_type and target_type.is_type("timestamp"): 605 if target_type is start.to: 606 end = exp.cast(end, target_type) 607 else: 608 start = exp.cast(start, target_type) 609 610 return self.func("SEQUENCE", start, end, step) 611 612 def offset_limit_modifiers( 613 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 614 ) -> t.List[str]: 615 return [ 616 self.sql(expression, "offset"), 617 self.sql(limit), 618 ] 619 620 def create_sql(self, expression: exp.Create) -> str: 621 """ 622 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 623 so we need to remove them 624 """ 625 kind = expression.args["kind"] 626 schema = expression.this 627 if kind == "VIEW" and schema.expressions: 628 expression.this.set("expressions", None) 629 return super().create_sql(expression) 630 631 def delete_sql(self, expression: exp.Delete) -> str: 632 """ 633 Presto only supports DELETE FROM for a single table without an alias, so we need 634 to remove the unnecessary parts. 
If the original DELETE statement contains more 635 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 636 """ 637 tables = expression.args.get("tables") or [expression.this] 638 if len(tables) > 1: 639 return super().delete_sql(expression) 640 641 table = tables[0] 642 expression.set("this", table) 643 expression.set("tables", None) 644 645 if isinstance(table, exp.Table): 646 table_alias = table.args.get("alias") 647 if table_alias: 648 table_alias.pop() 649 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 650 651 return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Associates this dialect's time formats with their equivalent Python strftime formats.
Whether the behavior of `a / b` depends on the types of `a` and `b`. False means `a / b` is always float division. True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function. Possible values: `True`, `False`, `None` (two arguments are not supported by `LOG`).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- COPY_PARAMS_ARE_CSV
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    # Presto supports unicode string literals prefixed with U& or u&, using the
    # same quote characters as ordinary strings.
    UNICODE_STRINGS = [
        (prefix + quote, quote)
        for quote in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }

    # QUALIFY is not a reserved word in Presto, so it must remain usable as an
    # identifier.
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    VALUES_FOLLOWED_BY_PAREN = False

    # Maps Presto function names to sqlglot expression builders.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date), hence the
        # reversed argument mapping.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT is 1-indexed and returns NULL when out of bounds.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    # Presto has no TRIM(... FROM ...) function-call syntax to special-case.
    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTER
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxDistinct: lambda self, e: self.func(
            "APPROX_DISTINCT", e.this, e.args.get("accuracy")
        ),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        # Presto's DATE_ADD takes (unit, value, date) and requires an integer value.
        exp.DateAdd: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression),
            e.this,
        ),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # DateSub is expressed as DATE_ADD with a negated amount.
        exp.DateSub: lambda self, e: self.func(
            "DATE_ADD",
            unit_to_str(e),
            _to_int(e.expression * -1),
            e.this,
        ),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.ParseJSON: rename_func("JSON_PARSE"),
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        # Presto's MD5 returns VARBINARY, so hex-encode and lowercase it to match
        # dialects whose MD5 returns a hex string.
        exp.MD5: lambda self, e: self.func(
            "LOWER", self.func("TO_HEX", self.func("MD5", self.sql(e, "this")))
        ),
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
    }

    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        value_as_text = exp.cast(expression.this, exp.DataType.Type.TEXT)
        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            value_as_text,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render a "safe" bracket access as ELEMENT_AT (returns NULL when out of
        bounds); non-safe accesses use the default bracket syntax."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Generate a ROW constructor; when every key-value entry has a known
        type, cast to a typed ROW so the field names survive."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        # Presto has no WEEK interval unit, so express it as days.
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
        """Render GenerateSeries as SEQUENCE; when one endpoint is an explicit
        timestamp cast, cast the other endpoint to match."""
        start = expression.args["start"]
        end = expression.args["end"]
        step = expression.args.get("step")

        if isinstance(start, exp.Cast):
            target_type = start.to
        elif isinstance(end, exp.Cast):
            target_type = end.to
        else:
            target_type = None

        if target_type and target_type.is_type("timestamp"):
            if target_type is start.to:
                end = exp.cast(end, target_type)
            else:
                start = exp.cast(start, target_type)

        return self.func("SEQUENCE", start, end, step)

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Presto requires OFFSET before LIMIT.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Convert StrToUnix into TO_UNIXTIME over a parsed timestamp.

    `TO_UNIXTIME` needs a `TIMESTAMP` argument, so the string is parsed first.
    `DATE_PARSE` is attempted first but can fail on timezone-bearing input, so
    it is wrapped in `TRY` with `PARSE_DATETIME` as the fallback; the latter
    appears to share Hive's (Joda-Time) format mapping, per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    text_value = exp.cast(expression.this, exp.DataType.Type.TEXT)
    naive_parse = self.func("DATE_PARSE", text_value, self.format_time(expression))
    zoned_parse = self.func(
        "PARSE_DATETIME",
        text_value,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )
    timestamp = self.func("COALESCE", self.func("TRY", naive_parse), zoned_parse)
    return self.func("TO_UNIXTIME", timestamp)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render a "safe" bracket access as ELEMENT_AT (NULL when out of bounds);
    everything else falls through to the default bracket syntax."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # Normalize the index to Presto's 1-based convention before emitting it.
    adjusted = apply_index_offset(
        expression.this,
        expression.expressions,
        1 - expression.args.get("offset", 0),
    )
    return self.func("ELEMENT_AT", expression.this, seq_get(adjusted, 0))
def struct_sql(self, expression: exp.Struct) -> str:
    """Generate a Presto ROW constructor.

    When every key-value entry carries a known type, the ROW is cast to a
    typed ROW so the field names are preserved; otherwise a bare ROW of the
    values is emitted (with a warning if any entry's type is unknown).
    """
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)

    rendered_values: t.List[str] = []
    typed_fields: t.List[str] = []
    saw_unknown = False

    for member in expression.expressions:
        if not isinstance(member, exp.PropertyEQ):
            rendered_values.append(self.sql(member))
            continue

        if member.type and member.type.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown = True
        else:
            typed_fields.append(f"{self.sql(member, 'this')} {self.sql(member.type)}")
        rendered_values.append(self.sql(member, "expression"))

    total = len(expression.expressions)
    if total and len(typed_fields) == total:
        return f"CAST(ROW({', '.join(rendered_values)}) AS ROW({', '.join(typed_fields)}))"

    if saw_unknown:
        self.unsupported("Cannot convert untyped key-value definitions (try annotate_types).")
    return self.func("ROW", *rendered_values)
def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
    """Render GenerateSeries as Presto's SEQUENCE(start, end[, step]).

    SEQUENCE requires both endpoints to share a type, so when one endpoint
    carries an explicit TIMESTAMP cast, the other endpoint is cast to match.
    """
    start = expression.args["start"]
    end = expression.args["end"]
    step = expression.args.get("step")

    # Prefer the start endpoint's cast type, then the end's.
    target_type = None
    for endpoint in (start, end):
        if isinstance(endpoint, exp.Cast):
            target_type = endpoint.to
            break

    if target_type and target_type.is_type("timestamp"):
        if isinstance(start, exp.Cast) and start.to is target_type:
            end = exp.cast(end, target_type)
        else:
            start = exp.cast(start, target_type)

    return self.func("SEQUENCE", start, end, step)
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    schema = expression.this
    if expression.args["kind"] == "VIEW" and schema.expressions:
        schema.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    targets = expression.args.get("tables") or [expression.this]
    if len(targets) > 1:
        # Multiple delete targets cannot be expressed in Presto; emit as-is.
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
        # With the alias gone, column qualifiers referencing it must go too.
        expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- OUTER_UNION_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql