sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns

# Date-delta expressions that share the DATE_ADD generation path.
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    """Render LATERAL EXPLODE(...) as a CROSS JOIN UNNEST(...), which Presto supports."""
    if isinstance(expression.this, exp.Explode):
        return self.sql(
            exp.Join(
                this=exp.Unnest(
                    expressions=[expression.this.this],
                    alias=expression.args.get("alias"),
                    # POSEXPLODE carries an ordinality column, hence the offset flag
                    offset=isinstance(expression.this, exp.Posexplode),
                ),
                kind="cross",
            )
        )
    return self.lateral_sql(expression)


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Emulate INITCAP via REGEXP_REPLACE with a lambda that title-cases each word."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render SORT_ARRAY as ARRAY_SORT, adding a reversing comparator for descending order."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render a Schema node; inside a Property it becomes an ARRAY of column-name strings."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Fall back to APPROX_PERCENTILE, warning that exact quantiles are unsupported."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render string-to-time conversions as DATE_PARSE with the dialect's time format."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Cast a timestamp-or-datestring value to DATE, parsing non-standard formats first."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render TsOrDsAdd as DATE_ADD(unit, delta, value) after casting operands."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render TsOrDsDiff as DATE_DIFF(unit, start, end) with both sides cast to TIMESTAMP."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Parse APPROX_PERCENTILE's 2/3/4-argument overloads into an ApproxQuantile node."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Parse FROM_UNIXTIME's overloads (with hours/minutes or a zone) into UnixToTime."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _unnest_sequence(expression: exp.Expression) -> exp.Expression:
    """Rewrite a SEQUENCE table source as UNNEST(SEQUENCE(...)), preserving the alias."""
    if isinstance(expression, exp.Table):
        if isinstance(expression.this, exp.GenerateSeries):
            unnest = exp.Unnest(expressions=[expression.this])

            if expression.alias:
                return exp.alias_(unnest, alias="_u", table=[expression.alias], copy=False)
            return unnest
    return expression


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render UnixToTime as FROM_UNIXTIME, rescaling sub-second epochs by POW(10, scale)."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _jsonextract_sql(self: Presto.Generator, expression: exp.JSONExtract) -> str:
    """Render JSONExtract as JSON_EXTRACT, or as ROW dot-access for VARIANT extracts."""
    is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
    # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
    if not expression.args.get("variant_extract") or is_json_extract:
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    this = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
    segments = []
    for path_key in expression.expression.expressions[1:]:
        if not isinstance(path_key, exp.JSONPathKey):
            # Cannot transpile subscripts, wildcards etc to dot notation
            self.unsupported(f"Cannot transpile JSONPath segment '{path_key}' to ROW access")
            continue
        key = path_key.this
        if not exp.SAFE_IDENTIFIER_RE.match(key):
            key = f'"{key}"'
        segments.append(f".{key}")

    expr = "".join(segments)

    return f"{this}{expr}"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Annotate the expression's type if missing and cast non-integer types to BIGINT."""
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression
def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Parse Presto's Teradata-style TO_CHAR into TimeToStr."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Build a generator for DATE_ADD-style calls; negating the interval expresses subtraction."""

    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


class Presto(Dialect):
    """Dialect for Presto / Trino."""

    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    TIME_MAPPING = MySQL.TIME_MAPPING
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: _jsonextract_sql,
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text inputs as UTF-8 first."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render safe bracket access as ELEMENT_AT, adjusting for the dialect's index offset."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render Struct as CAST(ROW(...) AS ROW(...)) when all field types are known."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Expand WEEK intervals (unsupported in Presto) into a DAY interval times 7."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render transactions as START TRANSACTION with any access modes appended."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            """Emit OFFSET before LIMIT, which is the order Presto expects."""
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
245class Presto(Dialect): 246 INDEX_OFFSET = 1 247 NULL_ORDERING = "nulls_are_last" 248 TIME_FORMAT = MySQL.TIME_FORMAT 249 TIME_MAPPING = MySQL.TIME_MAPPING 250 STRICT_STRING_CONCAT = True 251 SUPPORTS_SEMI_ANTI_JOIN = False 252 TYPED_DIVISION = True 253 TABLESAMPLE_SIZE_IS_PERCENT = True 254 LOG_BASE_FIRST: t.Optional[bool] = None 255 256 # https://github.com/trinodb/trino/issues/17 257 # https://github.com/trinodb/trino/issues/12289 258 # https://github.com/prestodb/presto/issues/2863 259 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 260 261 # The result of certain math functions in Presto/Trino is of type 262 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 263 ANNOTATORS = { 264 **Dialect.ANNOTATORS, 265 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 266 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 267 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 268 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 269 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 270 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 271 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 272 if e.this 273 else self._set_type(e, exp.DataType.Type.DOUBLE), 274 } 275 276 class Tokenizer(tokens.Tokenizer): 277 UNICODE_STRINGS = [ 278 (prefix + q, q) 279 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 280 for prefix in ("U&", "u&") 281 ] 282 283 KEYWORDS = { 284 **tokens.Tokenizer.KEYWORDS, 285 "START": TokenType.BEGIN, 286 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 287 "ROW": TokenType.STRUCT, 288 "IPADDRESS": TokenType.IPADDRESS, 289 "IPPREFIX": TokenType.IPPREFIX, 290 "TDIGEST": TokenType.TDIGEST, 291 "HYPERLOGLOG": TokenType.HLLSKETCH, 292 } 293 KEYWORDS.pop("/*+") 294 KEYWORDS.pop("QUALIFY") 295 296 class Parser(parser.Parser): 297 VALUES_FOLLOWED_BY_PAREN = False 298 299 FUNCTIONS = { 300 **parser.Parser.FUNCTIONS, 301 "ARBITRARY": 
exp.AnyValue.from_arg_list, 302 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 303 "APPROX_PERCENTILE": _build_approx_percentile, 304 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 305 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 306 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 307 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 308 "CARDINALITY": exp.ArraySize.from_arg_list, 309 "CONTAINS": exp.ArrayContains.from_arg_list, 310 "DATE_ADD": lambda args: exp.DateAdd( 311 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 312 ), 313 "DATE_DIFF": lambda args: exp.DateDiff( 314 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 315 ), 316 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 317 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 318 "DATE_TRUNC": date_trunc_to_time, 319 "ELEMENT_AT": lambda args: exp.Bracket( 320 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 321 ), 322 "FROM_HEX": exp.Unhex.from_arg_list, 323 "FROM_UNIXTIME": _build_from_unixtime, 324 "FROM_UTF8": lambda args: exp.Decode( 325 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 326 ), 327 "NOW": exp.CurrentTimestamp.from_arg_list, 328 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 329 this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2) 330 ), 331 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 332 this=seq_get(args, 0), 333 expression=seq_get(args, 1), 334 replacement=seq_get(args, 2) or exp.Literal.string(""), 335 ), 336 "ROW": exp.Struct.from_arg_list, 337 "SEQUENCE": exp.GenerateSeries.from_arg_list, 338 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 339 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 340 "STRPOS": lambda args: exp.StrPosition( 341 this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2) 342 ), 343 "TO_CHAR": _build_to_char, 344 "TO_UNIXTIME": 
exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        # Drop the special-case TRIM parser so TRIM(...) is parsed as a plain function call.
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        """Converts a sqlglot syntax tree into Presto SQL."""

        # Capability flags: what Presto SQL supports or requires.
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True

        # Properties Presto cannot express in DDL.
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # Presto type names for generic sqlglot data types.
        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMETZ: "TIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        }

        # Per-expression SQL generation overrides for Presto.
        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArraySize: rename_func("CARDINALITY"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            # Presto's DATE_DIFF takes (unit, start, end), i.e. operands reversed.
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DateSub reuses DATE_ADD with the interval negated.
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.GroupConcat: lambda self, e: self.func(
                "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
            ),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: _jsonextract_sql,
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            # Rewrites applied to the whole SELECT before generation.
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([_unnest_sequence]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        # Identifiers that must be quoted when emitted as names.
        RESERVED_KEYWORDS = {
            "alter", "and", "as", "between", "by", "case", "cast",
            "constraint", "create", "cross", "current_time", "current_timestamp",
            "deallocate", "delete", "describe", "distinct", "drop", "else", "end",
            "escape", "except", "execute", "exists", "extract", "false", "for",
            "from", "full", "group", "having", "in", "inner", "insert",
            "intersect", "into", "is", "join", "left", "like", "natural", "not",
            "null", "on", "or", "order", "outer", "prepare", "right", "select",
            "table", "then", "true", "union", "using", "values", "when", "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render MD5 as LOWER(TO_HEX(MD5(...))); text input is UTF-8 encoded first."""
            this = expression.this

            if not this.type:
                # Type information decides whether an explicit encode is needed.
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                # Textual arguments are wrapped in Encode (emitted as TO_UTF8).
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            """Render StrToUnix as TO_UNIXTIME over a best-effort parsed timestamp."""
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render "safe" subscripts as ELEMENT_AT; all other brackets use the default."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    # Adjust the subscript for the remaining 1-based offset.
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render a Struct as CAST(ROW(...) AS ROW(...)) when all fields are typed.

            Falls back to a bare ROW(...) (warning if any key-value field has an
            unknown type) since the target ROW schema cannot then be derived.
            """
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            """Rewrite N WEEK intervals as (N * INTERVAL '7' DAY)."""
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Emit START TRANSACTION (with optional modes) instead of BEGIN."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # OFFSET is emitted before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                # With the alias gone, qualified column references are stripped too.
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Associates this dialect's time formats with their equivalent Python strftime
formats.
Whether the behavior of `a / b` depends on the types of `a` and `b`.
False means `a / b` is always float division.
True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG).
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Tokenizer for Presto SQL."""

    # Presto spells unicode string literals as U&'...' (upper- or lowercase marker).
    UNICODE_STRINGS = [
        (marker + quote, quote)
        for quote in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for marker in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }

    # No hint comments and no QUALIFY keyword in Presto.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Parser for Presto SQL."""

    VALUES_FOLLOWED_BY_PAREN = False

    # Presto function names mapped to sqlglot expression builders.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, date): argument order
        # is reversed relative to the sqlglot expression's fields.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        # ELEMENT_AT becomes a "safe" 1-based subscript.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            # A missing replacement defaults to the empty string.
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    # Drop the special-case TRIM parser so TRIM(...) is parsed as a plain function call.
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Presto SQL from a sqlglot syntax tree."""

    # Dialect capability flags.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True

    # DDL properties Presto cannot express.
    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # sqlglot data types rendered with Presto type names.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMETZ: "TIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
    }

    # Expression-specific generation overrides.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArraySize: rename_func("CARDINALITY"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # DATE_DIFF takes (unit, start, end) in Presto.
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.GroupConcat: lambda self, e: self.func(
            "ARRAY_JOIN", self.func("ARRAY_AGG", e.this), e.args.get("separator")
        ),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: _jsonextract_sql,
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: _explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: rename_func("LEVENSHTEIN_DISTANCE"),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        # SELECT-level rewrites applied before generation.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([_unnest_sequence]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Names requiring quoting when used as identifiers.
    RESERVED_KEYWORDS = {
        "alter", "and", "as", "between", "by", "case", "cast",
        "constraint", "create", "cross", "current_time", "current_timestamp",
        "deallocate", "delete", "describe", "distinct", "drop", "else", "end",
        "escape", "except", "execute", "exists", "extract", "false", "for",
        "from", "full", "group", "having", "in", "inner", "insert",
        "intersect", "into", "is", "join", "left", "like", "natural", "not",
        "null", "on", "or", "order", "outer", "prepare", "right", "select",
        "table", "then", "true", "union", "using", "values", "when", "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), UTF-8 encoding textual input."""
        this = expression.this

        if not this.type:
            # The argument's type determines whether an encode step is needed.
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        """Render StrToUnix as TO_UNIXTIME over a best-effort parsed timestamp."""
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render "safe" subscripts as ELEMENT_AT; others use the default bracket."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a Struct as CAST(ROW(...) AS ROW(...)) when every field is typed,
        otherwise as a bare ROW(...), warning on untyped key-value fields."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite N WEEK as (N * INTERVAL '7' DAY)."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Emit START TRANSACTION with any transaction modes appended."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # OFFSET comes before LIMIT/FETCH.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
            # Once the alias is removed, qualified columns must be unqualified too.
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 as LOWER(TO_HEX(MD5(...))), UTF-8 encoding text arguments first."""
    arg = expression.this

    if not arg.type:
        # Type information is needed to decide whether an encode step is required.
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    if arg.is_type(*exp.DataType.TEXT_TYPES):
        # Textual input is wrapped in Encode, which this dialect renders as TO_UTF8.
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    hashed = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", hashed))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME over a best-effort parsed timestamp.

    TO_UNIXTIME needs a TIMESTAMP argument, so the value is first run through
    DATE_PARSE. Because that can fail when a timezone is involved, the call is
    wrapped in TRY, with PARSE_DATETIME (Joda-style patterns, matching Hive's
    time mapping) kept as a fallback — see
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP) if arg.is_string else arg

    primary = self.func("DATE_PARSE", as_text, self.format_time(expression))

    fallback = self.func(
        "PARSE_DATETIME",
        self.func("DATE_FORMAT", as_timestamp, self.format_time(expression)),
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    best_effort = self.func("COALESCE", self.func("TRY", primary), fallback)
    return self.func("TO_UNIXTIME", best_effort)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Generate bracket access; "safe" (null-on-miss) access maps to ELEMENT_AT."""
    if not expression.args.get("safe"):
        return super().bracket_sql(expression)

    # Normalize the index to Presto's 1-based offset before emitting ELEMENT_AT.
    index = seq_get(
        apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
        ),
        0,
    )
    return self.func("ELEMENT_AT", expression.this, index)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render STRUCT as a typed ROW cast when every field's type is known, else a bare ROW."""
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)
    values: t.List[str] = []
    schema: t.List[str] = []
    unknown_type = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            unknown_type = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    size = len(expression.expressions)

    # Only emit the typed CAST form when every expression contributed a schema entry.
    if size and len(schema) == size:
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    if unknown_type:
        self.unsupported(
            "Cannot convert untyped key-value definitions (try annotate_types)."
        )
    return self.func("ROW", *values)
def create_sql(self, expression: exp.Create) -> str:
    """
    Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
    so we need to remove them
    """
    schema = expression.this
    if expression.args["kind"] == "VIEW" and schema.expressions:
        # Dropping the column list keeps the statement valid for Presto.
        schema.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`, the `(cola)` part is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """
    Presto only supports DELETE FROM for a single table without an alias, so we need
    to remove the unnecessary parts. If the original DELETE statement contains more
    than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
    """
    targets = expression.args.get("tables") or [expression.this]

    # Multiple delete targets cannot be expressed in Presto; keep the default output.
    if len(targets) > 1:
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            # The alias must go, so qualified column references are stripped too.
            alias.pop()
        expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql