sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    strposition_sql,
    struct_extract_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.optimizer.scope import find_all_in_scope
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    if isinstance(expression.parent, exp.PartitionedByProperty):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    if not expression.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


def _explode_to_unnest_sql(self: Presto.Generator, expression: exp.Lateral) -> str:
    explode = expression.this
    if isinstance(explode, exp.Explode):
        exploded_type = explode.this.type
        alias = expression.args.get("alias")

        # This attempts a best-effort transpilation of LATERAL VIEW EXPLODE on a struct array
        if (
            isinstance(alias, exp.TableAlias)
            and isinstance(exploded_type, exp.DataType)
            and exploded_type.is_type(exp.DataType.Type.ARRAY)
            and exploded_type.expressions
            and exploded_type.expressions[0].is_type(exp.DataType.Type.STRUCT)
        ):
            # When unnesting a ROW in Presto, it produces N columns, so we need to fix the alias
            alias.set("columns", [c.this.copy() for c in exploded_type.expressions[0].expressions])
    elif isinstance(explode, exp.Inline):
        explode.replace(exp.Explode(this=explode.this.copy()))

    return explode_to_unnest_sql(self, expression)


def _amend_exploded_column_table(expression: exp.Expression) -> exp.Expression:
    # We check for expression.type because the columns can be amended only if types were inferred
    if isinstance(expression, exp.Select) and expression.type:
        for lateral in expression.args.get("laterals") or []:
            alias = lateral.args.get("alias")
            if (
                not isinstance(lateral.this, exp.Explode)
                or not isinstance(alias, exp.TableAlias)
                or len(alias.columns) != 1
            ):
                continue

            new_table = alias.this
            old_table = alias.columns[0].name.lower()

            # When transpiling a LATERAL VIEW EXPLODE Spark query, the exploded fields may be
            # qualified with the struct column, resulting in invalid Presto references that
            # need to be amended
            for column in find_all_in_scope(expression, exp.Column):
                if column.db.lower() == old_table:
                    column.set("table", column.args["db"].pop())
                elif column.table.lower() == old_table:
                    column.set("table", new_table.copy())
                elif column.name.lower() == old_table and isinstance(column.parent, exp.Dot):
                    column.parent.replace(exp.column(column.parent.expression, table=new_table))

    return expression


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None
    SUPPORTS_VALUES_DEFAULT = False

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type, e.g.: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "DEALLOCATE PREPARE": TokenType.COMMAND,
            "DESCRIBE INPUT": TokenType.COMMAND,
            "DESCRIBE OUTPUT": TokenType.COMMAND,
            "RESET SESSION": TokenType.COMMAND,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), occurrence=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.CurrentUser: lambda *_: "CURRENT_USER",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    _amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter", "and", "as", "between", "by", "case", "cast", "constraint",
            "create", "cross", "current_time", "current_timestamp", "deallocate",
            "delete", "describe", "distinct", "drop", "else", "end", "escape",
            "except", "execute", "exists", "extract", "false", "for", "from",
            "full", "group", "having", "in", "inner", "insert", "intersect",
            "into", "is", "join", "left", "like", "natural", "not", "null",
            "on", "or", "order", "outer", "prepare", "right", "select", "table",
            "then", "true", "union", "using", "values", "when", "where", "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (e.g. in `CREATE VIEW x (cola)`,
            `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in
            # Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
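A quick usage sketch, not part of the module source: the exp.Lateral transform above rewrites Hive's LATERAL VIEW EXPLODE, which has no direct Presto equivalent, into CROSS JOIN UNNEST. The exact output string may vary slightly across sqlglot versions.

    import sqlglot

    # Hive's LATERAL VIEW EXPLODE is rewritten to CROSS JOIN UNNEST for Presto.
    print(
        sqlglot.transpile(
            "SELECT col FROM tbl LATERAL VIEW EXPLODE(arr) t AS col",
            read="hive",
            write="presto",
        )[0]
    )
    # Expected (may vary by version): SELECT col FROM tbl CROSS JOIN UNNEST(arr) AS t(col)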
class Presto(Dialect):
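Dialects register themselves by name, so in practice the class is usually reached through the dialect registry rather than imported directly; a minimal sketch:

    from sqlglot.dialects.dialect import Dialect

    presto = Dialect.get_or_raise("presto")  # returns a Presto instance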
NULL_ORDERING = "nulls_are_last"

Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".

TYPED_DIVISION = True

Whether the behavior of a / b depends on the types of a and b. False means a / b is always float division; True means a / b is integer division if both a and b are integers.

LOG_BASE_FIRST: t.Optional[bool] = None

Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).

TIME_MAPPING = MySQL.TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.

NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

Specifies the strategy according to which identifiers should be normalized.
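Because the normalization strategy is case-insensitive, unquoted identifiers are lowercased during normalization while quoted ones keep their casing. A minimal sketch using the optimizer's normalize_identifiers helper (exact output may differ slightly by sqlglot version):

    from sqlglot import parse_one
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    ast = parse_one('SELECT "Mixed", Unquoted FROM Tbl', read="presto")
    print(normalize_identifiers(ast, dialect="presto").sql(dialect="presto"))
    # Expected: SELECT "Mixed", unquoted FROM tbl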
class Presto.Tokenizer(sqlglot.tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
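A small sketch of this tokenizer in action via Dialect.tokenize: the KEYWORDS override maps ROW to a STRUCT token, so it tokenizes as a type keyword rather than an identifier.

    from sqlglot.dialects.presto import Presto

    tokens = Presto().tokenize("CAST(x AS ROW(a INTEGER))")
    print([(token.token_type, token.text) for token in tokens])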
class Presto.Parser(sqlglot.parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
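For instance, the FUNCTIONS overrides above mean that Presto's unit-first DATE_ADD argument order still parses into a canonical exp.DateAdd node, and ELEMENT_AT parses into a safe, 1-indexed bracket access. A minimal sketch:

    import sqlglot
    from sqlglot import exp

    date_add = sqlglot.parse_one("DATE_ADD('day', 1, ts)", read="presto")
    assert isinstance(date_add, exp.DateAdd)

    bracket = sqlglot.parse_one("ELEMENT_AT(arr, 1)", read="presto")
    assert isinstance(bracket, exp.Bracket) and bracket.args.get("safe")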
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
362 class Generator(generator.Generator): 363 INTERVAL_ALLOWS_PLURAL_FORM = False 364 JOIN_HINTS = False 365 TABLE_HINTS = False 366 QUERY_HINTS = False 367 IS_BOOL_ALLOWED = False 368 TZ_TO_WITH_TIME_ZONE = True 369 NVL2_SUPPORTED = False 370 STRUCT_DELIMITER = ("(", ")") 371 LIMIT_ONLY_LITERALS = True 372 SUPPORTS_SINGLE_ARG_CONCAT = False 373 LIKE_PROPERTY_INSIDE_SCHEMA = True 374 MULTI_ARG_DISTINCT = False 375 SUPPORTS_TO_NUMBER = False 376 HEX_FUNC = "TO_HEX" 377 PARSE_JSON_NAME = "JSON_PARSE" 378 PAD_FILL_PATTERN_IS_REQUIRED = True 379 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 380 SUPPORTS_MEDIAN = False 381 ARRAY_SIZE_NAME = "CARDINALITY" 382 383 PROPERTIES_LOCATION = { 384 **generator.Generator.PROPERTIES_LOCATION, 385 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 386 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 387 } 388 389 TYPE_MAPPING = { 390 **generator.Generator.TYPE_MAPPING, 391 exp.DataType.Type.BINARY: "VARBINARY", 392 exp.DataType.Type.BIT: "BOOLEAN", 393 exp.DataType.Type.DATETIME: "TIMESTAMP", 394 exp.DataType.Type.DATETIME64: "TIMESTAMP", 395 exp.DataType.Type.FLOAT: "REAL", 396 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 397 exp.DataType.Type.INT: "INTEGER", 398 exp.DataType.Type.STRUCT: "ROW", 399 exp.DataType.Type.TEXT: "VARCHAR", 400 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 401 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 402 exp.DataType.Type.TIMETZ: "TIME", 403 } 404 405 TRANSFORMS = { 406 **generator.Generator.TRANSFORMS, 407 exp.AnyValue: rename_func("ARBITRARY"), 408 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 409 exp.ArgMax: rename_func("MAX_BY"), 410 exp.ArgMin: rename_func("MIN_BY"), 411 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 412 exp.ArrayAny: rename_func("ANY_MATCH"), 413 exp.ArrayConcat: rename_func("CONCAT"), 414 exp.ArrayContains: rename_func("CONTAINS"), 415 exp.ArrayToString: rename_func("ARRAY_JOIN"), 416 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 417 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 418 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 419 exp.BitwiseLeftShift: lambda self, e: self.func( 420 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 421 ), 422 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 423 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 424 exp.BitwiseRightShift: lambda self, e: self.func( 425 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 426 ), 427 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 428 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 429 exp.CurrentTime: lambda *_: "CURRENT_TIME", 430 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 431 exp.CurrentUser: lambda *_: "CURRENT_USER", 432 exp.DateAdd: _date_delta_sql("DATE_ADD"), 433 exp.DateDiff: lambda self, e: self.func( 434 "DATE_DIFF", unit_to_str(e), e.expression, e.this 435 ), 436 exp.DateStrToDate: datestrtodate_sql, 437 exp.DateToDi: lambda self, 438 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 439 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 440 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 441 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 442 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 443 exp.DiToDate: lambda self, 444 e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 445 
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.Last: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: _explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                    _amend_exploded_column_table,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: strposition_sql(self, e, supports_occurrence=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
"constraint", 531 "create", 532 "cross", 533 "current_time", 534 "current_timestamp", 535 "deallocate", 536 "delete", 537 "describe", 538 "distinct", 539 "drop", 540 "else", 541 "end", 542 "escape", 543 "except", 544 "execute", 545 "exists", 546 "extract", 547 "false", 548 "for", 549 "from", 550 "full", 551 "group", 552 "having", 553 "in", 554 "inner", 555 "insert", 556 "intersect", 557 "into", 558 "is", 559 "join", 560 "left", 561 "like", 562 "natural", 563 "not", 564 "null", 565 "on", 566 "or", 567 "order", 568 "outer", 569 "prepare", 570 "right", 571 "select", 572 "table", 573 "then", 574 "true", 575 "union", 576 "using", 577 "values", 578 "when", 579 "where", 580 "with", 581 } 582 583 def md5_sql(self, expression: exp.MD5) -> str: 584 this = expression.this 585 586 if not this.type: 587 from sqlglot.optimizer.annotate_types import annotate_types 588 589 this = annotate_types(this) 590 591 if this.is_type(*exp.DataType.TEXT_TYPES): 592 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 593 594 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 595 596 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 597 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 598 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 599 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 600 # which seems to be using the same time mapping as Hive, as per: 601 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 602 this = expression.this 603 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 604 value_as_timestamp = ( 605 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 606 ) 607 608 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 609 610 formatted_value = self.func( 611 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 612 ) 613 parse_with_tz = self.func( 614 "PARSE_DATETIME", 615 formatted_value, 616 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 617 ) 618 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 619 return self.func("TO_UNIXTIME", coalesced) 620 621 def bracket_sql(self, expression: exp.Bracket) -> str: 622 if expression.args.get("safe"): 623 return self.func( 624 "ELEMENT_AT", 625 expression.this, 626 seq_get( 627 apply_index_offset( 628 expression.this, 629 expression.expressions, 630 1 - expression.args.get("offset", 0), 631 ), 632 0, 633 ), 634 ) 635 return super().bracket_sql(expression) 636 637 def struct_sql(self, expression: exp.Struct) -> str: 638 from sqlglot.optimizer.annotate_types import annotate_types 639 640 expression = annotate_types(expression) 641 values: t.List[str] = [] 642 schema: t.List[str] = [] 643 unknown_type = False 644 645 for e in expression.expressions: 646 if isinstance(e, exp.PropertyEQ): 647 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 648 unknown_type = True 649 else: 650 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 651 values.append(self.sql(e, "expression")) 652 else: 653 values.append(self.sql(e)) 654 655 size = len(expression.expressions) 656 657 if not size or len(schema) != size: 658 if unknown_type: 659 self.unsupported( 660 "Cannot convert untyped key-value definitions (try annotate_types)." 
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)`,
            where `(cola)` is the expression), so we need to remove them.
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e. ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc. to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
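The TRANSFORMS table above drives most of the Presto-specific rewrites. As a minimal sketch of how a few of these entries surface through the public transpile API (the queries are made up for illustration, and the commented outputs are approximate; exact results can vary by sqlglot version):

    import sqlglot

    # exp.ILike -> no_ilike_sql: Presto has no ILIKE, so the operand is lowered.
    print(sqlglot.transpile("SELECT * FROM t WHERE name ILIKE '%a%'", read="postgres", write="presto")[0])
    # e.g. SELECT * FROM t WHERE LOWER(name) LIKE '%a%'

    # exp.Left -> left_to_substring_sql: LEFT(x, n) becomes a SUBSTRING call.
    print(sqlglot.transpile("SELECT LEFT(name, 3) FROM t", read="mysql", write="presto")[0])
    # e.g. SELECT SUBSTRING(name, 1, 3) FROM t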
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
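These options are normally supplied through the top-level API rather than by instantiating the generator directly; they are forwarded to the Presto Generator. A small sketch (the query is made up for illustration):

    import sqlglot

    sql = "select a, b from some_table where a > 1"

    # pretty and identify are passed through to the Presto generator.
    print(sqlglot.transpile(sql, write="presto", pretty=True, identify=True)[0])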
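The delete_sql rewrite shown in the class source above can be seen end to end; a hedged sketch (query and expected output are illustrative):

    import sqlglot

    # The table alias and the column qualifications are stripped so Presto accepts it.
    print(sqlglot.transpile("DELETE FROM t AS a WHERE a.x = 1", read="postgres", write="presto")[0])
    # e.g. DELETE FROM t WHERE x = 1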
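To make the variant-extract branch of jsonextract_sql concrete: by default a Snowflake-style col:x.y lookup generates JSON_EXTRACT, and only with variant_extract_is_json_extract disabled does it map to ROW (dot) access. A hedged sketch; the settings-in-dialect-string syntax is an assumption about the installed sqlglot version, and the commented outputs are illustrative:

    import sqlglot

    # Default (variant_extract_is_json_extract=True): JSON_EXTRACT with a JSONPath.
    print(sqlglot.transpile("SELECT col:x.y FROM t", read="snowflake", write="presto")[0])
    # e.g. SELECT JSON_EXTRACT(col, '$.x.y') FROM t

    # With the setting disabled, the same lookup becomes ROW (dot) access, i.e. col.x.y
    # (assumes this sqlglot version accepts settings inside the dialect string).
    print(
        sqlglot.transpile(
            "SELECT col:x.y FROM t",
            read="snowflake",
            write="presto, variant_extract_is_json_extract=false",
        )[0]
    )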
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- put_sql