sqlglot.dialects.presto
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    bool_xor_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    encode_decode_sql,
    build_formatted_time,
    if_sql,
    left_to_substring_sql,
    no_ilike_sql,
    no_pivot_sql,
    no_safe_divide_sql,
    no_timestamp_sql,
    regexp_extract_sql,
    rename_func,
    right_to_substring_sql,
    sha256_sql,
    struct_extract_sql,
    str_position_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_str,
    sequence_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.dialects.hive import Hive
from sqlglot.dialects.mysql import MySQL
from sqlglot.helper import apply_index_offset, seq_get
from sqlglot.tokens import TokenType
from sqlglot.transforms import unqualify_columns
from sqlglot.generator import unsupported_args

# Union of the date-delta expressions handled by _date_delta_sql below.
DATE_ADD_OR_SUB = t.Union[exp.DateAdd, exp.TimestampAdd, exp.DateSub]


def _initcap_sql(self: Presto.Generator, expression: exp.Initcap) -> str:
    """Render exp.Initcap as a REGEXP_REPLACE that uppercases the first letter
    of each word and lowercases the rest, via a Presto lambda."""
    regex = r"(\w)(\w*)"
    return f"REGEXP_REPLACE({self.sql(expression, 'this')}, '{regex}', x -> UPPER(x[1]) || LOWER(x[2]))"


def _no_sort_array(self: Presto.Generator, expression: exp.SortArray) -> str:
    """Render exp.SortArray as ARRAY_SORT; a descending sort is expressed with an
    explicit comparator lambda, since ARRAY_SORT itself sorts ascending."""
    if expression.args.get("asc") == exp.false():
        comparator = "(a, b) -> CASE WHEN a < b THEN 1 WHEN a > b THEN -1 ELSE 0 END"
    else:
        comparator = None
    return self.func("ARRAY_SORT", expression.this, comparator)


def _schema_sql(self: Presto.Generator, expression: exp.Schema) -> str:
    """Render exp.Schema; when the schema is the value of a property it becomes an
    ARRAY of quoted column names, otherwise column defs from sibling property
    schemas are merged in before falling back to the default rendering."""
    if isinstance(expression.parent, exp.Property):
        columns = ", ".join(f"'{c.name}'" for c in expression.expressions)
        return f"ARRAY[{columns}]"

    if expression.parent:
        for schema in expression.parent.find_all(exp.Schema):
            column_defs = schema.find_all(exp.ColumnDef)
            if column_defs and isinstance(schema.parent, exp.Property):
                expression.expressions.extend(column_defs)

    return self.schema_sql(expression)


def _quantile_sql(self: Presto.Generator, expression: exp.Quantile) -> str:
    """Approximate exp.Quantile with APPROX_PERCENTILE, warning that exact
    quantiles are unsupported."""
    self.unsupported("Presto does not support exact quantiles")
    return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))


def _str_to_time_sql(
    self: Presto.Generator, expression: exp.StrToDate | exp.StrToTime | exp.TsOrDsToDate
) -> str:
    """Render a string-to-time parse as DATE_PARSE with the dialect's time format."""
    return self.func("DATE_PARSE", expression.this, self.format_time(expression))


def _ts_or_ds_to_date_sql(self: Presto.Generator, expression: exp.TsOrDsToDate) -> str:
    """Render exp.TsOrDsToDate: parse with DATE_PARSE when a non-default format is
    given, otherwise cast through TIMESTAMP to DATE."""
    time_format = self.format_time(expression)
    if time_format and time_format not in (Presto.TIME_FORMAT, Presto.DATE_FORMAT):
        return self.sql(exp.cast(_str_to_time_sql(self, expression), exp.DataType.Type.DATE))
    return self.sql(
        exp.cast(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP), exp.DataType.Type.DATE)
    )


def _ts_or_ds_add_sql(self: Presto.Generator, expression: exp.TsOrDsAdd) -> str:
    """Render exp.TsOrDsAdd as DATE_ADD(unit, delta, value), casting operands first
    via ts_or_ds_add_cast."""
    expression = ts_or_ds_add_cast(expression)
    unit = unit_to_str(expression)
    return self.func("DATE_ADD", unit, expression.expression, expression.this)


def _ts_or_ds_diff_sql(self: Presto.Generator, expression: exp.TsOrDsDiff) -> str:
    """Render exp.TsOrDsDiff as DATE_DIFF(unit, start, end) with both operands
    cast to TIMESTAMP."""
    this = exp.cast(expression.this, exp.DataType.Type.TIMESTAMP)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP)
    unit = unit_to_str(expression)
    return self.func("DATE_DIFF", unit, expr, this)


def _build_approx_percentile(args: t.List) -> exp.Expression:
    """Build exp.ApproxQuantile from APPROX_PERCENTILE args; the 4-arg form carries
    a weight, the 3-arg form an accuracy, otherwise defer to from_arg_list."""
    if len(args) == 4:
        return exp.ApproxQuantile(
            this=seq_get(args, 0),
            weight=seq_get(args, 1),
            quantile=seq_get(args, 2),
            accuracy=seq_get(args, 3),
        )
    if len(args) == 3:
        return exp.ApproxQuantile(
            this=seq_get(args, 0), quantile=seq_get(args, 1), accuracy=seq_get(args, 2)
        )
    return exp.ApproxQuantile.from_arg_list(args)


def _build_from_unixtime(args: t.List) -> exp.Expression:
    """Build exp.UnixToTime from FROM_UNIXTIME args; the 3-arg form carries an
    hours/minutes offset, the 2-arg form a time zone."""
    if len(args) == 3:
        return exp.UnixToTime(
            this=seq_get(args, 0),
            hours=seq_get(args, 1),
            minutes=seq_get(args, 2),
        )
    if len(args) == 2:
        return exp.UnixToTime(this=seq_get(args, 0), zone=seq_get(args, 1))

    return exp.UnixToTime.from_arg_list(args)


def _first_last_sql(self: Presto.Generator, expression: exp.Func) -> str:
    """
    Trino doesn't support FIRST / LAST as functions, but they're valid in the context
    of MATCH_RECOGNIZE, so we need to preserve them in that case. In all other cases
    they're converted into an ARBITRARY call.

    Reference: https://trino.io/docs/current/sql/match-recognize.html#logical-navigation-functions
    """
    if isinstance(expression.find_ancestor(exp.MatchRecognize, exp.Select), exp.MatchRecognize):
        return self.function_fallback_sql(expression)

    return rename_func("ARBITRARY")(self, expression)


def _unix_to_time_sql(self: Presto.Generator, expression: exp.UnixToTime) -> str:
    """Render exp.UnixToTime as FROM_UNIXTIME, rescaling sub-second epoch values
    by the appropriate power of ten."""
    scale = expression.args.get("scale")
    timestamp = self.sql(expression, "this")
    if scale in (None, exp.UnixToTime.SECONDS):
        return rename_func("FROM_UNIXTIME")(self, expression)

    return f"FROM_UNIXTIME(CAST({timestamp} AS DOUBLE) / POW(10, {scale}))"


def _to_int(self: Presto.Generator, expression: exp.Expression) -> exp.Expression:
    """Annotate the expression's type if unknown, then cast it to BIGINT when it is
    not already an integer type. Mutates the expression's type annotations."""
    if not expression.type:
        # Deferred import — annotate_types pulls in the optimizer package.
        from sqlglot.optimizer.annotate_types import annotate_types

        annotate_types(expression, dialect=self.dialect)
    if expression.type and expression.type.this not in exp.DataType.INTEGER_TYPES:
        return exp.cast(expression, to=exp.DataType.Type.BIGINT)
    return expression


def _build_to_char(args: t.List) -> exp.TimeToStr:
    """Build exp.TimeToStr from Presto's Teradata-compatible TO_CHAR arguments."""
    fmt = seq_get(args, 1)
    if isinstance(fmt, exp.Literal):
        # We uppercase this to match Teradata's format mapping keys
        fmt.set("this", fmt.this.upper())

    # We use "teradata" on purpose here, because the time formats are different in Presto.
    # See https://prestodb.io/docs/current/functions/teradata.html?highlight=to_char#to_char
    return build_formatted_time(exp.TimeToStr, "teradata")(args)


def _date_delta_sql(
    name: str, negate_interval: bool = False
) -> t.Callable[[Presto.Generator, DATE_ADD_OR_SUB], str]:
    """Return a generator function rendering a date-delta expression as
    `name(unit, interval, value)`, negating the interval for subtraction."""

    def _delta_sql(self: Presto.Generator, expression: DATE_ADD_OR_SUB) -> str:
        interval = _to_int(self, expression.expression)
        return self.func(
            name,
            unit_to_str(expression),
            interval * (-1) if negate_interval else interval,
            expression.this,
        )

    return _delta_sql


class Presto(Dialect):
    INDEX_OFFSET = 1
    NULL_ORDERING = "nulls_are_last"
    TIME_FORMAT = MySQL.TIME_FORMAT
    STRICT_STRING_CONCAT = True
    SUPPORTS_SEMI_ANTI_JOIN = False
    TYPED_DIVISION = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    LOG_BASE_FIRST: t.Optional[bool] = None

    TIME_MAPPING = MySQL.TIME_MAPPING

    # https://github.com/trinodb/trino/issues/17
    # https://github.com/trinodb/trino/issues/12289
    # https://github.com/prestodb/presto/issues/2863
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # The result of certain math functions in Presto/Trino is of type
    # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT
    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        exp.Floor: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"),
        exp.Round: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Sign: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Abs: lambda self, e: self._annotate_by_args(e, "this"),
        exp.Rand: lambda self, e: self._annotate_by_args(e, "this")
        if e.this
        else self._set_type(e, exp.DataType.Type.DOUBLE),
    }

    class Tokenizer(tokens.Tokenizer):
        # Presto supports Unicode string literals with both upper- and lower-case
        # U& prefixes, for every configured quote style.
        UNICODE_STRINGS = [
            (prefix + q, q)
            for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
            for prefix in ("U&", "u&")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "START": TokenType.BEGIN,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "ROW": TokenType.STRUCT,
            "IPADDRESS": TokenType.IPADDRESS,
            "IPPREFIX": TokenType.IPPREFIX,
            "TDIGEST": TokenType.TDIGEST,
            "HYPERLOGLOG": TokenType.HLLSKETCH,
        }
        # Presto has no hint comments and no QUALIFY clause.
        KEYWORDS.pop("/*+")
        KEYWORDS.pop("QUALIFY")

    class Parser(parser.Parser):
        VALUES_FOLLOWED_BY_PAREN = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARBITRARY": exp.AnyValue.from_arg_list,
            "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
            "APPROX_PERCENTILE": _build_approx_percentile,
            "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
            "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
            "BITWISE_OR": binary_from_function(exp.BitwiseOr),
            "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
            "CARDINALITY": exp.ArraySize.from_arg_list,
            "CONTAINS": exp.ArrayContains.from_arg_list,
            # Presto's DATE_ADD/DATE_DIFF take (unit, delta, value) — note the
            # reversed argument order relative to the canonical expressions.
            "DATE_ADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_DIFF": lambda args: exp.DateDiff(
                this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
            ),
            "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
            "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
            "DATE_TRUNC": date_trunc_to_time,
            "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
            # ELEMENT_AT is 1-based and returns NULL on out-of-bounds access,
            # hence offset=1 and safe=True.
            "ELEMENT_AT": lambda args: exp.Bracket(
                this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
            ),
            "FROM_HEX": exp.Unhex.from_arg_list,
            "FROM_UNIXTIME": _build_from_unixtime,
            "FROM_UTF8": lambda args: exp.Decode(
                this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
            ),
            "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
            "NOW": exp.CurrentTimestamp.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2) or exp.Literal.string(""),
            ),
            "ROW": exp.Struct.from_arg_list,
            "SEQUENCE": exp.GenerateSeries.from_arg_list,
            "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
            "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
            "STRPOS": lambda args: exp.StrPosition(
                this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
            ),
            "TO_CHAR": _build_to_char,
            "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
            "TO_UTF8": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
        }

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("TRIM")

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        IS_BOOL_ALLOWED = False
        TZ_TO_WITH_TIME_ZONE = True
        NVL2_SUPPORTED = False
        STRUCT_DELIMITER = ("(", ")")
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_SINGLE_ARG_CONCAT = False
        LIKE_PROPERTY_INSIDE_SCHEMA = True
        MULTI_ARG_DISTINCT = False
        SUPPORTS_TO_NUMBER = False
        HEX_FUNC = "TO_HEX"
        PARSE_JSON_NAME = "JSON_PARSE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = False
        ARRAY_SIZE_NAME = "CARDINALITY"

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "VARBINARY",
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.DATETIME64: "TIMESTAMP",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
            exp.DataType.Type.INT: "INTEGER",
            exp.DataType.Type.STRUCT: "ROW",
            exp.DataType.Type.TEXT: "VARCHAR",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMETZ: "TIME",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.AnyValue: rename_func("ARBITRARY"),
            exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
            exp.ArrayAny: rename_func("ANY_MATCH"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayContains: rename_func("CONTAINS"),
            exp.ArrayToString: rename_func("ARRAY_JOIN"),
            exp.ArrayUniqueAgg: rename_func("SET_AGG"),
            exp.AtTimeZone: rename_func("AT_TIMEZONE"),
            exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
            exp.BitwiseLeftShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
            ),
            exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
            exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
            exp.BitwiseRightShift: lambda self, e: self.func(
                "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
            ),
            exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
            exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DateAdd: _date_delta_sql("DATE_ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", unit_to_str(e), e.expression, e.this
            ),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
            # DATE_SUB doesn't exist in Presto; subtraction is DATE_ADD of a negated interval.
            exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
            # Shift Presto's ISO day-of-week (Mon=1) to a Sunday-first numbering.
            exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
            exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
            exp.DiToDate: lambda self,
            e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
            exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
            exp.First: _first_last_sql,
            exp.FirstValue: _first_last_sql,
            exp.FromTimeZone: lambda self,
            e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
            exp.GenerateSeries: sequence_sql,
            exp.GenerateDateArray: sequence_sql,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.Initcap: _initcap_sql,
            exp.JSONExtract: lambda self, e: self.jsonextract_sql(e),
            exp.Last: _first_last_sql,
            exp.LastValue: _first_last_sql,
            exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
            exp.Lateral: explode_to_unnest_sql,
            exp.Left: left_to_substring_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN_DISTANCE")
            ),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.Pivot: no_pivot_sql,
            exp.Quantile: _quantile_sql,
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpExtractAll: regexp_extract_sql,
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.Schema: _schema_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(1),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SortArray: _no_sort_array,
            exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
            exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
            exp.StrToMap: rename_func("SPLIT_TO_MAP"),
            exp.StrToTime: _str_to_time_sql,
            exp.StructExtract: struct_extract_sql,
            exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: timestrtotime_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
            ),
            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("TO_UNIXTIME"),
            exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
            exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixToStr: lambda self,
            e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self,
            e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
            exp.WithinGroup: transforms.preprocess(
                [transforms.remove_within_group_for_percentiles]
            ),
            exp.Xor: bool_xor_sql,
            exp.MD5Digest: rename_func("MD5"),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
        }

        # Identifiers matching these must be quoted when generated.
        RESERVED_KEYWORDS = {
            "alter",
            "and",
            "as",
            "between",
            "by",
            "case",
            "cast",
            "constraint",
            "create",
            "cross",
            "current_time",
            "current_timestamp",
            "deallocate",
            "delete",
            "describe",
            "distinct",
            "drop",
            "else",
            "end",
            "escape",
            "except",
            "execute",
            "exists",
            "extract",
            "false",
            "for",
            "from",
            "full",
            "group",
            "having",
            "in",
            "inner",
            "insert",
            "intersect",
            "into",
            "is",
            "join",
            "left",
            "like",
            "natural",
            "not",
            "null",
            "on",
            "or",
            "order",
            "outer",
            "prepare",
            "right",
            "select",
            "table",
            "then",
            "true",
            "union",
            "using",
            "values",
            "when",
            "where",
            "with",
        }

        def md5_sql(self, expression: exp.MD5) -> str:
            """Render exp.MD5 as LOWER(TO_HEX(MD5(...))), encoding text input to
            UTF-8 first since Presto's MD5 operates on VARBINARY."""
            this = expression.this

            if not this.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                this = annotate_types(this)

            if this.is_type(*exp.DataType.TEXT_TYPES):
                this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

            return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

        def strtounix_sql(self, expression: exp.StrToUnix) -> str:
            # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
            # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
            # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
            # which seems to be using the same time mapping as Hive, as per:
            # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
            this = expression.this
            value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
            value_as_timestamp = (
                exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
            )

            parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

            formatted_value = self.func(
                "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
            )
            parse_with_tz = self.func(
                "PARSE_DATETIME",
                formatted_value,
                self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
            )
            coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
            return self.func("TO_UNIXTIME", coalesced)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            """Render a "safe" bracket access as ELEMENT_AT (NULL on out-of-bounds),
            adjusting the index for Presto's 1-based offset."""
            if expression.args.get("safe"):
                return self.func(
                    "ELEMENT_AT",
                    expression.this,
                    seq_get(
                        apply_index_offset(
                            expression.this,
                            expression.expressions,
                            1 - expression.args.get("offset", 0),
                        ),
                        0,
                    ),
                )
            return super().bracket_sql(expression)

        def struct_sql(self, expression: exp.Struct) -> str:
            """Render exp.Struct as CAST(ROW(...) AS ROW(name type, ...)) when every
            field is a typed key-value pair, otherwise as a plain ROW(...) call."""
            from sqlglot.optimizer.annotate_types import annotate_types

            expression = annotate_types(expression)
            values: t.List[str] = []
            schema: t.List[str] = []
            unknown_type = False

            for e in expression.expressions:
                if isinstance(e, exp.PropertyEQ):
                    if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                        unknown_type = True
                    else:
                        schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                    values.append(self.sql(e, "expression"))
                else:
                    values.append(self.sql(e))

            size = len(expression.expressions)

            # Fall back to a bare ROW call unless every field contributed a schema entry.
            if not size or len(schema) != size:
                if unknown_type:
                    self.unsupported(
                        "Cannot convert untyped key-value definitions (try annotate_types)."
                    )
                return self.func("ROW", *values)
            return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

        def interval_sql(self, expression: exp.Interval) -> str:
            # Presto has no WEEK interval unit; express it as a multiple of 7 days.
            if expression.this and expression.text("unit").upper().startswith("WEEK"):
                return f"({expression.this.name} * INTERVAL '7' DAY)"
            return super().interval_sql(expression)

        def transaction_sql(self, expression: exp.Transaction) -> str:
            """Render transaction start as START TRANSACTION with optional modes."""
            modes = expression.args.get("modes")
            modes = f" {', '.join(modes)}" if modes else ""
            return f"START TRANSACTION{modes}"

        def offset_limit_modifiers(
            self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
        ) -> t.List[str]:
            # Presto puts OFFSET before LIMIT/FETCH.
            return [
                self.sql(expression, "offset"),
                self.sql(limit),
            ]

        def create_sql(self, expression: exp.Create) -> str:
            """
            Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
            so we need to remove them
            """
            kind = expression.args["kind"]
            schema = expression.this
            if kind == "VIEW" and schema.expressions:
                expression.this.set("expressions", None)
            return super().create_sql(expression)

        def delete_sql(self, expression: exp.Delete) -> str:
            """
            Presto only supports DELETE FROM for a single table without an alias, so we need
            to remove the unnecessary parts. If the original DELETE statement contains more
            than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
            """
            tables = expression.args.get("tables") or [expression.this]
            if len(tables) > 1:
                return super().delete_sql(expression)

            table = tables[0]
            expression.set("this", table)
            expression.set("tables", None)

            if isinstance(table, exp.Table):
                table_alias = table.args.get("alias")
                if table_alias:
                    table_alias.pop()
                    # Columns were qualified with the now-removed alias; unqualify them.
                    expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

            return super().delete_sql(expression)

        def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
            """Render exp.JSONExtract as JSON_EXTRACT, or as ROW dot-access when the
            expression originated from a VARIANT extract and the dialect setting
            `variant_extract_is_json_extract` is disabled."""
            is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

            # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
            # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
            if not expression.args.get("variant_extract") or is_json_extract:
                return self.func(
                    "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
                )

            this = self.sql(expression, "this")

            # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to a ROW access col.x.y
            segments = []
            for path_key in expression.expression.expressions[1:]:
                if not isinstance(path_key, exp.JSONPathKey):
                    # Cannot transpile subscripts, wildcards etc to dot notation
                    self.unsupported(
                        f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                    )
                    continue
                key = path_key.this
                if not exp.SAFE_IDENTIFIER_RE.match(key):
                    key = f'"{key}"'
                segments.append(f".{key}")

            expr = "".join(segments)

            return f"{this}{expr}"

        def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
            """Render exp.GroupConcat as ARRAY_JOIN(ARRAY_AGG(...), separator)."""
            return self.func(
                "ARRAY_JOIN",
                self.func("ARRAY_AGG", expression.this),
                expression.args.get("separator"),
            )
193class Presto(Dialect): 194 INDEX_OFFSET = 1 195 NULL_ORDERING = "nulls_are_last" 196 TIME_FORMAT = MySQL.TIME_FORMAT 197 STRICT_STRING_CONCAT = True 198 SUPPORTS_SEMI_ANTI_JOIN = False 199 TYPED_DIVISION = True 200 TABLESAMPLE_SIZE_IS_PERCENT = True 201 LOG_BASE_FIRST: t.Optional[bool] = None 202 203 TIME_MAPPING = MySQL.TIME_MAPPING 204 205 # https://github.com/trinodb/trino/issues/17 206 # https://github.com/trinodb/trino/issues/12289 207 # https://github.com/prestodb/presto/issues/2863 208 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE 209 210 # The result of certain math functions in Presto/Trino is of type 211 # equal to the input type e.g: FLOOR(5.5/2) -> DECIMAL, FLOOR(5/2) -> BIGINT 212 ANNOTATORS = { 213 **Dialect.ANNOTATORS, 214 exp.Floor: lambda self, e: self._annotate_by_args(e, "this"), 215 exp.Ceil: lambda self, e: self._annotate_by_args(e, "this"), 216 exp.Mod: lambda self, e: self._annotate_by_args(e, "this", "expression"), 217 exp.Round: lambda self, e: self._annotate_by_args(e, "this"), 218 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 219 exp.Abs: lambda self, e: self._annotate_by_args(e, "this"), 220 exp.Rand: lambda self, e: self._annotate_by_args(e, "this") 221 if e.this 222 else self._set_type(e, exp.DataType.Type.DOUBLE), 223 } 224 225 class Tokenizer(tokens.Tokenizer): 226 UNICODE_STRINGS = [ 227 (prefix + q, q) 228 for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES) 229 for prefix in ("U&", "u&") 230 ] 231 232 KEYWORDS = { 233 **tokens.Tokenizer.KEYWORDS, 234 "START": TokenType.BEGIN, 235 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 236 "ROW": TokenType.STRUCT, 237 "IPADDRESS": TokenType.IPADDRESS, 238 "IPPREFIX": TokenType.IPPREFIX, 239 "TDIGEST": TokenType.TDIGEST, 240 "HYPERLOGLOG": TokenType.HLLSKETCH, 241 } 242 KEYWORDS.pop("/*+") 243 KEYWORDS.pop("QUALIFY") 244 245 class Parser(parser.Parser): 246 VALUES_FOLLOWED_BY_PAREN = False 247 248 FUNCTIONS = { 249 **parser.Parser.FUNCTIONS, 250 
"ARBITRARY": exp.AnyValue.from_arg_list, 251 "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list, 252 "APPROX_PERCENTILE": _build_approx_percentile, 253 "BITWISE_AND": binary_from_function(exp.BitwiseAnd), 254 "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)), 255 "BITWISE_OR": binary_from_function(exp.BitwiseOr), 256 "BITWISE_XOR": binary_from_function(exp.BitwiseXor), 257 "CARDINALITY": exp.ArraySize.from_arg_list, 258 "CONTAINS": exp.ArrayContains.from_arg_list, 259 "DATE_ADD": lambda args: exp.DateAdd( 260 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 261 ), 262 "DATE_DIFF": lambda args: exp.DateDiff( 263 this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0) 264 ), 265 "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"), 266 "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"), 267 "DATE_TRUNC": date_trunc_to_time, 268 "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list, 269 "ELEMENT_AT": lambda args: exp.Bracket( 270 this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True 271 ), 272 "FROM_HEX": exp.Unhex.from_arg_list, 273 "FROM_UNIXTIME": _build_from_unixtime, 274 "FROM_UTF8": lambda args: exp.Decode( 275 this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8") 276 ), 277 "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list, 278 "NOW": exp.CurrentTimestamp.from_arg_list, 279 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 280 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 281 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 282 this=seq_get(args, 0), 283 expression=seq_get(args, 1), 284 replacement=seq_get(args, 2) or exp.Literal.string(""), 285 ), 286 "ROW": exp.Struct.from_arg_list, 287 "SEQUENCE": exp.GenerateSeries.from_arg_list, 288 "SET_AGG": exp.ArrayUniqueAgg.from_arg_list, 289 "SPLIT_TO_MAP": exp.StrToMap.from_arg_list, 290 "STRPOS": lambda args: exp.StrPosition( 291 this=seq_get(args, 0), 
substr=seq_get(args, 1), instance=seq_get(args, 2) 292 ), 293 "TO_CHAR": _build_to_char, 294 "TO_UNIXTIME": exp.TimeToUnix.from_arg_list, 295 "TO_UTF8": lambda args: exp.Encode( 296 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 297 ), 298 "MD5": exp.MD5Digest.from_arg_list, 299 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 300 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 301 } 302 303 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 304 FUNCTION_PARSERS.pop("TRIM") 305 306 class Generator(generator.Generator): 307 INTERVAL_ALLOWS_PLURAL_FORM = False 308 JOIN_HINTS = False 309 TABLE_HINTS = False 310 QUERY_HINTS = False 311 IS_BOOL_ALLOWED = False 312 TZ_TO_WITH_TIME_ZONE = True 313 NVL2_SUPPORTED = False 314 STRUCT_DELIMITER = ("(", ")") 315 LIMIT_ONLY_LITERALS = True 316 SUPPORTS_SINGLE_ARG_CONCAT = False 317 LIKE_PROPERTY_INSIDE_SCHEMA = True 318 MULTI_ARG_DISTINCT = False 319 SUPPORTS_TO_NUMBER = False 320 HEX_FUNC = "TO_HEX" 321 PARSE_JSON_NAME = "JSON_PARSE" 322 PAD_FILL_PATTERN_IS_REQUIRED = True 323 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 324 SUPPORTS_MEDIAN = False 325 ARRAY_SIZE_NAME = "CARDINALITY" 326 327 PROPERTIES_LOCATION = { 328 **generator.Generator.PROPERTIES_LOCATION, 329 exp.LocationProperty: exp.Properties.Location.UNSUPPORTED, 330 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 331 } 332 333 TYPE_MAPPING = { 334 **generator.Generator.TYPE_MAPPING, 335 exp.DataType.Type.BINARY: "VARBINARY", 336 exp.DataType.Type.BIT: "BOOLEAN", 337 exp.DataType.Type.DATETIME: "TIMESTAMP", 338 exp.DataType.Type.DATETIME64: "TIMESTAMP", 339 exp.DataType.Type.FLOAT: "REAL", 340 exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG", 341 exp.DataType.Type.INT: "INTEGER", 342 exp.DataType.Type.STRUCT: "ROW", 343 exp.DataType.Type.TEXT: "VARCHAR", 344 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 345 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 346 
exp.DataType.Type.TIMETZ: "TIME", 347 } 348 349 TRANSFORMS = { 350 **generator.Generator.TRANSFORMS, 351 exp.AnyValue: rename_func("ARBITRARY"), 352 exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"), 353 exp.ArgMax: rename_func("MAX_BY"), 354 exp.ArgMin: rename_func("MIN_BY"), 355 exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]", 356 exp.ArrayAny: rename_func("ANY_MATCH"), 357 exp.ArrayConcat: rename_func("CONCAT"), 358 exp.ArrayContains: rename_func("CONTAINS"), 359 exp.ArrayToString: rename_func("ARRAY_JOIN"), 360 exp.ArrayUniqueAgg: rename_func("SET_AGG"), 361 exp.AtTimeZone: rename_func("AT_TIMEZONE"), 362 exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression), 363 exp.BitwiseLeftShift: lambda self, e: self.func( 364 "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression 365 ), 366 exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this), 367 exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression), 368 exp.BitwiseRightShift: lambda self, e: self.func( 369 "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression 370 ), 371 exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression), 372 exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]), 373 exp.CurrentTime: lambda *_: "CURRENT_TIME", 374 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 375 exp.DateAdd: _date_delta_sql("DATE_ADD"), 376 exp.DateDiff: lambda self, e: self.func( 377 "DATE_DIFF", unit_to_str(e), e.expression, e.this 378 ), 379 exp.DateStrToDate: datestrtodate_sql, 380 exp.DateToDi: lambda self, 381 e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)", 382 exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True), 383 exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)", 384 exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"), 385 exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"), 386 exp.DiToDate: lambda self, 387 e: 
f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)", 388 exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"), 389 exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'", 390 exp.First: _first_last_sql, 391 exp.FirstValue: _first_last_sql, 392 exp.FromTimeZone: lambda self, 393 e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'", 394 exp.GenerateSeries: sequence_sql, 395 exp.GenerateDateArray: sequence_sql, 396 exp.Group: transforms.preprocess([transforms.unalias_group]), 397 exp.If: if_sql(), 398 exp.ILike: no_ilike_sql, 399 exp.Initcap: _initcap_sql, 400 exp.JSONExtract: lambda self, e: self.jsonextract_sql(e), 401 exp.Last: _first_last_sql, 402 exp.LastValue: _first_last_sql, 403 exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this), 404 exp.Lateral: explode_to_unnest_sql, 405 exp.Left: left_to_substring_sql, 406 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 407 rename_func("LEVENSHTEIN_DISTANCE") 408 ), 409 exp.LogicalAnd: rename_func("BOOL_AND"), 410 exp.LogicalOr: rename_func("BOOL_OR"), 411 exp.Pivot: no_pivot_sql, 412 exp.Quantile: _quantile_sql, 413 exp.RegexpExtract: regexp_extract_sql, 414 exp.RegexpExtractAll: regexp_extract_sql, 415 exp.Right: right_to_substring_sql, 416 exp.SafeDivide: no_safe_divide_sql, 417 exp.Schema: _schema_sql, 418 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 419 exp.Select: transforms.preprocess( 420 [ 421 transforms.eliminate_qualify, 422 transforms.eliminate_distinct_on, 423 transforms.explode_to_unnest(1), 424 transforms.eliminate_semi_and_anti_joins, 425 ] 426 ), 427 exp.SortArray: _no_sort_array, 428 exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True), 429 exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)", 430 exp.StrToMap: rename_func("SPLIT_TO_MAP"), 431 exp.StrToTime: _str_to_time_sql, 432 
exp.StructExtract: struct_extract_sql, 433 exp.Table: transforms.preprocess([transforms.unnest_generate_series]), 434 exp.Timestamp: no_timestamp_sql, 435 exp.TimestampAdd: _date_delta_sql("DATE_ADD"), 436 exp.TimestampTrunc: timestamptrunc_sql(), 437 exp.TimeStrToDate: timestrtotime_sql, 438 exp.TimeStrToTime: timestrtotime_sql, 439 exp.TimeStrToUnix: lambda self, e: self.func( 440 "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT) 441 ), 442 exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 443 exp.TimeToUnix: rename_func("TO_UNIXTIME"), 444 exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)), 445 exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]), 446 exp.TsOrDiToDi: lambda self, 447 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)", 448 exp.TsOrDsAdd: _ts_or_ds_add_sql, 449 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 450 exp.TsOrDsToDate: _ts_or_ds_to_date_sql, 451 exp.Unhex: rename_func("FROM_HEX"), 452 exp.UnixToStr: lambda self, 453 e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})", 454 exp.UnixToTime: _unix_to_time_sql, 455 exp.UnixToTimeStr: lambda self, 456 e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)", 457 exp.VariancePop: rename_func("VAR_POP"), 458 exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]), 459 exp.WithinGroup: transforms.preprocess( 460 [transforms.remove_within_group_for_percentiles] 461 ), 462 exp.Xor: bool_xor_sql, 463 exp.MD5Digest: rename_func("MD5"), 464 exp.SHA: rename_func("SHA1"), 465 exp.SHA2: sha256_sql, 466 } 467 468 RESERVED_KEYWORDS = { 469 "alter", 470 "and", 471 "as", 472 "between", 473 "by", 474 "case", 475 "cast", 476 "constraint", 477 "create", 478 "cross", 479 "current_time", 480 "current_timestamp", 481 "deallocate", 482 "delete", 483 "describe", 484 "distinct", 485 "drop", 486 "else", 487 "end", 488 "escape", 489 "except", 490 
"execute", 491 "exists", 492 "extract", 493 "false", 494 "for", 495 "from", 496 "full", 497 "group", 498 "having", 499 "in", 500 "inner", 501 "insert", 502 "intersect", 503 "into", 504 "is", 505 "join", 506 "left", 507 "like", 508 "natural", 509 "not", 510 "null", 511 "on", 512 "or", 513 "order", 514 "outer", 515 "prepare", 516 "right", 517 "select", 518 "table", 519 "then", 520 "true", 521 "union", 522 "using", 523 "values", 524 "when", 525 "where", 526 "with", 527 } 528 529 def md5_sql(self, expression: exp.MD5) -> str: 530 this = expression.this 531 532 if not this.type: 533 from sqlglot.optimizer.annotate_types import annotate_types 534 535 this = annotate_types(this) 536 537 if this.is_type(*exp.DataType.TEXT_TYPES): 538 this = exp.Encode(this=this, charset=exp.Literal.string("utf-8")) 539 540 return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this)))) 541 542 def strtounix_sql(self, expression: exp.StrToUnix) -> str: 543 # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one. 
544 # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a 545 # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback, 546 # which seems to be using the same time mapping as Hive, as per: 547 # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html 548 this = expression.this 549 value_as_text = exp.cast(this, exp.DataType.Type.TEXT) 550 value_as_timestamp = ( 551 exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this 552 ) 553 554 parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression)) 555 556 formatted_value = self.func( 557 "DATE_FORMAT", value_as_timestamp, self.format_time(expression) 558 ) 559 parse_with_tz = self.func( 560 "PARSE_DATETIME", 561 formatted_value, 562 self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE), 563 ) 564 coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz) 565 return self.func("TO_UNIXTIME", coalesced) 566 567 def bracket_sql(self, expression: exp.Bracket) -> str: 568 if expression.args.get("safe"): 569 return self.func( 570 "ELEMENT_AT", 571 expression.this, 572 seq_get( 573 apply_index_offset( 574 expression.this, 575 expression.expressions, 576 1 - expression.args.get("offset", 0), 577 ), 578 0, 579 ), 580 ) 581 return super().bracket_sql(expression) 582 583 def struct_sql(self, expression: exp.Struct) -> str: 584 from sqlglot.optimizer.annotate_types import annotate_types 585 586 expression = annotate_types(expression) 587 values: t.List[str] = [] 588 schema: t.List[str] = [] 589 unknown_type = False 590 591 for e in expression.expressions: 592 if isinstance(e, exp.PropertyEQ): 593 if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN): 594 unknown_type = True 595 else: 596 schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}") 597 values.append(self.sql(e, "expression")) 598 else: 599 values.append(self.sql(e)) 600 601 size = 
len(expression.expressions) 602 603 if not size or len(schema) != size: 604 if unknown_type: 605 self.unsupported( 606 "Cannot convert untyped key-value definitions (try annotate_types)." 607 ) 608 return self.func("ROW", *values) 609 return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))" 610 611 def interval_sql(self, expression: exp.Interval) -> str: 612 if expression.this and expression.text("unit").upper().startswith("WEEK"): 613 return f"({expression.this.name} * INTERVAL '7' DAY)" 614 return super().interval_sql(expression) 615 616 def transaction_sql(self, expression: exp.Transaction) -> str: 617 modes = expression.args.get("modes") 618 modes = f" {', '.join(modes)}" if modes else "" 619 return f"START TRANSACTION{modes}" 620 621 def offset_limit_modifiers( 622 self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit] 623 ) -> t.List[str]: 624 return [ 625 self.sql(expression, "offset"), 626 self.sql(limit), 627 ] 628 629 def create_sql(self, expression: exp.Create) -> str: 630 """ 631 Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression), 632 so we need to remove them 633 """ 634 kind = expression.args["kind"] 635 schema = expression.this 636 if kind == "VIEW" and schema.expressions: 637 expression.this.set("expressions", None) 638 return super().create_sql(expression) 639 640 def delete_sql(self, expression: exp.Delete) -> str: 641 """ 642 Presto only supports DELETE FROM for a single table without an alias, so we need 643 to remove the unnecessary parts. If the original DELETE statement contains more 644 than one table to be deleted, we can't safely map it 1-1 to a Presto statement. 
645 """ 646 tables = expression.args.get("tables") or [expression.this] 647 if len(tables) > 1: 648 return super().delete_sql(expression) 649 650 table = tables[0] 651 expression.set("this", table) 652 expression.set("tables", None) 653 654 if isinstance(table, exp.Table): 655 table_alias = table.args.get("alias") 656 if table_alias: 657 table_alias.pop() 658 expression = t.cast(exp.Delete, expression.transform(unqualify_columns)) 659 660 return super().delete_sql(expression) 661 662 def jsonextract_sql(self, expression: exp.JSONExtract) -> str: 663 is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True) 664 665 # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks 666 # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino 667 if not expression.args.get("variant_extract") or is_json_extract: 668 return self.func( 669 "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions 670 ) 671 672 this = self.sql(expression, "this") 673 674 # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y 675 segments = [] 676 for path_key in expression.expression.expressions[1:]: 677 if not isinstance(path_key, exp.JSONPathKey): 678 # Cannot transpile subscripts, wildcards etc to dot notation 679 self.unsupported( 680 f"Cannot transpile JSONPath segment '{path_key}' to ROW access" 681 ) 682 continue 683 key = path_key.this 684 if not exp.SAFE_IDENTIFIER_RE.match(key): 685 key = f'"{key}"' 686 segments.append(f".{key}") 687 688 expr = "".join(segments) 689 690 return f"{this}{expr}" 691 692 def groupconcat_sql(self, expression: exp.GroupConcat) -> str: 693 return self.func( 694 "ARRAY_JOIN", 695 self.func("ARRAY_AGG", expression.this), 696 expression.args.get("separator"), 697 )
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Whether the behavior of `a / b` depends on the types of `a` and `b`.
False means `a / b` is always float division.
True means `a / b` is integer division if both `a` and `b` are integers.
Whether the base comes first in the `LOG` function.
Possible values: True, False, None (two arguments are not supported by `LOG`).
Associates this dialect's time formats with their equivalent Python strftime
formats.
Specifies the strategy according to which identifiers should be normalized.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- SUPPORTS_USER_DEFINED_TYPES
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
class Tokenizer(tokens.Tokenizer):
    """Presto-specific tokenizer overrides."""

    # Presto supports Unicode string literals written as U&'...' (upper or
    # lower case prefix), built here for every quote pair the base tokenizer
    # already knows about.
    UNICODE_STRINGS = [
        (prefix + q, q)
        for q in t.cast(t.List[str], tokens.Tokenizer.QUOTES)
        for prefix in ("U&", "u&")
    ]

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        # START TRANSACTION begins a transaction, so START maps to BEGIN.
        "START": TokenType.BEGIN,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        # Presto's ROW(...) is its struct type/constructor.
        "ROW": TokenType.STRUCT,
        "IPADDRESS": TokenType.IPADDRESS,
        "IPPREFIX": TokenType.IPPREFIX,
        "TDIGEST": TokenType.TDIGEST,
        "HYPERLOGLOG": TokenType.HLLSKETCH,
    }
    # Presto has no hint comment syntax and no QUALIFY clause, so drop both
    # from the inherited keyword table.
    KEYWORDS.pop("/*+")
    KEYWORDS.pop("QUALIFY")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Parser(parser.Parser):
    """Presto-specific parser overrides."""

    VALUES_FOLLOWED_BY_PAREN = False

    # Maps Presto function names to builders for sqlglot expression nodes.
    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "ARBITRARY": exp.AnyValue.from_arg_list,
        "APPROX_DISTINCT": exp.ApproxDistinct.from_arg_list,
        "APPROX_PERCENTILE": _build_approx_percentile,
        "BITWISE_AND": binary_from_function(exp.BitwiseAnd),
        "BITWISE_NOT": lambda args: exp.BitwiseNot(this=seq_get(args, 0)),
        "BITWISE_OR": binary_from_function(exp.BitwiseOr),
        "BITWISE_XOR": binary_from_function(exp.BitwiseXor),
        "CARDINALITY": exp.ArraySize.from_arg_list,
        "CONTAINS": exp.ArrayContains.from_arg_list,
        # Presto's DATE_ADD/DATE_DIFF take (unit, value, target), so the
        # argument order is reversed relative to the expression's fields.
        "DATE_ADD": lambda args: exp.DateAdd(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_DIFF": lambda args: exp.DateDiff(
            this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0)
        ),
        "DATE_FORMAT": build_formatted_time(exp.TimeToStr, "presto"),
        "DATE_PARSE": build_formatted_time(exp.StrToTime, "presto"),
        "DATE_TRUNC": date_trunc_to_time,
        "DAY_OF_WEEK": exp.DayOfWeekIso.from_arg_list,
        # ELEMENT_AT is a safe (NULL on out-of-range) 1-based subscript.
        "ELEMENT_AT": lambda args: exp.Bracket(
            this=seq_get(args, 0), expressions=[seq_get(args, 1)], offset=1, safe=True
        ),
        "FROM_HEX": exp.Unhex.from_arg_list,
        "FROM_UNIXTIME": _build_from_unixtime,
        "FROM_UTF8": lambda args: exp.Decode(
            this=seq_get(args, 0), replace=seq_get(args, 1), charset=exp.Literal.string("utf-8")
        ),
        "LEVENSHTEIN_DISTANCE": exp.Levenshtein.from_arg_list,
        "NOW": exp.CurrentTimestamp.from_arg_list,
        "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
        "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
        # The replacement argument is optional in Presto; default to ''.
        "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            replacement=seq_get(args, 2) or exp.Literal.string(""),
        ),
        "ROW": exp.Struct.from_arg_list,
        "SEQUENCE": exp.GenerateSeries.from_arg_list,
        "SET_AGG": exp.ArrayUniqueAgg.from_arg_list,
        "SPLIT_TO_MAP": exp.StrToMap.from_arg_list,
        "STRPOS": lambda args: exp.StrPosition(
            this=seq_get(args, 0), substr=seq_get(args, 1), instance=seq_get(args, 2)
        ),
        "TO_CHAR": _build_to_char,
        "TO_UNIXTIME": exp.TimeToUnix.from_arg_list,
        "TO_UTF8": lambda args: exp.Encode(
            this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
        ),
        # Presto's MD5/SHA256/SHA512 return binary digests, hence the
        # digest-flavored expression nodes.
        "MD5": exp.MD5Digest.from_arg_list,
        "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
        "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
    }

    FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
    # Let TRIM parse as a plain function call instead of the special
    # TRIM(<chars> FROM <expr>) syntax handled by the base parser.
    FUNCTION_PARSERS.pop("TRIM")
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    """Generates Presto SQL from a sqlglot syntax tree."""

    # Feature flags describing Presto's SQL surface, consumed by the
    # base Generator.
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    IS_BOOL_ALLOWED = False
    TZ_TO_WITH_TIME_ZONE = True
    NVL2_SUPPORTED = False
    STRUCT_DELIMITER = ("(", ")")
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_SINGLE_ARG_CONCAT = False
    LIKE_PROPERTY_INSIDE_SCHEMA = True
    MULTI_ARG_DISTINCT = False
    SUPPORTS_TO_NUMBER = False
    HEX_FUNC = "TO_HEX"
    PARSE_JSON_NAME = "JSON_PARSE"
    PAD_FILL_PATTERN_IS_REQUIRED = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = False
    ARRAY_SIZE_NAME = "CARDINALITY"

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.LocationProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # Presto type names for sqlglot's generic data types.
    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BINARY: "VARBINARY",
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.DATETIME64: "TIMESTAMP",
        exp.DataType.Type.FLOAT: "REAL",
        exp.DataType.Type.HLLSKETCH: "HYPERLOGLOG",
        exp.DataType.Type.INT: "INTEGER",
        exp.DataType.Type.STRUCT: "ROW",
        exp.DataType.Type.TEXT: "VARCHAR",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
        exp.DataType.Type.TIMETZ: "TIME",
    }

    # Per-expression SQL renderers; entries either rename a function,
    # preprocess the tree with a transform, or format the SQL directly.
    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.AnyValue: rename_func("ARBITRARY"),
        exp.ApproxQuantile: rename_func("APPROX_PERCENTILE"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.Array: lambda self, e: f"ARRAY[{self.expressions(e, flat=True)}]",
        exp.ArrayAny: rename_func("ANY_MATCH"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayContains: rename_func("CONTAINS"),
        exp.ArrayToString: rename_func("ARRAY_JOIN"),
        exp.ArrayUniqueAgg: rename_func("SET_AGG"),
        exp.AtTimeZone: rename_func("AT_TIMEZONE"),
        exp.BitwiseAnd: lambda self, e: self.func("BITWISE_AND", e.this, e.expression),
        exp.BitwiseLeftShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_LEFT", e.this, e.expression
        ),
        exp.BitwiseNot: lambda self, e: self.func("BITWISE_NOT", e.this),
        exp.BitwiseOr: lambda self, e: self.func("BITWISE_OR", e.this, e.expression),
        exp.BitwiseRightShift: lambda self, e: self.func(
            "BITWISE_ARITHMETIC_SHIFT_RIGHT", e.this, e.expression
        ),
        exp.BitwiseXor: lambda self, e: self.func("BITWISE_XOR", e.this, e.expression),
        exp.Cast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.CurrentTime: lambda *_: "CURRENT_TIME",
        exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
        exp.DateAdd: _date_delta_sql("DATE_ADD"),
        # DATE_DIFF takes (unit, start, end), reversed from the node's fields.
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", unit_to_str(e), e.expression, e.this
        ),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateToDi: lambda self,
        e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Presto.DATEINT_FORMAT}) AS INT)",
        # Subtraction is DATE_ADD with the interval negated.
        exp.DateSub: _date_delta_sql("DATE_ADD", negate_interval=True),
        # Presto's DAY_OF_WEEK is ISO (Mon=1..Sun=7); shift to Sun=1..Sat=7.
        exp.DayOfWeek: lambda self, e: f"(({self.func('DAY_OF_WEEK', e.this)} % 7) + 1)",
        exp.DayOfWeekIso: rename_func("DAY_OF_WEEK"),
        exp.Decode: lambda self, e: encode_decode_sql(self, e, "FROM_UTF8"),
        exp.DiToDate: lambda self,
        e: f"CAST(DATE_PARSE(CAST({self.sql(e, 'this')} AS VARCHAR), {Presto.DATEINT_FORMAT}) AS DATE)",
        exp.Encode: lambda self, e: encode_decode_sql(self, e, "TO_UTF8"),
        exp.FileFormatProperty: lambda self, e: f"FORMAT='{e.name.upper()}'",
        exp.First: _first_last_sql,
        exp.FirstValue: _first_last_sql,
        exp.FromTimeZone: lambda self,
        e: f"WITH_TIMEZONE({self.sql(e, 'this')}, {self.sql(e, 'zone')}) AT TIME ZONE 'UTC'",
        exp.GenerateSeries: sequence_sql,
        exp.GenerateDateArray: sequence_sql,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.Initcap: _initcap_sql,
        exp.JSONExtract: lambda self, e: self.jsonextract_sql(e),
        exp.Last: _first_last_sql,
        exp.LastValue: _first_last_sql,
        exp.LastDay: lambda self, e: self.func("LAST_DAY_OF_MONTH", e.this),
        exp.Lateral: explode_to_unnest_sql,
        exp.Left: left_to_substring_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
            rename_func("LEVENSHTEIN_DISTANCE")
        ),
        exp.LogicalAnd: rename_func("BOOL_AND"),
        exp.LogicalOr: rename_func("BOOL_OR"),
        exp.Pivot: no_pivot_sql,
        exp.Quantile: _quantile_sql,
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpExtractAll: regexp_extract_sql,
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.Schema: _schema_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.explode_to_unnest(1),
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SortArray: _no_sort_array,
        exp.StrPosition: lambda self, e: str_position_sql(self, e, generate_instance=True),
        exp.StrToDate: lambda self, e: f"CAST({_str_to_time_sql(self, e)} AS DATE)",
        exp.StrToMap: rename_func("SPLIT_TO_MAP"),
        exp.StrToTime: _str_to_time_sql,
        exp.StructExtract: struct_extract_sql,
        exp.Table: transforms.preprocess([transforms.unnest_generate_series]),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: _date_delta_sql("DATE_ADD"),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToDate: timestrtotime_sql,
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: lambda self, e: self.func(
            "TO_UNIXTIME", self.func("DATE_PARSE", e.this, Presto.TIME_FORMAT)
        ),
        exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TimeToUnix: rename_func("TO_UNIXTIME"),
        exp.ToChar: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
        exp.TryCast: transforms.preprocess([transforms.epoch_cast_to_ts]),
        exp.TsOrDiToDi: lambda self,
        e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS VARCHAR), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixToStr: lambda self,
        e: f"DATE_FORMAT(FROM_UNIXTIME({self.sql(e, 'this')}), {self.format_time(e)})",
        exp.UnixToTime: _unix_to_time_sql,
        exp.UnixToTimeStr: lambda self,
        e: f"CAST(FROM_UNIXTIME({self.sql(e, 'this')}) AS VARCHAR)",
        exp.VariancePop: rename_func("VAR_POP"),
        exp.With: transforms.preprocess([transforms.add_recursive_cte_column_names]),
        exp.WithinGroup: transforms.preprocess(
            [transforms.remove_within_group_for_percentiles]
        ),
        exp.Xor: bool_xor_sql,
        exp.MD5Digest: rename_func("MD5"),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
    }

    # Identifiers matching these must be quoted when generated.
    RESERVED_KEYWORDS = {
        "alter",
        "and",
        "as",
        "between",
        "by",
        "case",
        "cast",
        "constraint",
        "create",
        "cross",
        "current_time",
        "current_timestamp",
        "deallocate",
        "delete",
        "describe",
        "distinct",
        "drop",
        "else",
        "end",
        "escape",
        "except",
        "execute",
        "exists",
        "extract",
        "false",
        "for",
        "from",
        "full",
        "group",
        "having",
        "in",
        "inner",
        "insert",
        "intersect",
        "into",
        "is",
        "join",
        "left",
        "like",
        "natural",
        "not",
        "null",
        "on",
        "or",
        "order",
        "outer",
        "prepare",
        "right",
        "select",
        "table",
        "then",
        "true",
        "union",
        "using",
        "values",
        "when",
        "where",
        "with",
    }

    def md5_sql(self, expression: exp.MD5) -> str:
        """Render MD5 as LOWER(TO_HEX(MD5(...))), since Presto's MD5 returns
        a VARBINARY digest rather than a hex string."""
        this = expression.this

        # Type info is needed to decide whether a TO_UTF8 encode is required.
        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this)

        # Presto's MD5 takes VARBINARY, so text inputs must be encoded first.
        if this.is_type(*exp.DataType.TEXT_TYPES):
            this = exp.Encode(this=this, charset=exp.Literal.string("utf-8"))

        return self.func("LOWER", self.func("TO_HEX", self.func("MD5", self.sql(this))))

    def strtounix_sql(self, expression: exp.StrToUnix) -> str:
        # Since `TO_UNIXTIME` requires a `TIMESTAMP`, we need to parse the argument into one.
        # To do this, we first try to `DATE_PARSE` it, but since this can fail when there's a
        # timezone involved, we wrap it in a `TRY` call and use `PARSE_DATETIME` as a fallback,
        # which seems to be using the same time mapping as Hive, as per:
        # https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
        this = expression.this
        value_as_text = exp.cast(this, exp.DataType.Type.TEXT)
        value_as_timestamp = (
            exp.cast(this, exp.DataType.Type.TIMESTAMP) if this.is_string else this
        )

        parse_without_tz = self.func("DATE_PARSE", value_as_text, self.format_time(expression))

        formatted_value = self.func(
            "DATE_FORMAT", value_as_timestamp, self.format_time(expression)
        )
        parse_with_tz = self.func(
            "PARSE_DATETIME",
            formatted_value,
            self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
        )
        coalesced = self.func("COALESCE", self.func("TRY", parse_without_tz), parse_with_tz)
        return self.func("TO_UNIXTIME", coalesced)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        """Render safe subscripts as ELEMENT_AT (NULL on out-of-range access)."""
        if expression.args.get("safe"):
            return self.func(
                "ELEMENT_AT",
                expression.this,
                seq_get(
                    # Normalize the index to Presto's 1-based convention.
                    apply_index_offset(
                        expression.this,
                        expression.expressions,
                        1 - expression.args.get("offset", 0),
                    ),
                    0,
                ),
            )
        return super().bracket_sql(expression)

    def struct_sql(self, expression: exp.Struct) -> str:
        """Render a struct literal; emits CAST(ROW(...) AS ROW(...)) when every
        field has a known name and type, otherwise a bare ROW(...)."""
        from sqlglot.optimizer.annotate_types import annotate_types

        expression = annotate_types(expression)
        values: t.List[str] = []
        schema: t.List[str] = []
        unknown_type = False

        for e in expression.expressions:
            if isinstance(e, exp.PropertyEQ):
                if e.type and e.type.is_type(exp.DataType.Type.UNKNOWN):
                    unknown_type = True
                else:
                    schema.append(f"{self.sql(e, 'this')} {self.sql(e.type)}")
                values.append(self.sql(e, "expression"))
            else:
                values.append(self.sql(e))

        size = len(expression.expressions)

        # Fall back to an untyped ROW unless every field produced a schema entry.
        if not size or len(schema) != size:
            if unknown_type:
                self.unsupported(
                    "Cannot convert untyped key-value definitions (try annotate_types)."
                )
            return self.func("ROW", *values)
        return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"

    def interval_sql(self, expression: exp.Interval) -> str:
        """Rewrite WEEK intervals as (n * INTERVAL '7' DAY); Presto has no WEEK unit."""
        if expression.this and expression.text("unit").upper().startswith("WEEK"):
            return f"({expression.this.name} * INTERVAL '7' DAY)"
        return super().interval_sql(expression)

    def transaction_sql(self, expression: exp.Transaction) -> str:
        """Render transaction starts as START TRANSACTION with optional modes."""
        modes = expression.args.get("modes")
        modes = f" {', '.join(modes)}" if modes else ""
        return f"START TRANSACTION{modes}"

    def offset_limit_modifiers(
        self, expression: exp.Expression, fetch: bool, limit: t.Optional[exp.Fetch | exp.Limit]
    ) -> t.List[str]:
        # Presto puts OFFSET before LIMIT/FETCH.
        return [
            self.sql(expression, "offset"),
            self.sql(limit),
        ]

    def create_sql(self, expression: exp.Create) -> str:
        """
        Presto doesn't support CREATE VIEW with expressions (ex: `CREATE VIEW x (cola)` then `(cola)` is the expression),
        so we need to remove them
        """
        kind = expression.args["kind"]
        schema = expression.this
        if kind == "VIEW" and schema.expressions:
            expression.this.set("expressions", None)
        return super().create_sql(expression)

    def delete_sql(self, expression: exp.Delete) -> str:
        """
        Presto only supports DELETE FROM for a single table without an alias, so we need
        to remove the unnecessary parts. If the original DELETE statement contains more
        than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
        """
        tables = expression.args.get("tables") or [expression.this]
        if len(tables) > 1:
            return super().delete_sql(expression)

        table = tables[0]
        expression.set("this", table)
        expression.set("tables", None)

        if isinstance(table, exp.Table):
            table_alias = table.args.get("alias")
            if table_alias:
                table_alias.pop()
                # Columns qualified by the removed alias must be unqualified too.
                expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

        return super().delete_sql(expression)

    def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
        """Render JSON extraction as JSON_EXTRACT, or as ROW dot access when the
        node comes from a variant extract and the dialect is configured for it."""
        is_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

        # Generate JSON_EXTRACT unless the user has configured that a Snowflake / Databricks
        # VARIANT extract (e.g. col:x.y) should map to dot notation (i.e ROW access) in Presto/Trino
        if not expression.args.get("variant_extract") or is_json_extract:
            return self.func(
                "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
            )

        this = self.sql(expression, "this")

        # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y) to a ROW access col.x.y
        segments = []
        for path_key in expression.expression.expressions[1:]:
            if not isinstance(path_key, exp.JSONPathKey):
                # Cannot transpile subscripts, wildcards etc to dot notation
                self.unsupported(
                    f"Cannot transpile JSONPath segment '{path_key}' to ROW access"
                )
                continue
            key = path_key.this
            if not exp.SAFE_IDENTIFIER_RE.match(key):
                key = f'"{key}"'
            segments.append(f".{key}")

        expr = "".join(segments)

        return f"{this}{expr}"

    def groupconcat_sql(self, expression: exp.GroupConcat) -> str:
        """Render GROUP_CONCAT as ARRAY_JOIN(ARRAY_AGG(...), separator)."""
        return self.func(
            "ARRAY_JOIN",
            self.func("ARRAY_AGG", expression.this),
            expression.args.get("separator"),
        )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
def md5_sql(self, expression: exp.MD5) -> str:
    """Render MD5 as LOWER(TO_HEX(MD5(...))), encoding text inputs to UTF-8 first."""
    arg = expression.this

    # Type information is needed to decide whether an ENCODE wrapper is required.
    if not arg.type:
        from sqlglot.optimizer.annotate_types import annotate_types

        arg = annotate_types(arg)

    # Presto's MD5 operates on varbinary, so textual arguments must be encoded.
    if arg.is_type(*exp.DataType.TEXT_TYPES):
        arg = exp.Encode(this=arg, charset=exp.Literal.string("utf-8"))

    digest = self.func("MD5", self.sql(arg))
    return self.func("LOWER", self.func("TO_HEX", digest))
def strtounix_sql(self, expression: exp.StrToUnix) -> str:
    """Render StrToUnix as TO_UNIXTIME, parsing the argument into a TIMESTAMP first.

    DATE_PARSE is attempted first; because it can fail when a timezone is involved,
    it is wrapped in TRY and PARSE_DATETIME serves as the fallback. PARSE_DATETIME
    appears to share Hive's (Joda-style) time mapping, as per:
    https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html
    """
    arg = expression.this
    as_text = exp.cast(arg, exp.DataType.Type.TEXT)
    as_timestamp = exp.cast(arg, exp.DataType.Type.TIMESTAMP) if arg.is_string else arg

    naive_parse = self.func("DATE_PARSE", as_text, self.format_time(expression))

    reformatted = self.func("DATE_FORMAT", as_timestamp, self.format_time(expression))
    tz_parse = self.func(
        "PARSE_DATETIME",
        reformatted,
        self.format_time(expression, Hive.INVERSE_TIME_MAPPING, Hive.INVERSE_TIME_TRIE),
    )

    parsed = self.func("COALESCE", self.func("TRY", naive_parse), tz_parse)
    return self.func("TO_UNIXTIME", parsed)
def bracket_sql(self, expression: exp.Bracket) -> str:
    """Render subscript access; "safe" lookups map to ELEMENT_AT instead of [...]."""
    if expression.args.get("safe"):
        # ELEMENT_AT tolerates out-of-range positions, matching "safe" semantics.
        # Presto positions are 1-based, so normalize the index offset accordingly.
        adjusted = apply_index_offset(
            expression.this,
            expression.expressions,
            1 - expression.args.get("offset", 0),
        )
        return self.func("ELEMENT_AT", expression.this, seq_get(adjusted, 0))

    return super().bracket_sql(expression)
def struct_sql(self, expression: exp.Struct) -> str:
    """Render Struct as CAST(ROW(...) AS ROW(...)) when every field is typed, else ROW(...)."""
    from sqlglot.optimizer.annotate_types import annotate_types

    expression = annotate_types(expression)

    values: t.List[str] = []
    schema: t.List[str] = []
    saw_unknown = False

    for field in expression.expressions:
        if not isinstance(field, exp.PropertyEQ):
            values.append(self.sql(field))
            continue

        if field.type and field.type.is_type(exp.DataType.Type.UNKNOWN):
            saw_unknown = True
        else:
            schema.append(f"{self.sql(field, 'this')} {self.sql(field.type)}")
        values.append(self.sql(field, "expression"))

    total = len(expression.expressions)

    # A partial schema (or no fields at all) means a typed ROW cast can't be produced.
    if not total or len(schema) != total:
        if saw_unknown:
            self.unsupported("Cannot convert untyped key-value definitions (try annotate_types).")
        return self.func("ROW", *values)

    return f"CAST(ROW({', '.join(values)}) AS ROW({', '.join(schema)}))"
def create_sql(self, expression: exp.Create) -> str:
    """Drop column expressions from CREATE VIEW, which Presto does not support.

    E.g. in `CREATE VIEW x (cola)`, the `(cola)` schema expression list is removed.
    """
    schema = expression.this
    if expression.args["kind"] == "VIEW" and schema.expressions:
        schema.set("expressions", None)
    return super().create_sql(expression)
Presto doesn't support CREATE VIEW with expressions (ex: in `CREATE VIEW x (cola)`, `(cola)` is the expression), so we need to remove them.
def delete_sql(self, expression: exp.Delete) -> str:
    """Rewrite DELETE into Presto's single-table, alias-free DELETE FROM form.

    If the statement targets more than one table there is no safe 1-1 Presto
    equivalent, so it is passed through unchanged.
    """
    targets = expression.args.get("tables") or [expression.this]
    if len(targets) > 1:
        return super().delete_sql(expression)

    target = targets[0]
    expression.set("this", target)
    expression.set("tables", None)

    if isinstance(target, exp.Table):
        alias = target.args.get("alias")
        if alias:
            alias.pop()
            expression = t.cast(exp.Delete, expression.transform(unqualify_columns))

    return super().delete_sql(expression)
Presto only supports DELETE FROM for a single table without an alias, so we need to remove the unnecessary parts. If the original DELETE statement contains more than one table to be deleted, we can't safely map it 1-1 to a Presto statement.
def jsonextract_sql(self, expression: exp.JSONExtract) -> str:
    """Render JSONExtract as JSON_EXTRACT or, when configured, as ROW dot-access."""
    use_json_extract = self.dialect.settings.get("variant_extract_is_json_extract", True)

    # Emit JSON_EXTRACT unless the user opted to map Snowflake/Databricks VARIANT
    # extracts (e.g. col:x.y) to dot notation (i.e. ROW access) in Presto/Trino.
    if use_json_extract or not expression.args.get("variant_extract"):
        return self.func(
            "JSON_EXTRACT", expression.this, expression.expression, *expression.expressions
        )

    base = self.sql(expression, "this")

    # Convert the JSONPath extraction `JSON_EXTRACT(col, '$.x.y')` to ROW access `col.x.y`
    parts = []
    for segment in expression.expression.expressions[1:]:
        if not isinstance(segment, exp.JSONPathKey):
            # Subscripts, wildcards etc. have no dot-notation equivalent
            self.unsupported(f"Cannot transpile JSONPath segment '{segment}' to ROW access")
            continue

        name = segment.this
        if not exp.SAFE_IDENTIFIER_RE.match(name):
            name = f'"{name}"'
        parts.append(f".{name}")

    return f"{base}{''.join(parts)}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SUPPORTED_JSON_PATH_PARTS
- CAN_IMPLEMENT_ARRAY_ANY
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- TRY_SUPPORTED
- SUPPORTS_UESCAPE
- STAR_EXCEPT
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_UNIX_SECONDS
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql